From 92de28d003e3d05ce84c2c2c2e3a5babf8cf7e5e Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 23 Jan 2026 03:55:25 -0800 Subject: [PATCH 01/18] Move dependent dpctl.tensor headers from utils and kernels namespace --- CMakeLists.txt | 1 - .../libtensor/include/kernels/alignment.hpp | 46 + .../include/kernels/dpctl_tensor_types.hpp | 40 + .../kernels/elementwise_functions/common.hpp | 1045 +++++++++++++++++ .../elementwise_functions/common_detail.hpp | 70 ++ .../elementwise_functions/logaddexp.hpp | 268 +++++ .../kernels/elementwise_functions/maximum.hpp | 322 +++++ .../kernels/elementwise_functions/minimum.hpp | 321 +++++ .../elementwise_functions/sycl_complex.hpp | 44 + .../elementwise_functions/vec_size_util.hpp | 70 ++ .../include/utils/indexing_utils.hpp | 153 +++ .../libtensor/include/utils/math_utils.hpp | 148 +++ .../include/utils/memory_overlap.hpp | 157 +++ .../libtensor/include/utils/offset_utils.hpp | 824 +++++++++++++ .../include/utils/output_validation.hpp | 79 ++ .../libtensor/include/utils/strided_iters.hpp | 996 ++++++++++++++++ .../include/utils/sycl_alloc_utils.hpp | 223 ++++ .../libtensor/include/utils/sycl_utils.hpp | 662 +++++++++++ .../libtensor/include/utils/type_dispatch.hpp | 134 +++ .../include/utils/type_dispatch_building.hpp | 300 +++++ .../libtensor/include/utils/type_utils.hpp | 164 +++ dpnp/backend/CMakeLists.txt | 1 - dpnp/backend/extensions/blas/CMakeLists.txt | 5 +- dpnp/backend/extensions/blas/dot_common.hpp | 1 + dpnp/backend/extensions/common/ext/common.hpp | 2 + dpnp/backend/extensions/fft/CMakeLists.txt | 5 +- .../extensions/indexing/CMakeLists.txt | 5 +- dpnp/backend/extensions/lapack/CMakeLists.txt | 5 +- .../extensions/statistics/CMakeLists.txt | 5 +- dpnp/backend/extensions/ufunc/CMakeLists.txt | 5 +- dpnp/backend/extensions/vm/CMakeLists.txt | 5 +- dpnp/backend/extensions/window/CMakeLists.txt | 5 +- pyproject.toml | 2 +- 33 files changed, 6102 insertions(+), 11 deletions(-) create mode 100644 dpctl/tensor/libtensor/include/kernels/alignment.hpp create mode 100644 dpctl/tensor/libtensor/include/kernels/dpctl_tensor_types.hpp create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/common.hpp create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/common_detail.hpp create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/sycl_complex.hpp create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/vec_size_util.hpp create mode 100644 dpctl/tensor/libtensor/include/utils/indexing_utils.hpp create mode 100644 dpctl/tensor/libtensor/include/utils/math_utils.hpp create mode 100644 dpctl/tensor/libtensor/include/utils/memory_overlap.hpp create mode 100644 dpctl/tensor/libtensor/include/utils/offset_utils.hpp create mode 100644 dpctl/tensor/libtensor/include/utils/output_validation.hpp create mode 100644 dpctl/tensor/libtensor/include/utils/strided_iters.hpp create mode 100644 dpctl/tensor/libtensor/include/utils/sycl_alloc_utils.hpp create mode 100644 dpctl/tensor/libtensor/include/utils/sycl_utils.hpp create mode 100644 dpctl/tensor/libtensor/include/utils/type_dispatch.hpp create mode 100644 dpctl/tensor/libtensor/include/utils/type_dispatch_building.hpp create mode 100644 
dpctl/tensor/libtensor/include/utils/type_utils.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 9d676232f08e..386b17b44294 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,7 +97,6 @@ find_package(Cython REQUIRED) find_package(Dpctl REQUIRED) message(STATUS "Dpctl_INCLUDE_DIR=" ${Dpctl_INCLUDE_DIR}) -message(STATUS "Dpctl_TENSOR_INCLUDE_DIR=" ${Dpctl_TENSOR_INCLUDE_DIR}) option(DPNP_USE_ONEMATH "Build DPNP with oneMath" OFF) set(DPNP_TARGET_CUDA diff --git a/dpctl/tensor/libtensor/include/kernels/alignment.hpp b/dpctl/tensor/libtensor/include/kernels/alignment.hpp new file mode 100644 index 000000000000..a67e9b15306e --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/alignment.hpp @@ -0,0 +1,46 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include + +namespace dpctl::tensor::kernels::alignment_utils +{ +inline constexpr std::size_t required_alignment = 64UL; + +template +bool is_aligned(Ptr p) +{ + return !(reinterpret_cast(p) % alignment); +} + +template +class disabled_sg_loadstore_wrapper_krn; +} // namespace dpctl::tensor::kernels::alignment_utils diff --git a/dpctl/tensor/libtensor/include/kernels/dpctl_tensor_types.hpp b/dpctl/tensor/libtensor/include/kernels/dpctl_tensor_types.hpp new file mode 100644 index 000000000000..4db78e1805e3 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/dpctl_tensor_types.hpp @@ -0,0 +1,40 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions +//===--------------------------------------------------------------------===// + +#pragma once + +#include + +namespace dpctl::tensor +{ +typedef std::ptrdiff_t ssize_t; +} // namespace dpctl::tensor diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common.hpp new file mode 100644 index 000000000000..d19930b722a9 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common.hpp @@ -0,0 +1,1045 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +/// +/// \file +/// This file defines common code for elementwise tensor operations. +//===---------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "common_detail.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/sycl_alloc_utils.hpp" +#include "utils/sycl_utils.hpp" + +#include "kernels/alignment.hpp" +#include "kernels/dpctl_tensor_types.hpp" + +namespace dpctl::tensor::kernels::elementwise_common +{ +using dpctl::tensor::ssize_t; +using dpctl::tensor::kernels::alignment_utils:: + disabled_sg_loadstore_wrapper_krn; +using dpctl::tensor::kernels::alignment_utils::is_aligned; +using dpctl::tensor::kernels::alignment_utils::required_alignment; + +using dpctl::tensor::sycl_utils::sub_group_load; +using dpctl::tensor::sycl_utils::sub_group_store; + +/*! @brief Functor for unary function evaluation on contiguous array */ +template +struct UnaryContigFunctor +{ +private: + const argT *in = nullptr; + resT *out = nullptr; + std::size_t nelems_; + +public: + UnaryContigFunctor(const argT *inp, resT *res, const std::size_t n_elems) + : in(inp), out(res), nelems_(n_elems) + { + } + + void operator()(sycl::nd_item<1> ndit) const + { + static constexpr std::uint8_t elems_per_wi = n_vecs * vec_sz; + UnaryOperatorT op{}; + /* Each work-item processes vec_sz elements, contiguous in memory */ + /* NOTE: work-group size must be divisible by sub-group size */ + + if constexpr (enable_sg_loadstore && UnaryOperatorT::is_constant::value) + { + // value of operator is known to be a known constant + constexpr resT const_val = UnaryOperatorT::constant_value; + + auto sg = ndit.get_sub_group(); + const std::uint16_t sgSize = sg.get_max_local_range()[0]; + + const std::size_t base = + elems_per_wi * (ndit.get_group(0) * ndit.get_local_range(0) + + sg.get_group_id()[0] * sgSize); + if (base + elems_per_wi * sgSize < nelems_) { + static constexpr sycl::vec res_vec(const_val); +#pragma unroll + for (std::uint8_t it = 0; it < elems_per_wi; it += vec_sz) { + const std::size_t offset = base + it * sgSize; + auto out_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out[offset]); + + sub_group_store(sg, res_vec, out_multi_ptr); + } + } + else { + const std::size_t lane_id = sg.get_local_id()[0]; + for (std::size_t k = base + lane_id; k < nelems_; k += sgSize) { + out[k] = const_val; + } + } + } + else if constexpr (enable_sg_loadstore && + UnaryOperatorT::supports_sg_loadstore::value && + UnaryOperatorT::supports_vec::value && (vec_sz > 1)) + { + auto sg = ndit.get_sub_group(); + const std::uint16_t sgSize = sg.get_max_local_range()[0]; + + const std::size_t base = + elems_per_wi * (ndit.get_group(0) * ndit.get_local_range(0) + + sg.get_group_id()[0] * sgSize); + if (base + elems_per_wi * sgSize < 
nelems_) { +#pragma unroll + for (std::uint8_t it = 0; it < elems_per_wi; it += vec_sz) { + const std::size_t offset = base + it * sgSize; + auto in_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&in[offset]); + auto out_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out[offset]); + + const sycl::vec x = + sub_group_load(sg, in_multi_ptr); + const sycl::vec res_vec = op(x); + sub_group_store(sg, res_vec, out_multi_ptr); + } + } + else { + const std::size_t lane_id = sg.get_local_id()[0]; + for (std::size_t k = base + lane_id; k < nelems_; k += sgSize) { + // scalar call + out[k] = op(in[k]); + } + } + } + else if constexpr (enable_sg_loadstore && + UnaryOperatorT::supports_sg_loadstore::value && + std::is_same_v) + { + // default: use scalar-value function + + auto sg = ndit.get_sub_group(); + const std::uint16_t sgSize = sg.get_max_local_range()[0]; + const std::size_t base = + elems_per_wi * (ndit.get_group(0) * ndit.get_local_range(0) + + sg.get_group_id()[0] * sgSize); + + if (base + elems_per_wi * sgSize < nelems_) { +#pragma unroll + for (std::uint8_t it = 0; it < elems_per_wi; it += vec_sz) { + const std::size_t offset = base + it * sgSize; + auto in_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&in[offset]); + auto out_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out[offset]); + + sycl::vec arg_vec = + sub_group_load(sg, in_multi_ptr); +#pragma unroll + for (std::uint32_t k = 0; k < vec_sz; ++k) { + arg_vec[k] = op(arg_vec[k]); + } + sub_group_store(sg, arg_vec, out_multi_ptr); + } + } + else { + const std::size_t lane_id = sg.get_local_id()[0]; + for (std::size_t k = base + lane_id; k < nelems_; k += sgSize) { + out[k] = op(in[k]); + } + } + } + else if constexpr (enable_sg_loadstore && + UnaryOperatorT::supports_sg_loadstore::value) + { + // default: use scalar-value function + + auto sg = ndit.get_sub_group(); + const std::uint16_t sgSize = sg.get_max_local_range()[0]; + const std::size_t base = + elems_per_wi * (ndit.get_group(0) * ndit.get_local_range(0) + + sg.get_group_id()[0] * sgSize); + + if (base + elems_per_wi * sgSize < nelems_) { +#pragma unroll + for (std::uint8_t it = 0; it < elems_per_wi; it += vec_sz) { + const std::size_t offset = base + it * sgSize; + auto in_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&in[offset]); + auto out_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out[offset]); + + const sycl::vec arg_vec = + sub_group_load(sg, in_multi_ptr); + sycl::vec res_vec; +#pragma unroll + for (std::uint8_t k = 0; k < vec_sz; ++k) { + res_vec[k] = op(arg_vec[k]); + } + sub_group_store(sg, res_vec, out_multi_ptr); + } + } + else { + const std::size_t lane_id = sg.get_local_id()[0]; + for (std::size_t k = base + lane_id; k < nelems_; k += sgSize) { + out[k] = op(in[k]); + } + } + } + else { + const std::uint16_t sgSize = + ndit.get_sub_group().get_local_range()[0]; + const std::size_t gid = ndit.get_global_linear_id(); + const std::uint16_t elems_per_sg = sgSize * elems_per_wi; + + const std::size_t start = + (gid / sgSize) * (elems_per_sg - sgSize) + gid; + const std::size_t end = std::min(nelems_, start + elems_per_sg); + for (std::size_t offset = 
start; offset < end; offset += sgSize) { + out[offset] = op(in[offset]); + } + } + } +}; + +template +struct UnaryStridedFunctor +{ +private: + const argT *inp_ = nullptr; + resT *res_ = nullptr; + IndexerT inp_out_indexer_; + +public: + UnaryStridedFunctor(const argT *inp_p, + resT *res_p, + const IndexerT &inp_out_indexer) + : inp_(inp_p), res_(res_p), inp_out_indexer_(inp_out_indexer) + { + } + + void operator()(sycl::id<1> wid) const + { + const auto &offsets_ = inp_out_indexer_(wid.get(0)); + const ssize_t &inp_offset = offsets_.get_first_offset(); + const ssize_t &res_offset = offsets_.get_second_offset(); + + UnaryOpT op{}; + + res_[res_offset] = op(inp_[inp_offset]); + } +}; + +template +SizeT select_lws(const sycl::device &, SizeT n_work_items_needed) +{ + // TODO: make the decision based on device descriptors + + // constexpr SizeT few_threshold = (SizeT(1) << 17); + static constexpr SizeT med_threshold = (SizeT(1) << 21); + + const SizeT lws = + (n_work_items_needed <= med_threshold ? SizeT(128) : SizeT(256)); + + return lws; +} + +template + class UnaryOutputType, + template + class ContigFunctorT, + template + class kernel_name, + std::uint8_t vec_sz = 4u, + std::uint8_t n_vecs = 2u> +sycl::event unary_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg_p, + char *res_p, + const std::vector &depends = {}) +{ + static constexpr std::uint8_t elems_per_wi = n_vecs * vec_sz; + const std::size_t n_work_items_needed = nelems / elems_per_wi; + const std::size_t lws = + select_lws(exec_q.get_device(), n_work_items_needed); + + const std::size_t n_groups = + ((nelems + lws * elems_per_wi - 1) / (lws * elems_per_wi)); + const auto gws_range = sycl::range<1>(n_groups * lws); + const auto lws_range = sycl::range<1>(lws); + + using resTy = typename UnaryOutputType::value_type; + using BaseKernelName = kernel_name; + + const argTy *arg_tp = reinterpret_cast(arg_p); + resTy *res_tp = reinterpret_cast(res_p); + + sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + + if (is_aligned(arg_p) && + is_aligned(res_p)) + { + static constexpr bool enable_sg_loadstore = true; + using KernelName = BaseKernelName; + using Impl = ContigFunctorT; + + cgh.parallel_for( + sycl::nd_range<1>(gws_range, lws_range), + Impl(arg_tp, res_tp, nelems)); + } + else { + static constexpr bool disable_sg_loadstore = false; + using KernelName = + disabled_sg_loadstore_wrapper_krn; + using Impl = ContigFunctorT; + + cgh.parallel_for( + sycl::nd_range<1>(gws_range, lws_range), + Impl(arg_tp, res_tp, nelems)); + } + }); + + return comp_ev; +} + +template + class UnaryOutputType, + template + class StridedFunctorT, + template + class kernel_name> +sycl::event + unary_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg_p, + ssize_t arg_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + cgh.depends_on(additional_depends); + + using resTy = typename UnaryOutputType::value_type; + using IndexerT = + typename dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer; + + const IndexerT indexer{nd, arg_offset, res_offset, shape_and_strides}; + + const argTy *arg_tp = reinterpret_cast(arg_p); + resTy *res_tp = reinterpret_cast(res_p); + + using Impl = StridedFunctorT; + + cgh.parallel_for>( + {nelems}, Impl(arg_tp, res_tp, indexer)); + }); + return 
comp_ev; +} + +template +struct BinaryContigFunctor +{ +private: + const argT1 *in1 = nullptr; + const argT2 *in2 = nullptr; + resT *out = nullptr; + std::size_t nelems_; + +public: + BinaryContigFunctor(const argT1 *inp1, + const argT2 *inp2, + resT *res, + const std::size_t n_elems) + : in1(inp1), in2(inp2), out(res), nelems_(n_elems) + { + } + + void operator()(sycl::nd_item<1> ndit) const + { + static constexpr std::uint8_t elems_per_wi = n_vecs * vec_sz; + BinaryOperatorT op{}; + /* Each work-item processes vec_sz elements, contiguous in memory */ + /* NOTE: work-group size must be divisible by sub-group size */ + + if constexpr (enable_sg_loadstore && + BinaryOperatorT::supports_sg_loadstore::value && + BinaryOperatorT::supports_vec::value && (vec_sz > 1)) + { + auto sg = ndit.get_sub_group(); + std::uint16_t sgSize = sg.get_max_local_range()[0]; + + const std::size_t base = + elems_per_wi * (ndit.get_group(0) * ndit.get_local_range(0) + + sg.get_group_id()[0] * sgSize); + + if (base + elems_per_wi * sgSize < nelems_) { + sycl::vec res_vec; + +#pragma unroll + for (std::uint8_t it = 0; it < elems_per_wi; it += vec_sz) { + std::size_t offset = base + it * sgSize; + auto in1_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&in1[offset]); + auto in2_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&in2[offset]); + auto out_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out[offset]); + + const sycl::vec arg1_vec = + sub_group_load(sg, in1_multi_ptr); + const sycl::vec arg2_vec = + sub_group_load(sg, in2_multi_ptr); + res_vec = op(arg1_vec, arg2_vec); + sub_group_store(sg, res_vec, out_multi_ptr); + } + } + else { + const std::size_t lane_id = sg.get_local_id()[0]; + for (std::size_t k = base + lane_id; k < nelems_; k += sgSize) { + out[k] = op(in1[k], in2[k]); + } + } + } + else if constexpr (enable_sg_loadstore && + BinaryOperatorT::supports_sg_loadstore::value) + { + auto sg = ndit.get_sub_group(); + const std::uint16_t sgSize = sg.get_max_local_range()[0]; + + const std::size_t base = + elems_per_wi * (ndit.get_group(0) * ndit.get_local_range(0) + + sg.get_group_id()[0] * sgSize); + + if (base + elems_per_wi * sgSize < nelems_) { +#pragma unroll + for (std::uint8_t it = 0; it < elems_per_wi; it += vec_sz) { + const std::size_t offset = base + it * sgSize; + auto in1_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&in1[offset]); + auto in2_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&in2[offset]); + auto out_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out[offset]); + + const sycl::vec arg1_vec = + sub_group_load(sg, in1_multi_ptr); + const sycl::vec arg2_vec = + sub_group_load(sg, in2_multi_ptr); + + sycl::vec res_vec; +#pragma unroll + for (std::uint8_t vec_id = 0; vec_id < vec_sz; ++vec_id) { + res_vec[vec_id] = + op(arg1_vec[vec_id], arg2_vec[vec_id]); + } + sub_group_store(sg, res_vec, out_multi_ptr); + } + } + else { + const std::size_t lane_id = sg.get_local_id()[0]; + for (std::size_t k = base + lane_id; k < nelems_; k += sgSize) { + out[k] = op(in1[k], in2[k]); + } + } + } + else { + const std::size_t sgSize = + ndit.get_sub_group().get_local_range()[0]; + const std::size_t 
gid = ndit.get_global_linear_id(); + const std::size_t elems_per_sg = sgSize * elems_per_wi; + + const std::size_t start = + (gid / sgSize) * (elems_per_sg - sgSize) + gid; + const std::size_t end = std::min(nelems_, start + elems_per_sg); + for (std::size_t offset = start; offset < end; offset += sgSize) { + out[offset] = op(in1[offset], in2[offset]); + } + } + } +}; + +template +struct BinaryStridedFunctor +{ +private: + const argT1 *in1 = nullptr; + const argT2 *in2 = nullptr; + resT *out = nullptr; + ThreeOffsets_IndexerT three_offsets_indexer_; + +public: + BinaryStridedFunctor(const argT1 *inp1_tp, + const argT2 *inp2_tp, + resT *res_tp, + const ThreeOffsets_IndexerT &inps_res_indexer) + : in1(inp1_tp), in2(inp2_tp), out(res_tp), + three_offsets_indexer_(inps_res_indexer) + { + } + + void operator()(sycl::id<1> wid) const + { + const auto &three_offsets_ = + three_offsets_indexer_(static_cast(wid.get(0))); + + const auto &inp1_offset = three_offsets_.get_first_offset(); + const auto &inp2_offset = three_offsets_.get_second_offset(); + const auto &out_offset = three_offsets_.get_third_offset(); + + BinaryOperatorT op{}; + out[out_offset] = op(in1[inp1_offset], in2[inp2_offset]); + } +}; + +template +struct BinaryContigMatrixContigRowBroadcastingFunctor +{ +private: + const argT1 *mat; + const argT2 *padded_vec; + resT *res; + std::size_t n_elems; + std::size_t n1; + +public: + BinaryContigMatrixContigRowBroadcastingFunctor(const argT1 *mat_tp, + const argT2 *row_tp, + resT *res_tp, + std::size_t n_elems_in_mat, + std::size_t n_elems_in_row) + : mat(mat_tp), padded_vec(row_tp), res(res_tp), n_elems(n_elems_in_mat), + n1(n_elems_in_row) + { + } + + void operator()(sycl::nd_item<1> ndit) const + { + /* NOTE: work-group size must be divisible by sub-group size */ + + BinaryOperatorT op{}; + static_assert(BinaryOperatorT::supports_sg_loadstore::value); + + const auto &sg = ndit.get_sub_group(); + const std::size_t gid = ndit.get_global_linear_id(); + + const std::size_t sgSize = sg.get_max_local_range()[0]; + const std::size_t base = gid - sg.get_local_id()[0]; + + if (base + sgSize < n_elems) { + auto in1_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&mat[base]); + + auto in2_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&padded_vec[base % n1]); + + auto out_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&res[base]); + + const argT1 mat_el = sub_group_load(sg, in1_multi_ptr); + const argT2 vec_el = sub_group_load(sg, in2_multi_ptr); + + resT res_el = op(mat_el, vec_el); + + sub_group_store(sg, res_el, out_multi_ptr); + } + else { + const std::size_t lane_id = sg.get_local_id()[0]; + for (std::size_t k = base + lane_id; k < n_elems; k += sgSize) { + res[k] = op(mat[k], padded_vec[k % n1]); + } + } + } +}; + +template +struct BinaryContigRowContigMatrixBroadcastingFunctor +{ +private: + const argT1 *padded_vec; + const argT2 *mat; + resT *res; + std::size_t n_elems; + std::size_t n1; + +public: + BinaryContigRowContigMatrixBroadcastingFunctor(const argT1 *row_tp, + const argT2 *mat_tp, + resT *res_tp, + std::size_t n_elems_in_mat, + std::size_t n_elems_in_row) + : padded_vec(row_tp), mat(mat_tp), res(res_tp), n_elems(n_elems_in_mat), + n1(n_elems_in_row) + { + } + + void operator()(sycl::nd_item<1> ndit) const + { + /* NOTE: work-group size must be divisible by sub-group size */ + 
BinaryOperatorT op{}; + static_assert(BinaryOperatorT::supports_sg_loadstore::value); + + const auto &sg = ndit.get_sub_group(); + std::size_t gid = ndit.get_global_linear_id(); + + const std::size_t sgSize = sg.get_max_local_range()[0]; + const std::size_t base = gid - sg.get_local_id()[0]; + + if (base + sgSize < n_elems) { + auto in1_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&padded_vec[base % n1]); + + auto in2_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&mat[base]); + + auto out_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&res[base]); + + const argT2 mat_el = sub_group_load(sg, in2_multi_ptr); + const argT1 vec_el = sub_group_load(sg, in1_multi_ptr); + + resT res_el = op(vec_el, mat_el); + + sub_group_store(sg, res_el, out_multi_ptr); + } + else { + const std::size_t lane_id = sg.get_local_id()[0]; + for (std::size_t k = base + lane_id; k < n_elems; k += sgSize) { + res[k] = op(padded_vec[k % n1], mat[k]); + } + } + } +}; + +// Typedefs for function pointers + +typedef sycl::event (*unary_contig_impl_fn_ptr_t)( + sycl::queue &, + std::size_t, + const char *, + char *, + const std::vector &); + +typedef sycl::event (*unary_strided_impl_fn_ptr_t)( + sycl::queue &, + std::size_t, + int, + const ssize_t *, + const char *, + ssize_t, + char *, + ssize_t, + const std::vector &, + const std::vector &); + +typedef sycl::event (*binary_contig_impl_fn_ptr_t)( + sycl::queue &, + std::size_t, + const char *, + ssize_t, + const char *, + ssize_t, + char *, + ssize_t, + const std::vector &); + +typedef sycl::event (*binary_strided_impl_fn_ptr_t)( + sycl::queue &, + std::size_t, + int, + const ssize_t *, + const char *, + ssize_t, + const char *, + ssize_t, + char *, + ssize_t, + const std::vector &, + const std::vector &); + +typedef sycl::event (*binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t)( + sycl::queue &, + std::vector &, + std::size_t, + std::size_t, + const char *, + ssize_t, + const char *, + ssize_t, + char *, + ssize_t, + const std::vector &); + +typedef sycl::event (*binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t)( + sycl::queue &, + std::vector &, + std::size_t, + std::size_t, + const char *, + ssize_t, + const char *, + ssize_t, + char *, + ssize_t, + const std::vector &); + +template + class BinaryOutputType, + template + class BinaryContigFunctorT, + template + class kernel_name, + std::uint8_t vec_sz = 4u, + std::uint8_t n_vecs = 2u> +sycl::event binary_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + const std::size_t n_work_items_needed = nelems / (n_vecs * vec_sz); + const std::size_t lws = + select_lws(exec_q.get_device(), n_work_items_needed); + + const std::size_t n_groups = + ((nelems + lws * n_vecs * vec_sz - 1) / (lws * n_vecs * vec_sz)); + const auto gws_range = sycl::range<1>(n_groups * lws); + const auto lws_range = sycl::range<1>(lws); + + using resTy = typename BinaryOutputType::value_type; + using BaseKernelName = kernel_name; + + const argTy1 *arg1_tp = + reinterpret_cast(arg1_p) + arg1_offset; + const argTy2 *arg2_tp = + reinterpret_cast(arg2_p) + arg2_offset; + resTy *res_tp = reinterpret_cast(res_p) + res_offset; + + sycl::event comp_ev = 
exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + + if (is_aligned(arg1_tp) && + is_aligned(arg2_tp) && + is_aligned(res_tp)) + { + static constexpr bool enable_sg_loadstore = true; + using KernelName = BaseKernelName; + using Impl = BinaryContigFunctorT; + + cgh.parallel_for( + sycl::nd_range<1>(gws_range, lws_range), + Impl(arg1_tp, arg2_tp, res_tp, nelems)); + } + else { + static constexpr bool disable_sg_loadstore = false; + using KernelName = + disabled_sg_loadstore_wrapper_krn; + using Impl = BinaryContigFunctorT; + + cgh.parallel_for( + sycl::nd_range<1>(gws_range, lws_range), + Impl(arg1_tp, arg2_tp, res_tp, nelems)); + } + }); + return comp_ev; +} + +template + class BinaryOutputType, + template + class BinaryStridedFunctorT, + template + class kernel_name> +sycl::event + binary_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + cgh.depends_on(additional_depends); + + using resTy = typename BinaryOutputType::value_type; + + using IndexerT = + typename dpctl::tensor::offset_utils::ThreeOffsets_StridedIndexer; + + const IndexerT indexer{nd, arg1_offset, arg2_offset, res_offset, + shape_and_strides}; + + const argTy1 *arg1_tp = reinterpret_cast(arg1_p); + const argTy2 *arg2_tp = reinterpret_cast(arg2_p); + resTy *res_tp = reinterpret_cast(res_p); + + using Impl = BinaryStridedFunctorT; + + cgh.parallel_for>( + {nelems}, Impl(arg1_tp, arg2_tp, res_tp, indexer)); + }); + return comp_ev; +} + +template + class BinaryContigMatrixContigRowBroadcastFunctorT, + template + class kernel_name> +sycl::event binary_contig_matrix_contig_row_broadcast_impl( + sycl::queue &exec_q, + std::vector &host_tasks, + std::size_t n0, + std::size_t n1, + const char *mat_p, // typeless pointer to (n0, n1) C-contiguous matrix + ssize_t mat_offset, + const char *vec_p, // typeless pointer to (n1,) contiguous row + ssize_t vec_offset, + char *res_p, // typeless pointer to (n0, n1) result C-contig. matrix, + // res[i,j] = op(mat[i,j], vec[j]) + ssize_t res_offset, + const std::vector &depends = {}) +{ + const argT1 *mat = reinterpret_cast(mat_p) + mat_offset; + const argT2 *vec = reinterpret_cast(vec_p) + vec_offset; + resT *res = reinterpret_cast(res_p) + res_offset; + + const auto &dev = exec_q.get_device(); + const auto &sg_sizes = dev.get_info(); + // Get device-specific kernel info max_sub_group_size + std::size_t max_sgSize = + *(std::max_element(std::begin(sg_sizes), std::end(sg_sizes))); + + std::size_t n1_padded = n1 + max_sgSize; + auto padded_vec_owner = + dpctl::tensor::alloc_utils::smart_malloc_device(n1_padded, + exec_q); + argT2 *padded_vec = padded_vec_owner.get(); + + sycl::event make_padded_vec_ev = + dpctl::tensor::kernels::elementwise_detail::populate_padded_vector< + argT2>(exec_q, vec, n1, padded_vec, n1_padded, depends); + + // sub-group spans work-items [I, I + sgSize) + // base = ndit.get_global_linear_id() - sg.get_local_id()[0] + // Generically, sub_group_load( &mat[base]) may load arrays from + // different rows of mat. The start corresponds to row (base / n0) + // We read sub_group_load(&padded_vec[(base / n0)]). 
+ // The vector is padded to ensure that reads are accessible + + const std::size_t lws = 128; + + sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(make_padded_vec_ev); + + auto lwsRange = sycl::range<1>(lws); + std::size_t n_elems = n0 * n1; + std::size_t n_groups = (n_elems + lws - 1) / lws; + auto gwsRange = sycl::range<1>(n_groups * lws); + + using Impl = + BinaryContigMatrixContigRowBroadcastFunctorT; + + cgh.parallel_for>( + sycl::nd_range<1>(gwsRange, lwsRange), + Impl(mat, padded_vec, res, n_elems, n1)); + }); + + sycl::event tmp_cleanup_ev = dpctl::tensor::alloc_utils::async_smart_free( + exec_q, {comp_ev}, padded_vec_owner); + + host_tasks.push_back(tmp_cleanup_ev); + + return comp_ev; +} + +template + class BinaryContigRowContigMatrixBroadcastFunctorT, + template + class kernel_name> +sycl::event binary_contig_row_contig_matrix_broadcast_impl( + sycl::queue &exec_q, + std::vector &host_tasks, + std::size_t n0, + std::size_t n1, + const char *vec_p, // typeless pointer to (n1,) contiguous row + ssize_t vec_offset, + const char *mat_p, // typeless pointer to (n0, n1) C-contiguous matrix + ssize_t mat_offset, + char *res_p, // typeless pointer to (n0, n1) result C-contig. matrix, + // res[i,j] = op(vec[j], mat[i,j]) + ssize_t res_offset, + const std::vector &depends = {}) +{ + const argT1 *vec = reinterpret_cast(vec_p) + vec_offset; + const argT2 *mat = reinterpret_cast(mat_p) + mat_offset; + resT *res = reinterpret_cast(res_p) + res_offset; + + const auto &dev = exec_q.get_device(); + const auto &sg_sizes = dev.get_info(); + // Get device-specific kernel info max_sub_group_size + std::size_t max_sgSize = + *(std::max_element(std::begin(sg_sizes), std::end(sg_sizes))); + + std::size_t n1_padded = n1 + max_sgSize; + auto padded_vec_owner = + dpctl::tensor::alloc_utils::smart_malloc_device(n1_padded, + exec_q); + argT2 *padded_vec = padded_vec_owner.get(); + + sycl::event make_padded_vec_ev = + dpctl::tensor::kernels::elementwise_detail::populate_padded_vector< + argT2>(exec_q, vec, n1, padded_vec, n1_padded, depends); + + // sub-group spans work-items [I, I + sgSize) + // base = ndit.get_global_linear_id() - sg.get_local_id()[0] + // Generically, sub_group_load( &mat[base]) may load arrays from + // different rows of mat. The start corresponds to row (base / n0) + // We read sub_group_load(&padded_vec[(base / n0)]). 
The vector is + // padded to ensure that reads are accessible + + const std::size_t lws = 128; + + sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(make_padded_vec_ev); + + auto lwsRange = sycl::range<1>(lws); + std::size_t n_elems = n0 * n1; + std::size_t n_groups = (n_elems + lws - 1) / lws; + auto gwsRange = sycl::range<1>(n_groups * lws); + + using Impl = + BinaryContigRowContigMatrixBroadcastFunctorT; + + cgh.parallel_for>( + sycl::nd_range<1>(gwsRange, lwsRange), + Impl(padded_vec, mat, res, n_elems, n1)); + }); + + sycl::event tmp_cleanup_ev = dpctl::tensor::alloc_utils::async_smart_free( + exec_q, {comp_ev}, padded_vec_owner); + + host_tasks.push_back(tmp_cleanup_ev); + + return comp_ev; +}; +} // namespace dpctl::tensor::kernels::elementwise_common diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common_detail.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common_detail.hpp new file mode 100644 index 000000000000..b304b5ac3a39 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common_detail.hpp @@ -0,0 +1,70 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +/// +/// \file +/// This file defines common code for elementwise tensor operations. 
+//===---------------------------------------------------------------------===// + +#pragma once + +#include +#include + +#include + +namespace dpctl::tensor::kernels::elementwise_detail +{ +template +class populate_padded_vec_krn; + +template +sycl::event + populate_padded_vector(sycl::queue &exec_q, + const T *vec, + std::size_t vec_sz, + T *padded_vec, + size_t padded_vec_sz, + const std::vector &dependent_events) +{ + sycl::event populate_padded_vec_ev = exec_q.submit([&](sycl::handler &cgh) { + // ensure vec contains actual data + cgh.depends_on(dependent_events); + + sycl::range<1> gRange{padded_vec_sz}; + + cgh.parallel_for>( + gRange, [=](sycl::id<1> id) + { + std::size_t i = id[0]; + padded_vec[i] = vec[i % vec_sz]; + }); + }); + + return populate_padded_vec_ev; +} +} // namespace dpctl::tensor::kernels::elementwise_detail diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp new file mode 100644 index 000000000000..8565df2cf528 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp @@ -0,0 +1,268 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +/// +/// \file +/// This file defines kernels for elementwise evaluation of LOGADDEXP(x1, x2) +/// function. 
+//===---------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +#include "common.hpp" +#include "vec_size_util.hpp" + +#include "utils/math_utils.hpp" +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch_building.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/dpctl_tensor_types.hpp" + +namespace dpctl::tensor::kernels::logaddexp +{ +using dpctl::tensor::ssize_t; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +using dpctl::tensor::type_utils::is_complex; +using dpctl::tensor::type_utils::vec_cast; + +template +struct LogAddExpFunctor +{ + using supports_sg_loadstore = std::true_type; + using supports_vec = std::true_type; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + using dpctl::tensor::math_utils::logaddexp; + return logaddexp(in1, in2); + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + sycl::vec res; + auto diff = in1 - in2; // take advantange of faster vec arithmetic + +#pragma unroll + for (int i = 0; i < vec_sz; ++i) { + if (std::isfinite(diff[i])) { + res[i] = std::max(in1[i], in2[i]) + + impl_finite(-sycl::fabs(diff[i])); + } + else { + using dpctl::tensor::math_utils::logaddexp; + res[i] = logaddexp(in1[i], in2[i]); + } + } + + return res; + } + +private: + template + T impl_finite(T const &in) const + { + return (in > 0) ? (in + sycl::log1p(sycl::exp(-in))) + : sycl::log1p(sycl::exp(in)); + } +}; + +template +using LogAddExpContigFunctor = elementwise_common::BinaryContigFunctor< + argT1, + argT2, + resT, + LogAddExpFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using LogAddExpStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + LogAddExpFunctor>; + +template +struct LogAddExpOutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; + + static constexpr bool is_defined = !std::is_same_v; +}; + +namespace hyperparam_detail +{ + +namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils; + +using vsu_ns::BinaryContigHyperparameterSetEntry; +using vsu_ns::ContigHyperparameterSetDefault; + +template +struct LogAddExpContigHyperparameterSet +{ + using value_type = + typename std::disjunction>; + + constexpr static auto vec_sz = value_type::vec_sz; + constexpr static auto n_vecs = value_type::n_vecs; +}; + +} // end of namespace hyperparam_detail + +template +class logaddexp_contig_kernel; + +template +sycl::event logaddexp_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using LogAddExpHS = + hyperparam_detail::LogAddExpContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = LogAddExpHS::vec_sz; + static constexpr std::uint8_t n_vecs = LogAddExpHS::n_vecs; + + return elementwise_common::binary_contig_impl< + argTy1, argTy2, LogAddExpOutputType, LogAddExpContigFunctor, + logaddexp_contig_kernel, vec_sz, n_vecs>( + exec_q, nelems, arg1_p, arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends); +} + +template +struct LogAddExpContigFactory +{ + fnT get() + { + if constexpr (!LogAddExpOutputType::is_defined) { + fnT fn = nullptr; + 
return fn; + } + else { + fnT fn = logaddexp_contig_impl; + return fn; + } + } +}; + +template +struct LogAddExpTypeMapFactory +{ + /*! @brief get typeid for output type of logaddexp(T1 x, T2 y) */ + std::enable_if_t::value, int> get() + { + using rT = typename LogAddExpOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class logaddexp_strided_kernel; + +template +sycl::event + logaddexp_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, LogAddExpOutputType, LogAddExpStridedFunctor, + logaddexp_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p, + arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends, additional_depends); +} + +template +struct LogAddExpStridedFactory +{ + fnT get() + { + if constexpr (!LogAddExpOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = logaddexp_strided_impl; + return fn; + } + } +}; + +template +class logaddexp_matrix_row_broadcast_sg_krn; + +} // namespace dpctl::tensor::kernels::logaddexp diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp new file mode 100644 index 000000000000..067ccd84f059 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp @@ -0,0 +1,322 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +/// +/// \file +/// This file defines kernels for elementwise evaluation of MAXIMUM(x1, x2) +/// function. 
+//===---------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include + +#include + +#include "common.hpp" +#include "vec_size_util.hpp" + +#include "utils/math_utils.hpp" +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch_building.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/dpctl_tensor_types.hpp" + +namespace dpctl::tensor::kernels::maximum +{ +using dpctl::tensor::ssize_t; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template +struct MaximumFunctor +{ + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::conjunction< + std::is_same, + std::negation, + tu_ns::is_complex>>>; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + if constexpr (tu_ns::is_complex::value || + tu_ns::is_complex::value) + { + static_assert(std::is_same_v); + using dpctl::tensor::math_utils::max_complex; + return max_complex(in1, in2); + } + else if constexpr (std::is_floating_point_v || + std::is_same_v) + { + const bool choose_first = (sycl::isnan(in1) || (in1 > in2)); + return (choose_first) ? in1 : in2; + } + else { + return (in1 > in2) ? in1 : in2; + } + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + sycl::vec res; +#pragma unroll + for (int i = 0; i < vec_sz; ++i) { + const auto &v1 = in1[i]; + const auto &v2 = in2[i]; + if constexpr (std::is_floating_point_v || + std::is_same_v) + { + const bool choose_first = (sycl::isnan(v1) || (v1 > v2)); + res[i] = (choose_first) ? v1 : v2; + } + else { + res[i] = (v1 > v2) ? v1 : v2; + } + } + return res; + } +}; + +template +using MaximumContigFunctor = + elementwise_common::BinaryContigFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using MaximumStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + MaximumFunctor>; + +template +struct MaximumOutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::DefaultResultEntry>::result_type; + + static constexpr bool is_defined = !std::is_same_v; +}; + +namespace hyperparam_detail +{ + +namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils; + +using vsu_ns::BinaryContigHyperparameterSetEntry; +using vsu_ns::ContigHyperparameterSetDefault; + +template +struct MaximumContigHyperparameterSet +{ + using value_type = + typename std::disjunction>; + + constexpr static auto vec_sz = value_type::vec_sz; + constexpr static auto n_vecs = value_type::n_vecs; +}; + +} // end of namespace hyperparam_detail + +template +class maximum_contig_kernel; + +template +sycl::event maximum_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using MaxHS = + 
hyperparam_detail::MaximumContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = MaxHS::vec_sz; + static constexpr std::uint8_t n_vecs = MaxHS::n_vecs; + + return elementwise_common::binary_contig_impl< + argTy1, argTy2, MaximumOutputType, MaximumContigFunctor, + maximum_contig_kernel, vec_sz, n_vecs>(exec_q, nelems, arg1_p, + arg1_offset, arg2_p, arg2_offset, + res_p, res_offset, depends); +} + +template +struct MaximumContigFactory +{ + fnT get() + { + if constexpr (!MaximumOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = maximum_contig_impl; + return fn; + } + } +}; + +template +struct MaximumTypeMapFactory +{ + /*! @brief get typeid for output type of maximum(T1 x, T2 y) */ + std::enable_if_t::value, int> get() + { + using rT = typename MaximumOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class maximum_strided_kernel; + +template +sycl::event + maximum_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, MaximumOutputType, MaximumStridedFunctor, + maximum_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p, + arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends, additional_depends); +} + +template +struct MaximumStridedFactory +{ + fnT get() + { + if constexpr (!MaximumOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = maximum_strided_impl; + return fn; + } + } +}; +} // namespace dpctl::tensor::kernels::maximum diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp new file mode 100644 index 000000000000..a38945f89a25 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp @@ -0,0 +1,321 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +/// +/// \file +/// This file defines kernels for elementwise evaluation of MINIMUM(x1, x2) +/// function. +//===---------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include + +#include + +#include "common.hpp" +#include "vec_size_util.hpp" + +#include "utils/math_utils.hpp" +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch_building.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/dpctl_tensor_types.hpp" + +namespace dpctl::tensor::kernels::minimum +{ +using dpctl::tensor::ssize_t; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template +struct MinimumFunctor +{ + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::conjunction< + std::is_same, + std::negation, + tu_ns::is_complex>>>; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + if constexpr (tu_ns::is_complex::value || + tu_ns::is_complex::value) + { + static_assert(std::is_same_v); + using dpctl::tensor::math_utils::min_complex; + return min_complex(in1, in2); + } + else if constexpr (std::is_floating_point_v || + std::is_same_v) + { + const bool choose_first = sycl::isnan(in1) || (in1 < in2); + return (choose_first) ? in1 : in2; + } + else { + return (in1 < in2) ? in1 : in2; + } + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + sycl::vec res; +#pragma unroll + for (int i = 0; i < vec_sz; ++i) { + const auto &v1 = in1[i]; + const auto &v2 = in2[i]; + if constexpr (std::is_floating_point_v || + std::is_same_v) + { + const bool choose_first = sycl::isnan(v1) || (v1 < v2); + res[i] = (choose_first) ? v1 : v2; + } + else { + res[i] = (v1 < v2) ? 
v1 : v2; + } + } + return res; + } +}; + +template +using MinimumContigFunctor = + elementwise_common::BinaryContigFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using MinimumStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + MinimumFunctor>; + +template +struct MinimumOutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::DefaultResultEntry>::result_type; + + static constexpr bool is_defined = !std::is_same_v; +}; + +namespace hyperparam_detail +{ + +namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils; + +using vsu_ns::BinaryContigHyperparameterSetEntry; +using vsu_ns::ContigHyperparameterSetDefault; + +template +struct MinimumContigHyperparameterSet +{ + using value_type = + typename std::disjunction>; + + constexpr static auto vec_sz = value_type::vec_sz; + constexpr static auto n_vecs = value_type::n_vecs; +}; + +} // end of namespace hyperparam_detail + +template +class minimum_contig_kernel; + +template +sycl::event minimum_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using MinHS = + hyperparam_detail::MinimumContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = MinHS::vec_sz; + static constexpr std::uint8_t n_vecs = MinHS::n_vecs; + + return elementwise_common::binary_contig_impl< + argTy1, argTy2, MinimumOutputType, MinimumContigFunctor, + minimum_contig_kernel, vec_sz, n_vecs>(exec_q, nelems, arg1_p, + arg1_offset, arg2_p, arg2_offset, + res_p, res_offset, depends); +} + +template +struct MinimumContigFactory +{ + fnT get() + { + if constexpr (!MinimumOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = minimum_contig_impl; + return fn; + } + } +}; + +template +struct MinimumTypeMapFactory +{ + /*! 
@brief get typeid for output type of minimum(T1 x, T2 y) */ + std::enable_if_t::value, int> get() + { + using rT = typename MinimumOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class minimum_strided_kernel; + +template +sycl::event + minimum_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, MinimumOutputType, MinimumStridedFunctor, + minimum_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p, + arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends, additional_depends); +} + +template +struct MinimumStridedFactory +{ + fnT get() + { + if constexpr (!MinimumOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = minimum_strided_impl; + return fn; + } + } +}; +} // namespace dpctl::tensor::kernels::minimum diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sycl_complex.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sycl_complex.hpp new file mode 100644 index 000000000000..5cadec6ce2a4 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sycl_complex.hpp @@ -0,0 +1,44 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +/// +/// \file +/// This file defines a macro for defining the SYCL_EXT_ONEAPI_COMPLEX macro +/// and indirect inclusion of the experimental oneAPI SYCL complex extension +/// header file. 
+//===---------------------------------------------------------------------===//
+
+#pragma once
+
+#define SYCL_EXT_ONEAPI_COMPLEX
+#if __has_include(<sycl/ext/oneapi/experimental/sycl_complex.hpp>)
+#include <sycl/ext/oneapi/experimental/sycl_complex.hpp>
+#else
+#include <sycl/ext/oneapi/experimental/complex/complex.hpp>
+#endif
+
+namespace exprm_ns = sycl::ext::oneapi::experimental;
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/vec_size_util.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/vec_size_util.hpp
new file mode 100644
index 000000000000..bdbc7e50cc86
--- /dev/null
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/vec_size_util.hpp
@@ -0,0 +1,70 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
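The exprm_ns alias introduced by sycl_complex.hpp above is the handle kernels use for the experimental complex type in device code. A rough sketch of the usual round-trip through that type (the exp call and the conversion from std::complex mirror how other elementwise kernels use the extension; treat the exact function set as an assumption of this sketch):

#include <complex>
#include "sycl_complex.hpp"

template <typename realT>
std::complex<realT> device_exp(const std::complex<realT> &in)
{
    // convert to the experimental complex type for device-side math
    exprm_ns::complex<realT> z(in);
    exprm_ns::complex<realT> w = exprm_ns::exp(z);
    return std::complex<realT>(w.real(), w.imag());
}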
+//***************************************************************************** +/// +/// \file +/// This file defines utilities for selection of hyperparameters for kernels +/// implementing unary and binary elementwise functions for contiguous inputs +//===---------------------------------------------------------------------===// + +#pragma once + +#include +#include + +namespace dpctl::tensor::kernels::vec_size_utils +{ +template +struct BinaryContigHyperparameterSetEntry + : std::conjunction, std::is_same> +{ + static constexpr std::uint8_t vec_sz = vec_sz_v; + static constexpr std::uint8_t n_vecs = n_vecs_v; +}; + +template +struct UnaryContigHyperparameterSetEntry : std::is_same +{ + static constexpr std::uint8_t vec_sz = vec_sz_v; + static constexpr std::uint8_t n_vecs = n_vecs_v; +}; + +template +struct ContigHyperparameterSetDefault : std::true_type +{ + static constexpr std::uint8_t vec_sz = vec_sz_v; + static constexpr std::uint8_t n_vecs = n_vecs_v; +}; +} // namespace dpctl::tensor::kernels::vec_size_utils diff --git a/dpctl/tensor/libtensor/include/utils/indexing_utils.hpp b/dpctl/tensor/libtensor/include/utils/indexing_utils.hpp new file mode 100644 index 000000000000..d28c8174c39c --- /dev/null +++ b/dpctl/tensor/libtensor/include/utils/indexing_utils.hpp @@ -0,0 +1,153 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +/// +/// \file +/// This file defines utilities for handling out-of-bounds integer indices in +/// kernels that involve indexing operations, such as take, put, or advanced +/// tensor integer indexing. 
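The hyperparameter traits in vec_size_util.hpp above are meant to be chained with std::disjunction: the first entry whose type test holds supplies vec_sz and n_vecs, and ContigHyperparameterSetDefault is the fallback, which is how MaximumContigHyperparameterSet earlier in this patch falls through to the library defaults. A small sketch of a per-type-pair override (the float/float specialization and its values are purely illustrative):

#include <cstdint>
#include <type_traits>

namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;

template <typename argTy1, typename argTy2>
struct ExampleContigHyperparameterSet
{
    using value_type = typename std::disjunction<
        // chosen when both arguments are float: wider vectors, single pass
        vsu_ns::BinaryContigHyperparameterSetEntry<argTy1, float, argTy2, float, 8, 1>,
        // fallback used for every other type combination
        vsu_ns::ContigHyperparameterSetDefault<4, 2>>;

    static constexpr std::uint8_t vec_sz = value_type::vec_sz;
    static constexpr std::uint8_t n_vecs = value_type::n_vecs;
};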
+//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include + +#include + +#include "kernels/dpctl_tensor_types.hpp" + +namespace dpctl::tensor::indexing_utils +{ +using dpctl::tensor::ssize_t; + +/* + * ssize_t for indices is a design choice, dpctl::tensor::usm_ndarray + * uses py::ssize_t for shapes and strides internally and Python uses + * py_ssize_t for sizes of e.g. lists. + */ + +template +struct WrapIndex +{ + static_assert(std::is_integral_v); + + ssize_t operator()(ssize_t max_item, IndT ind) const + { + ssize_t projected; + static constexpr ssize_t unit(1); + max_item = sycl::max(max_item, unit); + + static constexpr std::uintmax_t ind_max = + std::numeric_limits::max(); + static constexpr std::uintmax_t ssize_max = + std::numeric_limits::max(); + + if constexpr (std::is_signed_v) { + static constexpr std::intmax_t ind_min = + std::numeric_limits::min(); + static constexpr std::intmax_t ssize_min = + std::numeric_limits::min(); + + if constexpr (ind_max <= ssize_max && ind_min >= ssize_min) { + const ssize_t ind_ = static_cast(ind); + const ssize_t lb = -max_item; + const ssize_t ub = max_item - 1; + projected = sycl::clamp(ind_, lb, ub); + } + else { + const IndT lb = static_cast(-max_item); + const IndT ub = static_cast(max_item - 1); + projected = static_cast(sycl::clamp(ind, lb, ub)); + } + return (projected < 0) ? projected + max_item : projected; + } + else { + if constexpr (ind_max <= ssize_max) { + const ssize_t ind_ = static_cast(ind); + const ssize_t ub = max_item - 1; + projected = sycl::min(ind_, ub); + } + else { + const IndT ub = static_cast(max_item - 1); + projected = static_cast(sycl::min(ind, ub)); + } + return projected; + } + } +}; + +template +struct ClipIndex +{ + static_assert(std::is_integral_v); + + ssize_t operator()(ssize_t max_item, IndT ind) const + { + ssize_t projected; + static constexpr ssize_t unit(1); + max_item = sycl::max(max_item, unit); + + static constexpr std::uintmax_t ind_max = + std::numeric_limits::max(); + static constexpr std::uintmax_t ssize_max = + std::numeric_limits::max(); + if constexpr (std::is_signed_v) { + static constexpr std::intmax_t ind_min = + std::numeric_limits::min(); + static constexpr std::intmax_t ssize_min = + std::numeric_limits::min(); + + if constexpr (ind_max <= ssize_max && ind_min >= ssize_min) { + const ssize_t ind_ = static_cast(ind); + static constexpr ssize_t lb(0); + const ssize_t ub = max_item - 1; + projected = sycl::clamp(ind_, lb, ub); + } + else { + static constexpr IndT lb(0); + const IndT ub = static_cast(max_item - 1); + projected = static_cast(sycl::clamp(ind, lb, ub)); + } + } + else { + if constexpr (ind_max <= ssize_max) { + const ssize_t ind_ = static_cast(ind); + const ssize_t ub = max_item - 1; + projected = sycl::min(ind_, ub); + } + else { + const IndT ub = static_cast(max_item - 1); + projected = static_cast(sycl::min(ind, ub)); + } + } + return projected; + } +}; +} // namespace dpctl::tensor::indexing_utils diff --git a/dpctl/tensor/libtensor/include/utils/math_utils.hpp b/dpctl/tensor/libtensor/include/utils/math_utils.hpp new file mode 100644 index 000000000000..d35eff0074dc --- /dev/null +++ b/dpctl/tensor/libtensor/include/utils/math_utils.hpp @@ -0,0 +1,148 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. 
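The two policies in indexing_utils.hpp above differ only in how out-of-range indices are projected: WrapIndex accepts Python-style negative indices (counting back from the end) and saturates anything further out of range, while ClipIndex clamps every index into [0, max_item - 1]. A few spot checks for an axis of length 5, written against the operators shown above:

#include <cassert>

void indexing_policy_examples()
{
    constexpr dpctl::tensor::ssize_t n = 5; // axis length

    const dpctl::tensor::indexing_utils::WrapIndex<int> wrap{};
    const dpctl::tensor::indexing_utils::ClipIndex<int> clip{};

    assert(wrap(n, -1) == 4); // negative index counts from the end
    assert(wrap(n, 7) == 4);  // too-large index saturates at n - 1
    assert(clip(n, -1) == 0); // negative index clamps to 0
    assert(clip(n, 7) == 4);  // too-large index clamps to n - 1
}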
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +/// +/// \file +/// This file defines math utility functions. +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include + +#include + +namespace dpctl::tensor::math_utils +{ +template +bool less_complex(const T &x1, const T &x2) +{ + using realT = typename T::value_type; + realT real1 = std::real(x1); + realT real2 = std::real(x2); + realT imag1 = std::imag(x1); + realT imag2 = std::imag(x2); + + return (real1 == real2) + ? (imag1 < imag2) + : (real1 < real2 && !std::isnan(imag1) && !std::isnan(imag2)); +} + +template +bool greater_complex(const T &x1, const T &x2) +{ + using realT = typename T::value_type; + realT real1 = std::real(x1); + realT real2 = std::real(x2); + realT imag1 = std::imag(x1); + realT imag2 = std::imag(x2); + + return (real1 == real2) + ? (imag1 > imag2) + : (real1 > real2 && !std::isnan(imag1) && !std::isnan(imag2)); +} + +template +bool less_equal_complex(const T &x1, const T &x2) +{ + using realT = typename T::value_type; + realT real1 = std::real(x1); + realT real2 = std::real(x2); + realT imag1 = std::imag(x1); + realT imag2 = std::imag(x2); + + return (real1 == real2) + ? (imag1 <= imag2) + : (real1 < real2 && !std::isnan(imag1) && !std::isnan(imag2)); +} + +template +bool greater_equal_complex(const T &x1, const T &x2) +{ + using realT = typename T::value_type; + realT real1 = std::real(x1); + realT real2 = std::real(x2); + realT imag1 = std::imag(x1); + realT imag2 = std::imag(x2); + + return (real1 == real2) + ? (imag1 >= imag2) + : (real1 > real2 && !std::isnan(imag1) && !std::isnan(imag2)); +} + +template +T max_complex(const T &x1, const T &x2) +{ + using realT = typename T::value_type; + realT real1 = std::real(x1); + realT real2 = std::real(x2); + realT imag1 = std::imag(x1); + realT imag2 = std::imag(x2); + + bool isnan_imag1 = std::isnan(imag1); + bool gt = (real1 == real2) + ? 
(imag1 > imag2) + : (real1 > real2 && !isnan_imag1 && !std::isnan(imag2)); + return (std::isnan(real1) || isnan_imag1 || gt) ? x1 : x2; +} + +template +T min_complex(const T &x1, const T &x2) +{ + using realT = typename T::value_type; + realT real1 = std::real(x1); + realT real2 = std::real(x2); + realT imag1 = std::imag(x1); + realT imag2 = std::imag(x2); + + bool isnan_imag1 = std::isnan(imag1); + bool lt = (real1 == real2) + ? (imag1 < imag2) + : (real1 < real2 && !isnan_imag1 && !std::isnan(imag2)); + return (std::isnan(real1) || isnan_imag1 || lt) ? x1 : x2; +} + +template +T logaddexp(T x, T y) +{ + if (x == y) { // handle signed infinities + const T log2 = sycl::log(T(2)); + return x + log2; + } + else { + const T tmp = x - y; + static constexpr T zero(0); + + return (tmp > zero) + ? (x + sycl::log1p(sycl::exp(-tmp))) + : ((tmp <= zero) ? y + sycl::log1p(sycl::exp(tmp)) + : std::numeric_limits::quiet_NaN()); + } +} +} // namespace dpctl::tensor::math_utils diff --git a/dpctl/tensor/libtensor/include/utils/memory_overlap.hpp b/dpctl/tensor/libtensor/include/utils/memory_overlap.hpp new file mode 100644 index 000000000000..3b1bc772b514 --- /dev/null +++ b/dpctl/tensor/libtensor/include/utils/memory_overlap.hpp @@ -0,0 +1,157 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +/// +/// \file +/// This file defines utility to determine whether two arrays have memory +/// overlap. +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +#include + +#include "dpctl4pybind11.hpp" + +/* @brief check for overlap of memory regions behind arrays. + +Presently assume that array occupies all bytes between smallest and largest +displaced elements. + +TODO: Write proper Frobenius solver to account for holes, e.g. 
+ overlap( x_contig[::2], x_contig[1::2]) should give False, + while this implementation gives True. +*/ +namespace dpctl::tensor::overlap +{ +namespace py = pybind11; + +struct MemoryOverlap +{ + bool operator()(dpctl::tensor::usm_ndarray ar1, + dpctl::tensor::usm_ndarray ar2) const + { + const char *ar1_data = ar1.get_data(); + + const auto &ar1_offsets = ar1.get_minmax_offsets(); + py::ssize_t ar1_elem_size = + static_cast(ar1.get_elemsize()); + + const char *ar2_data = ar2.get_data(); + const auto &ar2_offsets = ar2.get_minmax_offsets(); + py::ssize_t ar2_elem_size = + static_cast(ar2.get_elemsize()); + + /* Memory of array1 extends from */ + /* [ar1_data + ar1_offsets.first * ar1_elem_size, ar1_data + + * ar1_offsets.second * ar1_elem_size + ar1_elem_size] */ + /* Memory of array2 extends from */ + /* [ar2_data + ar2_offsets.first * ar2_elem_size, ar2_data + + * ar2_offsets.second * ar2_elem_size + ar2_elem_size] */ + + /* Intervals [x0, x1] and [y0, y1] do not overlap if (x0 <= x1) && (y0 + * <= y1) + * && (x1 <=y0 || y1 <= x0 ) */ + /* Given that x0 <= x1 and y0 <= y1 are true by construction, the + * condition for overlap us (x1 > y0) && (y1 > x0) */ + + /* Applying: + (ar1_data + ar1_offsets.second * ar1_elem_size + ar1_elem_size > + ar2_data + + ar2_offsets.first * ar2_elem_size) && (ar2_data + ar2_offsets.second * + ar2_elem_size + ar2_elem_size > ar1_data + ar1_offsets.first * + ar1_elem_size) + */ + + auto byte_distance = static_cast(ar2_data - ar1_data); + + py::ssize_t x1_minus_y0 = + (-byte_distance + + (ar1_elem_size + (ar1_offsets.second * ar1_elem_size) - + (ar2_offsets.first * ar2_elem_size))); + + py::ssize_t y1_minus_x0 = + (byte_distance + + (ar2_elem_size + (ar2_offsets.second * ar2_elem_size) - + (ar1_offsets.first * ar1_elem_size))); + + bool memory_overlap = (x1_minus_y0 > 0) && (y1_minus_x0 > 0); + + return memory_overlap; + } +}; + +struct SameLogicalTensors +{ + bool operator()(dpctl::tensor::usm_ndarray ar1, + dpctl::tensor::usm_ndarray ar2) const + { + // Same ndim + int nd1 = ar1.get_ndim(); + if (nd1 != ar2.get_ndim()) + return false; + + // Same dtype + int tn1 = ar1.get_typenum(); + if (tn1 != ar2.get_typenum()) + return false; + + // Same pointer + const char *ar1_data = ar1.get_data(); + const char *ar2_data = ar2.get_data(); + + if (ar1_data != ar2_data) + return false; + + // Same shape and strides + const py::ssize_t *ar1_shape = ar1.get_shape_raw(); + const py::ssize_t *ar2_shape = ar2.get_shape_raw(); + + if (!std::equal(ar1_shape, ar1_shape + nd1, ar2_shape)) + return false; + + // Same shape and strides + auto const &ar1_strides = ar1.get_strides_vector(); + auto const &ar2_strides = ar2.get_strides_vector(); + + auto ar1_beg_it = std::begin(ar1_strides); + auto ar1_end_it = std::end(ar1_strides); + + auto ar2_beg_it = std::begin(ar2_strides); + + if (!std::equal(ar1_beg_it, ar1_end_it, ar2_beg_it)) + return false; + + // all checks passed: arrays are logical views + // into the same memory + return true; + } +}; +} // namespace dpctl::tensor::overlap diff --git a/dpctl/tensor/libtensor/include/utils/offset_utils.hpp b/dpctl/tensor/libtensor/include/utils/offset_utils.hpp new file mode 100644 index 000000000000..19664c3d4e12 --- /dev/null +++ b/dpctl/tensor/libtensor/include/utils/offset_utils.hpp @@ -0,0 +1,824 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. 
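The MemoryOverlap predicate above boils down to an interval-intersection test on byte ranges, (x1 > y0) && (y1 > x0), under the stated assumption that an array occupies every byte between its lowest and highest addressed elements. A worked example with illustrative values:

// A float32 buffer of 4 elements viewed as two disjoint halves, a[0:2] and a[2:4]:
//   a[0:2] spans bytes [base + 0, base + 8)   (min/max offsets 0..1, elemsize 4)
//   a[2:4] spans bytes [base + 8, base + 16)  (its data pointer is base + 8)
//   byte_distance = 8, so x1_minus_y0 = -8 + (4 + 4 - 0) = 0, which is not > 0,
//   and no overlap is reported.
// For interleaved views such as a[0::2] and a[1::2] the byte ranges do intersect,
// so the predicate conservatively reports overlap (see the TODO above).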
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +/// +/// \file +/// This file defines Indexer callable operator to compute element offset in +/// an array addressed by gloabl_id. +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "kernels/dpctl_tensor_types.hpp" +#include "utils/strided_iters.hpp" +#include "utils/sycl_alloc_utils.hpp" + +namespace dpctl::tensor::offset_utils +{ +namespace detail +{ +struct sink_t +{ + sink_t(){}; + template + sink_t(T &&){}; +}; + +template +std::size_t __accumulate_size(std::size_t &s, V &&v) +{ + return s += v.size(); +} + +template +sink_t __appender(V &lhs, U &&rhs) +{ + lhs.insert(lhs.end(), rhs.begin(), rhs.end()); + return {}; +} + +template +std::vector concat(std::vector lhs, Vs &&...vs) +{ + std::size_t s = lhs.size(); + { + // limited scope ensures array is freed + [[maybe_unused]] sink_t tmp[] = {__accumulate_size(s, vs)..., 0}; + } + lhs.reserve(s); + { + // array of no-data objects ensures ordering of calls to the appender + [[maybe_unused]] sink_t tmp[] = { + __appender(lhs, std::forward(vs))..., 0}; + } + + return std::move(lhs); // prevent return-value optimization +} +} // namespace detail + +template +std::tuple, + std::size_t, + sycl::event> + device_allocate_and_pack(sycl::queue &q, + std::vector &host_task_events, + Vs &&...vs) +{ + + using dpctl::tensor::alloc_utils::usm_host_allocator; + + // memory transfer optimization, use USM-host for temporary speeds up + // transfer to device, especially on dGPUs + using usm_host_allocatorT = usm_host_allocator; + using shT = std::vector; + + usm_host_allocatorT usm_host_alloc(q); + shT empty{0, usm_host_alloc}; + shT packed_shape_strides = detail::concat(std::move(empty), vs...); + + auto packed_shape_strides_owner = + std::make_shared(std::move(packed_shape_strides)); + + auto sz = packed_shape_strides_owner->size(); + auto 
shape_strides_owner = + dpctl::tensor::alloc_utils::smart_malloc_device(sz, q); + indT *shape_strides = shape_strides_owner.get(); + + sycl::event copy_ev = + q.copy(packed_shape_strides_owner->data(), shape_strides, sz); + + sycl::event cleanup_host_task_ev = q.submit([&](sycl::handler &cgh) { + cgh.depends_on(copy_ev); + cgh.host_task([packed_shape_strides_owner = + std::move(packed_shape_strides_owner)] { + // increment shared pointer ref-count to keep it alive + // till copy operation completes; + }); + }); + host_task_events.push_back(cleanup_host_task_ev); + + return std::make_tuple(std::move(shape_strides_owner), sz, copy_ev); +} + +struct NoOpIndexer +{ + constexpr NoOpIndexer() {} + constexpr std::size_t operator()(std::size_t gid) const + { + return gid; + } +}; + +using dpctl::tensor::ssize_t; + +/* @brief Indexer with shape and strides arrays of same size are packed */ +struct StridedIndexer +{ + StridedIndexer(int _nd, + ssize_t _offset, + ssize_t const *_packed_shape_strides) + : nd(_nd), starting_offset(_offset), + shape_strides(_packed_shape_strides) + { + } + + ssize_t operator()(ssize_t gid) const + { + return compute_offset(gid); + } + + ssize_t operator()(std::size_t gid) const + { + return compute_offset(static_cast(gid)); + } + +private: + int nd; + ssize_t starting_offset; + ssize_t const *shape_strides; + + ssize_t compute_offset(ssize_t gid) const + { + using dpctl::tensor::strides::CIndexer_vector; + + CIndexer_vector _ind(nd); + ssize_t relative_offset(0); + _ind.get_displacement( + gid, + shape_strides, // shape ptr + shape_strides + nd, // strides ptr + relative_offset); + return starting_offset + relative_offset; + } +}; + +// ensure that indexer is device copyable +static_assert(sycl::is_device_copyable_v); + +/* @brief Indexer with shape, strides provided separately */ +struct UnpackedStridedIndexer +{ + UnpackedStridedIndexer(int _nd, + ssize_t _offset, + ssize_t const *_shape, + ssize_t const *_strides) + : nd(_nd), starting_offset(_offset), shape(_shape), strides(_strides) + { + } + + ssize_t operator()(ssize_t gid) const + { + return compute_offset(gid); + } + + ssize_t operator()(std::size_t gid) const + { + return compute_offset(static_cast(gid)); + } + +private: + int nd; + ssize_t starting_offset; + ssize_t const *shape; + ssize_t const *strides; + + ssize_t compute_offset(ssize_t gid) const + { + using dpctl::tensor::strides::CIndexer_vector; + + CIndexer_vector _ind(nd); + ssize_t relative_offset(0); + _ind.get_displacement( + gid, + shape, // shape ptr + strides, // strides ptr + relative_offset); + return starting_offset + relative_offset; + } +}; + +// ensure that indexer is device copyable +static_assert(sycl::is_device_copyable_v); + +struct Strided1DIndexer +{ + Strided1DIndexer(std::size_t _size) : offset{}, size(_size), step(1) {} + Strided1DIndexer(ssize_t _size) + : offset{}, size(static_cast(_size)), step(1) + { + } + Strided1DIndexer(std::size_t _size, ssize_t _step) + : offset{}, size(_size), step(_step) + { + } + Strided1DIndexer(std::size_t _size, std::size_t _step) + : offset{}, size(_size), step(static_cast(_step)) + { + } + Strided1DIndexer(ssize_t _size, ssize_t _step) + : offset{}, size(static_cast(_size)), step(_step) + { + } + Strided1DIndexer(ssize_t _offset, std::size_t _size, ssize_t _step) + : offset(_offset), size(_size), step(_step) + { + } + Strided1DIndexer(ssize_t _offset, std::size_t _size, std::size_t _step) + : offset(_offset), size(_size), step(static_cast(_step)) + { + } + Strided1DIndexer(ssize_t _offset, 
ssize_t _size, ssize_t _step) + : offset(_offset), size(static_cast(_size)), step(_step) + { + } + + ssize_t operator()(std::size_t gid) const + { + // ensure 0 <= gid < size + return offset + std::min(gid, size - 1) * step; + } + +private: + ssize_t offset = 0; + std::size_t size = 1; + ssize_t step = 1; +}; + +static_assert(sycl::is_device_copyable_v); + +struct Strided1DCyclicIndexer +{ + Strided1DCyclicIndexer(ssize_t _offset, ssize_t _size, ssize_t _step) + : offset(_offset), size(static_cast(_size)), step(_step) + { + } + + ssize_t operator()(std::size_t gid) const + { + return offset + (gid % size) * step; + } + +private: + ssize_t offset = 0; + std::size_t size = 1; + ssize_t step = 1; +}; + +static_assert(sycl::is_device_copyable_v); + +template +struct TwoOffsets +{ + constexpr TwoOffsets() : first_offset(0), second_offset(0) {} + constexpr TwoOffsets(const displacementT &first_offset_, + const displacementT &second_offset_) + : first_offset(first_offset_), second_offset(second_offset_) + { + } + + constexpr displacementT get_first_offset() const + { + return first_offset; + } + constexpr displacementT get_second_offset() const + { + return second_offset; + } + +private: + displacementT first_offset = 0; + displacementT second_offset = 0; +}; + +struct TwoOffsets_StridedIndexer +{ + TwoOffsets_StridedIndexer(int common_nd, + ssize_t first_offset_, + ssize_t second_offset_, + ssize_t const *_packed_shape_strides) + : nd(common_nd), starting_first_offset(first_offset_), + starting_second_offset(second_offset_), + shape_strides(_packed_shape_strides) + { + } + + TwoOffsets operator()(ssize_t gid) const + { + return compute_offsets(gid); + } + + TwoOffsets operator()(std::size_t gid) const + { + return compute_offsets(static_cast(gid)); + } + +private: + int nd; + ssize_t starting_first_offset; + ssize_t starting_second_offset; + ssize_t const *shape_strides; + + TwoOffsets compute_offsets(ssize_t gid) const + { + using dpctl::tensor::strides::CIndexer_vector; + + CIndexer_vector _ind(nd); + ssize_t relative_first_offset(0); + ssize_t relative_second_offset(0); + _ind.get_displacement( + gid, + shape_strides, // shape ptr + shape_strides + nd, // strides ptr + shape_strides + 2 * nd, // strides ptr + relative_first_offset, relative_second_offset); + return TwoOffsets( + starting_first_offset + relative_first_offset, + starting_second_offset + relative_second_offset); + } +}; + +struct TwoZeroOffsets_Indexer +{ + constexpr TwoZeroOffsets_Indexer() {} + + constexpr TwoOffsets operator()(ssize_t) const + { + return TwoOffsets(); + } +}; + +static_assert(sycl::is_device_copyable_v); + +template +struct TwoOffsets_CombinedIndexer +{ +private: + FirstIndexerT first_indexer_; + SecondIndexerT second_indexer_; + +public: + constexpr TwoOffsets_CombinedIndexer(const FirstIndexerT &first_indexer, + const SecondIndexerT &second_indexer) + : first_indexer_(first_indexer), second_indexer_(second_indexer) + { + } + + constexpr TwoOffsets operator()(ssize_t gid) const + { + return TwoOffsets(first_indexer_(gid), second_indexer_(gid)); + } +}; + +template +struct ThreeOffsets +{ + constexpr ThreeOffsets() + : first_offset(0), second_offset(0), third_offset(0) + { + } + constexpr ThreeOffsets(const displacementT &first_offset_, + const displacementT &second_offset_, + const displacementT &third_offset_) + : first_offset(first_offset_), second_offset(second_offset_), + third_offset(third_offset_) + { + } + + constexpr displacementT get_first_offset() const + { + return first_offset; + } + constexpr 
displacementT get_second_offset() const + { + return second_offset; + } + constexpr displacementT get_third_offset() const + { + return third_offset; + } + +private: + displacementT first_offset = 0; + displacementT second_offset = 0; + displacementT third_offset = 0; +}; + +struct ThreeOffsets_StridedIndexer +{ + ThreeOffsets_StridedIndexer(int common_nd, + ssize_t first_offset_, + ssize_t second_offset_, + ssize_t third_offset_, + ssize_t const *_packed_shape_strides) + : nd(common_nd), starting_first_offset(first_offset_), + starting_second_offset(second_offset_), + starting_third_offset(third_offset_), + shape_strides(_packed_shape_strides) + { + } + + ThreeOffsets operator()(ssize_t gid) const + { + return compute_offsets(gid); + } + + ThreeOffsets operator()(std::size_t gid) const + { + return compute_offsets(static_cast(gid)); + } + +private: + int nd; + ssize_t starting_first_offset; + ssize_t starting_second_offset; + ssize_t starting_third_offset; + ssize_t const *shape_strides; + + ThreeOffsets compute_offsets(ssize_t gid) const + { + using dpctl::tensor::strides::CIndexer_vector; + + CIndexer_vector _ind(nd); + ssize_t relative_first_offset(0); + ssize_t relative_second_offset(0); + ssize_t relative_third_offset(0); + _ind.get_displacement( + gid, + shape_strides, // shape ptr + shape_strides + nd, // strides ptr + shape_strides + 2 * nd, // strides ptr + shape_strides + 3 * nd, // strides ptr + relative_first_offset, relative_second_offset, + relative_third_offset); + return ThreeOffsets( + starting_first_offset + relative_first_offset, + starting_second_offset + relative_second_offset, + starting_third_offset + relative_third_offset); + } +}; + +static_assert(sycl::is_device_copyable_v); + +struct ThreeZeroOffsets_Indexer +{ + constexpr ThreeZeroOffsets_Indexer() {} + + constexpr ThreeOffsets operator()(ssize_t) const + { + return ThreeOffsets(); + } + + constexpr ThreeOffsets operator()(std::size_t) const + { + return ThreeOffsets(); + } +}; + +static_assert(sycl::is_device_copyable_v); + +template +struct ThreeOffsets_CombinedIndexer +{ +private: + FirstIndexerT first_indexer_; + SecondIndexerT second_indexer_; + ThirdIndexerT third_indexer_; + +public: + constexpr ThreeOffsets_CombinedIndexer(const FirstIndexerT &first_indexer, + const SecondIndexerT &second_indexer, + const ThirdIndexerT &third_indexer) + : first_indexer_(first_indexer), second_indexer_(second_indexer), + third_indexer_(third_indexer) + { + } + + constexpr ThreeOffsets operator()(ssize_t gid) const + { + return ThreeOffsets(first_indexer_(gid), second_indexer_(gid), + third_indexer_(gid)); + } +}; + +template +struct FourOffsets +{ + constexpr FourOffsets() + : first_offset(0), second_offset(0), third_offset(0), fourth_offset(0) + { + } + constexpr FourOffsets(const displacementT &first_offset_, + const displacementT &second_offset_, + const displacementT &third_offset_, + const displacementT &fourth_offset_) + : first_offset(first_offset_), second_offset(second_offset_), + third_offset(third_offset_), fourth_offset(fourth_offset_) + { + } + + constexpr displacementT get_first_offset() const + { + return first_offset; + } + constexpr displacementT get_second_offset() const + { + return second_offset; + } + constexpr displacementT get_third_offset() const + { + return third_offset; + } + constexpr displacementT get_fourth_offset() const + { + return fourth_offset; + } + +private: + displacementT first_offset = 0; + displacementT second_offset = 0; + displacementT third_offset = 0; + displacementT 
fourth_offset = 0; +}; + +struct FourOffsets_StridedIndexer +{ + constexpr FourOffsets_StridedIndexer(int common_nd, + ssize_t first_offset_, + ssize_t second_offset_, + ssize_t third_offset_, + ssize_t fourth_offset_, + ssize_t const *_packed_shape_strides) + : nd(common_nd), starting_first_offset(first_offset_), + starting_second_offset(second_offset_), + starting_third_offset(third_offset_), + starting_fourth_offset(fourth_offset_), + shape_strides(_packed_shape_strides) + { + } + + constexpr FourOffsets operator()(ssize_t gid) const + { + return compute_offsets(gid); + } + + constexpr FourOffsets operator()(std::size_t gid) const + { + return compute_offsets(static_cast(gid)); + } + +private: + int nd; + ssize_t starting_first_offset; + ssize_t starting_second_offset; + ssize_t starting_third_offset; + ssize_t starting_fourth_offset; + ssize_t const *shape_strides; + + FourOffsets compute_offsets(ssize_t gid) const + { + using dpctl::tensor::strides::CIndexer_vector; + + CIndexer_vector _ind(nd); + ssize_t relative_first_offset(0); + ssize_t relative_second_offset(0); + ssize_t relative_third_offset(0); + ssize_t relative_fourth_offset(0); + _ind.get_displacement( + gid, + shape_strides, // shape ptr + shape_strides + nd, // strides ptr + shape_strides + 2 * nd, // strides ptr + shape_strides + 3 * nd, // strides ptr + shape_strides + 4 * nd, // strides ptr + relative_first_offset, relative_second_offset, + relative_third_offset, relative_fourth_offset); + return FourOffsets( + starting_first_offset + relative_first_offset, + starting_second_offset + relative_second_offset, + starting_third_offset + relative_third_offset, + starting_fourth_offset + relative_fourth_offset); + } +}; + +static_assert(sycl::is_device_copyable_v); + +struct FourZeroOffsets_Indexer +{ + constexpr FourZeroOffsets_Indexer() {} + + constexpr FourOffsets operator()(ssize_t) const + { + return FourOffsets(); + } +}; + +static_assert(sycl::is_device_copyable_v); + +struct NthStrideOffset +{ + NthStrideOffset(int common_nd, + ssize_t const *_offsets, + ssize_t const *_packed_shape_strides) + : _ind(common_nd), nd(common_nd), offsets(_offsets), + shape_strides(_packed_shape_strides) + { + } + + std::size_t operator()(ssize_t gid, int n) const + { + ssize_t relative_offset(0); + _ind.get_displacement( + gid, shape_strides, shape_strides + ((n + 1) * nd), + relative_offset); + + return relative_offset + offsets[n]; + } + +private: + dpctl::tensor::strides::CIndexer_vector _ind; + + int nd; + ssize_t const *offsets; + ssize_t const *shape_strides; +}; + +static_assert(sycl::is_device_copyable_v); + +template +struct FixedDimStridedIndexer +{ + FixedDimStridedIndexer(const std::array &_shape, + const std::array &_strides, + ssize_t _offset) + : _ind(_shape), strides(_strides), starting_offset(_offset) + { + } + std::size_t operator()(std::size_t gid) const + { + dpctl::tensor::strides::CIndexer_array local_indexer( + std::move(_ind)); + local_indexer.set(gid); + auto mi = local_indexer.get(); + + ssize_t relative_offset = 0; + +#pragma unroll + for (int i = 0; i < nd; ++i) { + relative_offset += mi[i] * strides[i]; + } + return starting_offset + relative_offset; + } + +private: + dpctl::tensor::strides::CIndexer_array _ind; + + std::array strides; + ssize_t starting_offset; +}; + +static_assert(sycl::is_device_copyable_v>); + +template +struct TwoOffsets_FixedDimStridedIndexer +{ + TwoOffsets_FixedDimStridedIndexer(const std::array &_shape, + const std::array &_strides1, + const std::array &_strides2, + ssize_t 
_offset1, + ssize_t _offset2) + : _ind(_shape), strides1(_strides1), strides2(_strides2), + starting_offset1(_offset1), starting_offset2(_offset2) + { + } + + TwoOffsets operator()(std::size_t gid) const + { + dpctl::tensor::strides::CIndexer_array local_indexer( + std::move(_ind)); + local_indexer.set(gid); + auto mi = local_indexer.get(); + + ssize_t relative_offset1 = 0; +#pragma unroll + for (int i = 0; i < nd; ++i) { + relative_offset1 += mi[i] * strides1[i]; + } + + ssize_t relative_offset2 = 0; +#pragma unroll + for (int i = 0; i < nd; ++i) { + relative_offset2 += mi[i] * strides2[i]; + } + + return TwoOffsets(starting_offset1 + relative_offset1, + starting_offset2 + relative_offset2); + } + +private: + dpctl::tensor::strides::CIndexer_array _ind; + + std::array strides1; + std::array strides2; + ssize_t starting_offset1; + ssize_t starting_offset2; +}; + +static_assert(sycl::is_device_copyable_v>); + +template +struct ThreeOffsets_FixedDimStridedIndexer +{ + ThreeOffsets_FixedDimStridedIndexer( + const std::array &_shape, + const std::array &_strides1, + const std::array &_strides2, + const std::array &_strides3, + ssize_t _offset1, + ssize_t _offset2, + ssize_t _offset3) + : _ind(_shape), strides1(_strides1), strides2(_strides2), + strides3(_strides3), starting_offset1(_offset1), + starting_offset2(_offset2), starting_offset3(_offset3) + { + } + + ThreeOffsets operator()(std::size_t gid) const + { + dpctl::tensor::strides::CIndexer_array local_indexer( + std::move(_ind)); + local_indexer.set(gid); + auto mi = local_indexer.get(); + + ssize_t relative_offset1 = 0; +#pragma unroll + for (int i = 0; i < nd; ++i) { + relative_offset1 += mi[i] * strides1[i]; + } + + ssize_t relative_offset2 = 0; +#pragma unroll + for (int i = 0; i < nd; ++i) { + relative_offset2 += mi[i] * strides2[i]; + } + + ssize_t relative_offset3 = 0; +#pragma unroll + for (int i = 0; i < nd; ++i) { + relative_offset3 += mi[i] * strides3[i]; + } + + return ThreeOffsets(starting_offset1 + relative_offset1, + starting_offset2 + relative_offset2, + starting_offset3 + relative_offset3); + } + +private: + dpctl::tensor::strides::CIndexer_array _ind; + + std::array strides1; + std::array strides2; + std::array strides3; + ssize_t starting_offset1; + ssize_t starting_offset2; + ssize_t starting_offset3; +}; + +static_assert( + sycl::is_device_copyable_v>); +} // namespace dpctl::tensor::offset_utils diff --git a/dpctl/tensor/libtensor/include/utils/output_validation.hpp b/dpctl/tensor/libtensor/include/utils/output_validation.hpp new file mode 100644 index 000000000000..1397efdee230 --- /dev/null +++ b/dpctl/tensor/libtensor/include/utils/output_validation.hpp @@ -0,0 +1,79 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. 
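The packing helper and the strided indexers in offset_utils.hpp above are normally used together when a strided kernel is launched: the host packs simplified shape and strides into one device allocation, and the submitted kernel builds an indexer over that packed buffer. A rough host-side fragment, assuming the caller already holds an execution queue q, the dimensionality nd, element offsets src_offset and dst_offset, and ssize_t vectors simplified_shape, simplified_src_strides and simplified_dst_strides (all of these names are placeholders):

using dpctl::tensor::ssize_t;
namespace ou_ns = dpctl::tensor::offset_utils;

// Sketch only: pack [shape, src strides, dst strides] into one USM allocation.
std::vector<sycl::event> host_tasks{};
auto ptr_sz_ev = ou_ns::device_allocate_and_pack<ssize_t>(
    q, host_tasks, simplified_shape, simplified_src_strides,
    simplified_dst_strides);
auto shape_strides_owner = std::move(std::get<0>(ptr_sz_ev));
const sycl::event &copy_ev = std::get<2>(ptr_sz_ev);
const ssize_t *shape_strides = shape_strides_owner.get();

// Any kernel submission using the indexer must depend on copy_ev; the indexer
// maps a flat work-item id to (src, dst) displacements via the packed buffer.
const ou_ns::TwoOffsets_StridedIndexer indexer{nd, src_offset, dst_offset,
                                               shape_strides};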
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +/// +/// \file +/// This file defines utilities for determining if an array is a valid output +/// array. +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +#include + +#include "dpctl4pybind11.hpp" + +namespace dpctl::tensor::validation +{ +namespace py = pybind11; + +/*! @brief Raises a value error if an array is read-only. + + This should be called with an array before writing.*/ +struct CheckWritable +{ + static void throw_if_not_writable(const dpctl::tensor::usm_ndarray &arr) + { + if (!arr.is_writable()) { + throw py::value_error("output array is read-only."); + } + return; + } +}; + +/*! @brief Raises a value error if an array's memory is not sufficiently ample + to accommodate an input number of elements. + + This should be called with an array before writing.*/ +struct AmpleMemory +{ + template + static void throw_if_not_ample(const dpctl::tensor::usm_ndarray &arr, + T nelems) + { + auto arr_offsets = arr.get_minmax_offsets(); + T range = static_cast(arr_offsets.second - arr_offsets.first); + if (range + 1 < nelems) { + throw py::value_error("Memory addressed by the output array is not " + "sufficiently ample."); + } + return; + } +}; +} // namespace dpctl::tensor::validation diff --git a/dpctl/tensor/libtensor/include/utils/strided_iters.hpp b/dpctl/tensor/libtensor/include/utils/strided_iters.hpp new file mode 100644 index 000000000000..0bed181802ae --- /dev/null +++ b/dpctl/tensor/libtensor/include/utils/strided_iters.hpp @@ -0,0 +1,996 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
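The validators in output_validation.hpp above are meant to run on the host before any kernel writes to a user-supplied array. A minimal sketch of a typical call site (the function name and its arguments are placeholders):

#include <cstddef>

void validate_output(const dpctl::tensor::usm_ndarray &dst, std::size_t nelems)
{
    // throws py::value_error if the array is read-only
    dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst);
    // throws py::value_error if fewer than nelems elements are addressable
    dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst, nelems);
}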
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +/// +/// \file +/// This file defines CIndexer_array, and CIndexer_vector classes, as well +/// iteration space simplifiers. +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace dpctl::tensor::strides +{ +/* An N-dimensional array can be stored in a single + * contiguous chunk of memory by contiguously laying + * array elements in lexicographinc order of their + * array indices. Such a layout is called C-contiguous. + * + * E.g. for (2, 3, 2) array `a` with zero-based indexing convention + * the C-array's elements are + * { a[0,0,0], a[0,0,1], a[0,1,0], a[0,1,1], a[0,2,0], a[0,2,1], + * a[1,0,0], a[1,0,1], a[1,1,0], a[1,1,1], a[1,2,0], a[1,2,1] } + * + * Indexer maps zero-based index in C-array to a multi-index + * for the purpose of computing element displacement in the + * strided array, i.e. in the above example for k = 5, the displacement + * is (s0*0 + s1*2 + s2*1), and for k = 7 it is (s0*1 + s1*0 + s2*1) + * for N-dimensional array with strides (s0, s1, s2). + * + * Cindexer_vector need not know array rank `dim` at compile time. + * Shape and strides are stored in std::vector, which are not trivially + * copyable. + * + * For the class to be trivially copyable for offloading displacement + * computation methods take accessor/pointer arguments of type T for + * shape and stride and modify displacement argument passed by reference. 
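 *
 * For example, with shape (2, 3, 2) and (illustrative) strides (12, 4, 1)
 * the flat index k = 7 unravels to the multi-index (1, 0, 1), so
 * get_displacement() reports 12*1 + 4*0 + 1*1 = 13.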
+ */ +template +class CIndexer_vector +{ + static_assert(std::is_integral::value, "Integral type is required"); + static_assert(std::is_signed::value, + "Signed integral type is required"); + int nd; + +public: + CIndexer_vector(int dim) : nd(dim) {} + + template + indT size(const ShapeTy &shape) const + { + indT s = static_cast(1); + for (int i = 0; i < nd; ++i) { + s *= shape[i]; + } + return s; + } + + template + void get_displacement(const indT i, + const ShapeTy &shape, + const StridesTy &stride, + indT &disp) const + { + if (nd == 1) { + disp = i * stride[0]; + return; + } + + indT i_ = i; + indT d = 0; + for (int dim = nd; --dim > 0;) { + const indT si = shape[dim]; + const indT q = i_ / si; + const indT r = (i_ - q * si); + d += r * stride[dim]; + i_ = q; + } + disp = d + i_ * stride[0]; + } + + template + void get_displacement(const indT i, + const ShapeTy &shape, + const StridesTy &stride1, + const StridesTy &stride2, + indT &disp1, + indT &disp2) const + { + if (nd == 1) { + disp1 = i * stride1[0]; + disp2 = i * stride2[0]; + return; + } + + indT i_ = i; + indT d1 = 0, d2 = 0; + for (int dim = nd; --dim > 0;) { + const indT si = shape[dim]; + const indT q = i_ / si; + const indT r = (i_ - q * si); + i_ = q; + d1 += r * stride1[dim]; + d2 += r * stride2[dim]; + } + disp1 = d1 + i_ * stride1[0]; + disp2 = d2 + i_ * stride2[0]; + return; + } + + template + void get_displacement(const indT i, + const ShapeTy &shape, + const StridesTy &stride1, + const StridesTy &stride2, + const StridesTy &stride3, + indT &disp1, + indT &disp2, + indT &disp3) const + { + if (nd == 1) { + disp1 = i * stride1[0]; + disp2 = i * stride2[0]; + disp3 = i * stride3[0]; + return; + } + + indT i_ = i; + indT d1 = 0, d2 = 0, d3 = 0; + for (int dim = nd; --dim > 0;) { + const indT si = shape[dim]; + const indT q = i_ / si; + const indT r = (i_ - q * si); + i_ = q; + d1 += r * stride1[dim]; + d2 += r * stride2[dim]; + d3 += r * stride3[dim]; + }; + disp1 = d1 + i_ * stride1[0]; + disp2 = d2 + i_ * stride2[0]; + disp3 = d3 + i_ * stride3[0]; + return; + } + + template + void get_displacement(const indT i, + const ShapeTy &shape, + const StridesTy &stride1, + const StridesTy &stride2, + const StridesTy &stride3, + const StridesTy &stride4, + indT &disp1, + indT &disp2, + indT &disp3, + indT &disp4) const + { + if (nd == 1) { + disp1 = i * stride1[0]; + disp2 = i * stride2[0]; + disp3 = i * stride3[0]; + disp4 = i * stride4[0]; + return; + } + + indT i_ = i; + indT d1 = 0, d2 = 0, d3 = 0, d4 = 0; + for (int dim = nd; --dim > 0;) { + const indT si = shape[dim]; + const indT q = i_ / si; + const indT r = (i_ - q * si); + i_ = q; + d1 += r * stride1[dim]; + d2 += r * stride2[dim]; + d3 += r * stride3[dim]; + d4 += r * stride4[dim]; + } + disp1 = d1 + i_ * stride1[0]; + disp2 = d2 + i_ * stride2[0]; + disp3 = d3 + i_ * stride3[0]; + disp4 = d4 + i_ * stride4[0]; + return; + } + + template + void get_displacement(const indT i, + const ShapeTy &shape, + const std::array &strides, + std::array &disps) const + { + if (nd == 1) { + for (int k = 0; k < nstrides; ++k) { + disps[k] = i * strides[k][0]; + } + return; + } + + indT i_ = i; + std::array ds; + for (int k = 0; k < nstrides; ++k) { + ds[k] = 0; + } + + for (int dim = nd; --dim > 0;) { + const indT si = shape[dim]; + const indT q = i_ / si; + const indT r = (i_ - q * si); + for (int k = 0; k < nstrides; ++k) { + ds[k] += r * strides[k][dim]; + } + i_ = q; + }; + for (int k = 0; k < nstrides; ++k) { + disps[k] = ds[k] + i_ * strides[k][0]; + } + return; + } + + 
template + void get_left_rolled_displacement(const indT i, + const ShapeTy &shape, + const StridesTy &stride, + const StridesTy &shifts, + indT &disp) const + { + indT i_ = i; + indT d(0); + for (int dim = nd; --dim > 0;) { + const indT si = shape[dim]; + const indT q = i_ / si; + const indT r = (i_ - q * si); + // assumes si > shifts[dim] >= 0 + const indT shifted_r = + (r < shifts[dim] ? r + si - shifts[dim] : r - shifts[dim]); + d += shifted_r * stride[dim]; + i_ = q; + } + const indT shifted_r = + (i_ < shifts[0] ? i_ + shape[0] - shifts[0] : i_ - shifts[0]); + disp = d + shifted_r * stride[0]; + } +}; + +/* + * CIndexer is for arrays whose array-rank is known at compile time. + * Statically allocated shape and multi_index arrays are members of + * the class instance, and it remains trivially copyable. + * + * Method `set(k)` populates work-item private array multi_index, which + * can be accessed using `get()` to compute the displacement as needed. + */ + +template +class CIndexer_array +{ + static constexpr int ndim = _ndim; + + static_assert(std::is_integral::value, "Integral type is required"); + static_assert(std::is_signed::value, + "Signed integral type is required"); + static_assert(ndim > 0, "Dimensionality must be positive"); + +private: + typedef std::array index_t; + + indT elem_count; + index_t shape; + index_t multi_index; + +public: + CIndexer_array() : elem_count(0), shape{}, multi_index{} {} + + explicit CIndexer_array(const index_t &input_shape) + : elem_count(0), shape{}, multi_index{} + { + indT s(1); + for (int i = 0; i < ndim; ++i) { + shape[i] = input_shape[i]; + s *= input_shape[i]; + } + elem_count = s; + } + + indT size() const + { + return elem_count; + } + indT rank() const + { + return ndim; + } + + void set(const indT i) + { + if (ndim == 1) { + multi_index[0] = i; + return; + } + + indT i_ = i; +#pragma unroll + for (int dim = ndim; --dim > 0;) { + indT si = shape[dim]; + indT q = i_ / si; + multi_index[dim] = i_ - q * si; + i_ = q; + } + multi_index[0] = i_; + } + + const index_t &get() const + { + return multi_index; + } +}; + +/* + For purposes of iterating over elements of array with + `shape` and `strides` given as pointers + `simplify_iteration_strides(nd, shape_ptr, strides_ptr, disp)` + may modify memory and returns new length of these arrays. + + The new shape and new strides, as well as the offset + `(new_shape, new_strides, disp)` are such that iterating over + them will traverse the same elements, possibly in + different order. + + ..Example: python + import itertools + # for some array Y over whose elements we iterate + csh, cst, cp = contract_iter(Y.shape, Y.strides) + def pointers_set(sh, st, p): + citers = itertools.product(*map(lambda s: range(s), sh)) + dot = lambda st, it: sum(st[k]*it[k] for k in range(len(st))) + return set(p + dot(st, it) for it in citers) + ps1 = pointers_set(csh, cst, cp) + ps2 = pointers_set(Y.shape, Y.strides, 0) + assert ps1 == ps2 + + */ +template +int simplify_iteration_stride(const int nd, + ShapeTy *shape, + StridesTy *strides, + StridesTy &disp) +{ + disp = StridesTy(0); + if (nd < 2) + return nd; + + std::vector pos(nd); + std::iota(pos.begin(), pos.end(), 0); + + std::stable_sort( + pos.begin(), pos.end(), [&strides, &shape](int i1, int i2) { + auto abs_str1 = (strides[i1] < 0) ? -strides[i1] : strides[i1]; + auto abs_str2 = (strides[i2] < 0) ? 
-strides[i2] : strides[i2]; + return (abs_str1 > abs_str2) || + (abs_str1 == abs_str2 && shape[i1] > shape[i2]); + }); + + std::vector shape_w; + std::vector strides_w; + int nd_ = nd; + shape_w.reserve(nd_); + strides_w.reserve(nd_); + + for (int i = 0; i < nd; ++i) { + auto p = pos[i]; + auto sh_p = shape[p]; + auto str_p = strides[p]; + shape_w.push_back(sh_p); + if (str_p < 0) { + disp += str_p * (sh_p - 1); + str_p = -str_p; + } + strides_w.push_back(str_p); + } + + { + bool changed; + do { + changed = false; + for (int i = 0; i + 1 < nd_; ++i) { + StridesTy step = strides_w[i + 1]; + StridesTy jump = strides_w[i] - (shape_w[i + 1] - 1) * step; + if (jump == step) { + changed = true; + for (int k = i; k + 1 < nd_; ++k) { + strides_w[k] = strides_w[k + 1]; + } + shape_w[i] *= shape_w[i + 1]; + for (int k = i + 1; k + 1 < nd_; ++k) { + shape_w[k] = shape_w[k + 1]; + } + --nd_; + } + } + } while (changed); + } + + for (int i = 0; i < nd_; ++i) { + shape[i] = shape_w[i]; + } + for (int i = 0; i < nd_; ++i) { + strides[i] = strides_w[i]; + } + + return nd_; +} + +/* + For purposes of iterating over pairs of elements of two arrays + with `shape` and strides `strides1`, `strides2` given as pointers + `simplify_iteration_two_strides(nd, shape_ptr, strides1_ptr, + strides2_ptr, disp1, disp2)` + may modify memory and returns new length of these arrays. + + The new shape and new strides, as well as the offset + `(new_shape, new_strides1, disp1, new_stride2, disp2)` are such that + iterating over them will traverse the same set of pairs of elements, + possibly in a different order. + */ +template +int simplify_iteration_two_strides(const int nd, + ShapeTy *shape, + StridesTy *strides1, + StridesTy *strides2, + StridesTy &disp1, + StridesTy &disp2) +{ + disp1 = StridesTy(0); + disp2 = StridesTy(0); + if (nd < 2) + return nd; + + std::vector pos(nd); + std::iota(pos.begin(), pos.end(), 0); + + std::stable_sort( + pos.begin(), pos.end(), [&strides1, &strides2, &shape](int i1, int i2) { + auto abs_str1_i1 = + (strides1[i1] < 0) ? -strides1[i1] : strides1[i1]; + auto abs_str1_i2 = + (strides1[i2] < 0) ? -strides1[i2] : strides1[i2]; + auto abs_str2_i1 = + (strides2[i1] < 0) ? -strides2[i1] : strides2[i1]; + auto abs_str2_i2 = + (strides2[i2] < 0) ? 
-strides2[i2] : strides2[i2]; + return (abs_str2_i1 > abs_str2_i2) || + (abs_str2_i1 == abs_str2_i2 && + (abs_str1_i1 > abs_str1_i2 || + (abs_str1_i1 == abs_str1_i2 && shape[i1] > shape[i2]))); + }); + + std::vector shape_w; + std::vector strides1_w; + std::vector strides2_w; + + bool contractable = true; + for (int i = 0; i < nd; ++i) { + auto p = pos[i]; + auto sh_p = shape[p]; + auto str1_p = strides1[p]; + auto str2_p = strides2[p]; + shape_w.push_back(sh_p); + if (str1_p <= 0 && str2_p <= 0 && std::min(str1_p, str2_p) < 0) { + disp1 += str1_p * (sh_p - 1); + str1_p = -str1_p; + disp2 += str2_p * (sh_p - 1); + str2_p = -str2_p; + } + if (str1_p < 0 || str2_p < 0) { + contractable = false; + } + strides1_w.push_back(str1_p); + strides2_w.push_back(str2_p); + } + + int nd_ = nd; + while (contractable) { + bool changed = false; + for (int i = 0; i + 1 < nd_; ++i) { + StridesTy str1 = strides1_w[i + 1]; + StridesTy str2 = strides2_w[i + 1]; + StridesTy jump1 = strides1_w[i] - (shape_w[i + 1] - 1) * str1; + StridesTy jump2 = strides2_w[i] - (shape_w[i + 1] - 1) * str2; + + if (jump1 == str1 && jump2 == str2) { + changed = true; + shape_w[i] *= shape_w[i + 1]; + for (int j = i; j < nd_; ++j) { + strides1_w[j] = strides1_w[j + 1]; + } + for (int j = i; j < nd_; ++j) { + strides2_w[j] = strides2_w[j + 1]; + } + for (int j = i + 1; j + 1 < nd_; ++j) { + shape_w[j] = shape_w[j + 1]; + } + --nd_; + break; + } + } + if (!changed) + break; + } + for (int i = 0; i < nd_; ++i) { + shape[i] = shape_w[i]; + } + for (int i = 0; i < nd_; ++i) { + strides1[i] = strides1_w[i]; + } + for (int i = 0; i < nd_; ++i) { + strides2[i] = strides2_w[i]; + } + + return nd_; +} + +template > +std::tuple contract_iter(const vecT &shape, const vecT &strides) +{ + const std::size_t dim = shape.size(); + if (dim != strides.size()) { + throw Error("Shape and strides must be of equal size."); + } + vecT out_shape = shape; + vecT out_strides = strides; + T disp(0); + + int nd = simplify_iteration_stride(dim, out_shape.data(), + out_strides.data(), disp); + out_shape.resize(nd); + out_strides.resize(nd); + return std::make_tuple(out_shape, out_strides, disp); +} + +template > +std::tuple contract_iter2(const vecT &shape, + const vecT &strides1, + const vecT &strides2) +{ + const std::size_t dim = shape.size(); + if (dim != strides1.size() || dim != strides2.size()) { + throw Error("Shape and strides must be of equal size."); + } + vecT out_shape = shape; + vecT out_strides1 = strides1; + vecT out_strides2 = strides2; + T disp1(0); + T disp2(0); + + int nd = simplify_iteration_two_strides(dim, out_shape.data(), + out_strides1.data(), + out_strides2.data(), disp1, disp2); + out_shape.resize(nd); + out_strides1.resize(nd); + out_strides2.resize(nd); + return std::make_tuple(out_shape, out_strides1, disp1, out_strides2, disp2); +} + +/* + For purposes of iterating over pairs of elements of three arrays + with `shape` and strides `strides1`, `strides2`, `strides3` given as + pointers `simplify_iteration_three_strides(nd, shape_ptr, strides1_ptr, + strides2_ptr, strides3_ptr, disp1, disp2, disp3)` + may modify memory and returns new length of these arrays. + + The new shape and new strides, as well as the offset + `(new_shape, new_strides1, disp1, new_stride2, disp2, new_stride3, disp3)` + are such that iterating over them will traverse the same set of tuples of + elements, possibly in a different order. 
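+
+   For example, for shape (2, 3) with strides1 = strides2 = strides3 = (3, 1)
+   (three C-contiguous layouts), the iteration space collapses to shape (6,)
+   with unit strides and zero displacements.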
+ */ +template +int simplify_iteration_three_strides(const int nd, + ShapeTy *shape, + StridesTy *strides1, + StridesTy *strides2, + StridesTy *strides3, + StridesTy &disp1, + StridesTy &disp2, + StridesTy &disp3) +{ + disp1 = StridesTy(0); + disp2 = StridesTy(0); + if (nd < 2) + return nd; + + std::vector pos(nd); + std::iota(pos.begin(), pos.end(), 0); + + std::stable_sort(pos.begin(), pos.end(), + [&strides1, &strides2, &strides3, &shape](int i1, int i2) { + auto abs_str1_i1 = + (strides1[i1] < 0) ? -strides1[i1] : strides1[i1]; + auto abs_str1_i2 = + (strides1[i2] < 0) ? -strides1[i2] : strides1[i2]; + auto abs_str2_i1 = + (strides2[i1] < 0) ? -strides2[i1] : strides2[i1]; + auto abs_str2_i2 = + (strides2[i2] < 0) ? -strides2[i2] : strides2[i2]; + auto abs_str3_i1 = + (strides3[i1] < 0) ? -strides3[i1] : strides3[i1]; + auto abs_str3_i2 = + (strides3[i2] < 0) ? -strides3[i2] : strides3[i2]; + return (abs_str3_i1 > abs_str3_i2) || + ((abs_str3_i1 == abs_str3_i2) && + ((abs_str2_i1 > abs_str2_i2) || + ((abs_str2_i1 == abs_str2_i2) && + ((abs_str1_i1 > abs_str1_i2) || + ((abs_str1_i1 == abs_str1_i2) && + (shape[i1] > shape[i2])))))); + }); + + std::vector shape_w; + std::vector strides1_w; + std::vector strides2_w; + std::vector strides3_w; + + bool contractable = true; + for (int i = 0; i < nd; ++i) { + auto p = pos[i]; + auto sh_p = shape[p]; + auto str1_p = strides1[p]; + auto str2_p = strides2[p]; + auto str3_p = strides3[p]; + shape_w.push_back(sh_p); + if (str1_p <= 0 && str2_p <= 0 && str3_p <= 0 && + std::min({str1_p, str2_p, str3_p}) < 0) + { + disp1 += str1_p * (sh_p - 1); + str1_p = -str1_p; + disp2 += str2_p * (sh_p - 1); + str2_p = -str2_p; + disp3 += str3_p * (sh_p - 1); + str3_p = -str3_p; + } + if (str1_p < 0 || str2_p < 0 || str3_p < 0) { + contractable = false; + } + strides1_w.push_back(str1_p); + strides2_w.push_back(str2_p); + strides3_w.push_back(str3_p); + } + int nd_ = nd; + while (contractable) { + bool changed = false; + for (int i = 0; i + 1 < nd_; ++i) { + StridesTy str1 = strides1_w[i + 1]; + StridesTy str2 = strides2_w[i + 1]; + StridesTy str3 = strides3_w[i + 1]; + StridesTy jump1 = strides1_w[i] - (shape_w[i + 1] - 1) * str1; + StridesTy jump2 = strides2_w[i] - (shape_w[i + 1] - 1) * str2; + StridesTy jump3 = strides3_w[i] - (shape_w[i + 1] - 1) * str3; + + if (jump1 == str1 && jump2 == str2 && jump3 == str3) { + changed = true; + shape_w[i] *= shape_w[i + 1]; + for (int j = i; j < nd_; ++j) { + strides1_w[j] = strides1_w[j + 1]; + } + for (int j = i; j < nd_; ++j) { + strides2_w[j] = strides2_w[j + 1]; + } + for (int j = i; j < nd_; ++j) { + strides3_w[j] = strides3_w[j + 1]; + } + for (int j = i + 1; j + 1 < nd_; ++j) { + shape_w[j] = shape_w[j + 1]; + } + --nd_; + break; + } + } + if (!changed) + break; + } + for (int i = 0; i < nd_; ++i) { + shape[i] = shape_w[i]; + } + for (int i = 0; i < nd_; ++i) { + strides1[i] = strides1_w[i]; + } + for (int i = 0; i < nd_; ++i) { + strides2[i] = strides2_w[i]; + } + for (int i = 0; i < nd_; ++i) { + strides3[i] = strides3_w[i]; + } + + return nd_; +} + +template > +std::tuple contract_iter3(const vecT &shape, + const vecT &strides1, + const vecT &strides2, + const vecT &strides3) +{ + const std::size_t dim = shape.size(); + if (dim != strides1.size() || dim != strides2.size() || + dim != strides3.size()) { + throw Error("Shape and strides must be of equal size."); + } + vecT out_shape = shape; + vecT out_strides1 = strides1; + vecT out_strides2 = strides2; + vecT out_strides3 = strides3; + T disp1(0); + T 
disp2(0); + T disp3(0); + + int nd = simplify_iteration_three_strides( + dim, out_shape.data(), out_strides1.data(), out_strides2.data(), + out_strides3.data(), disp1, disp2, disp3); + out_shape.resize(nd); + out_strides1.resize(nd); + out_strides2.resize(nd); + out_strides3.resize(nd); + return std::make_tuple(out_shape, out_strides1, disp1, out_strides2, disp2, + out_strides3, disp3); +} + +/* + For purposes of iterating over pairs of elements of four arrays + with `shape` and strides `strides1`, `strides2`, `strides3`, + `strides4` given as pointers `simplify_iteration_four_strides(nd, + shape_ptr, strides1_ptr, strides2_ptr, strides3_ptr, strides4_ptr, + disp1, disp2, disp3, disp4)` may modify memory and returns new + length of these arrays. + + The new shape and new strides, as well as the offset + `(new_shape, new_strides1, disp1, new_stride2, disp2, new_stride3, disp3, + new_stride4, disp4)` are such that iterating over them will traverse the + same set of tuples of elements, possibly in a different order. + */ +template +int simplify_iteration_four_strides(const int nd, + ShapeTy *shape, + StridesTy *strides1, + StridesTy *strides2, + StridesTy *strides3, + StridesTy *strides4, + StridesTy &disp1, + StridesTy &disp2, + StridesTy &disp3, + StridesTy &disp4) +{ + disp1 = StridesTy(0); + disp2 = StridesTy(0); + if (nd < 2) + return nd; + + std::vector pos(nd); + std::iota(pos.begin(), pos.end(), 0); + + std::stable_sort( + pos.begin(), pos.end(), + [&strides1, &strides2, &strides3, &strides4, &shape](int i1, int i2) { + auto abs_str1_i1 = + (strides1[i1] < 0) ? -strides1[i1] : strides1[i1]; + auto abs_str1_i2 = + (strides1[i2] < 0) ? -strides1[i2] : strides1[i2]; + auto abs_str2_i1 = + (strides2[i1] < 0) ? -strides2[i1] : strides2[i1]; + auto abs_str2_i2 = + (strides2[i2] < 0) ? -strides2[i2] : strides2[i2]; + auto abs_str3_i1 = + (strides3[i1] < 0) ? -strides3[i1] : strides3[i1]; + auto abs_str3_i2 = + (strides3[i2] < 0) ? -strides3[i2] : strides3[i2]; + auto abs_str4_i1 = + (strides4[i1] < 0) ? -strides4[i1] : strides4[i1]; + auto abs_str4_i2 = + (strides4[i2] < 0) ? 
-strides4[i2] : strides4[i2]; + return (abs_str4_i1 > abs_str4_i2) || + ((abs_str4_i1 == abs_str4_i2) && + ((abs_str3_i1 > abs_str3_i2) || + ((abs_str3_i1 == abs_str3_i2) && + ((abs_str2_i1 > abs_str2_i2) || + ((abs_str2_i1 == abs_str2_i2) && + ((abs_str1_i1 > abs_str1_i2) || + ((abs_str1_i1 == abs_str1_i2) && + (shape[i1] > shape[i2])))))))); + }); + + std::vector shape_w; + std::vector strides1_w; + std::vector strides2_w; + std::vector strides3_w; + std::vector strides4_w; + + bool contractable = true; + for (int i = 0; i < nd; ++i) { + auto p = pos[i]; + auto sh_p = shape[p]; + auto str1_p = strides1[p]; + auto str2_p = strides2[p]; + auto str3_p = strides3[p]; + auto str4_p = strides4[p]; + shape_w.push_back(sh_p); + if (str1_p <= 0 && str2_p <= 0 && str3_p <= 0 && str4_p <= 0 && + std::min({str1_p, str2_p, str3_p, str4_p}) < 0) + { + disp1 += str1_p * (sh_p - 1); + str1_p = -str1_p; + disp2 += str2_p * (sh_p - 1); + str2_p = -str2_p; + disp3 += str3_p * (sh_p - 1); + str3_p = -str3_p; + disp4 += str4_p * (sh_p - 1); + str4_p = -str4_p; + } + if (str1_p < 0 || str2_p < 0 || str3_p < 0 || str4_p < 0) { + contractable = false; + } + strides1_w.push_back(str1_p); + strides2_w.push_back(str2_p); + strides3_w.push_back(str3_p); + strides4_w.push_back(str4_p); + } + int nd_ = nd; + while (contractable) { + bool changed = false; + for (int i = 0; i + 1 < nd_; ++i) { + StridesTy str1 = strides1_w[i + 1]; + StridesTy str2 = strides2_w[i + 1]; + StridesTy str3 = strides3_w[i + 1]; + StridesTy str4 = strides4_w[i + 1]; + StridesTy jump1 = strides1_w[i] - (shape_w[i + 1] - 1) * str1; + StridesTy jump2 = strides2_w[i] - (shape_w[i + 1] - 1) * str2; + StridesTy jump3 = strides3_w[i] - (shape_w[i + 1] - 1) * str3; + StridesTy jump4 = strides4_w[i] - (shape_w[i + 1] - 1) * str4; + + if (jump1 == str1 && jump2 == str2 && jump3 == str3 && + jump4 == str4) { + changed = true; + shape_w[i] *= shape_w[i + 1]; + for (int j = i; j < nd_; ++j) { + strides1_w[j] = strides1_w[j + 1]; + } + for (int j = i; j < nd_; ++j) { + strides2_w[j] = strides2_w[j + 1]; + } + for (int j = i; j < nd_; ++j) { + strides3_w[j] = strides3_w[j + 1]; + } + for (int j = i; j < nd_; ++j) { + strides4_w[j] = strides4_w[j + 1]; + } + for (int j = i + 1; j + 1 < nd_; ++j) { + shape_w[j] = shape_w[j + 1]; + } + --nd_; + break; + } + } + if (!changed) + break; + } + for (int i = 0; i < nd_; ++i) { + shape[i] = shape_w[i]; + } + for (int i = 0; i < nd_; ++i) { + strides1[i] = strides1_w[i]; + } + for (int i = 0; i < nd_; ++i) { + strides2[i] = strides2_w[i]; + } + for (int i = 0; i < nd_; ++i) { + strides3[i] = strides3_w[i]; + } + for (int i = 0; i < nd_; ++i) { + strides4[i] = strides4_w[i]; + } + + return nd_; +} + +template > +std::tuple + contract_iter4(const vecT &shape, + const vecT &strides1, + const vecT &strides2, + const vecT &strides3, + const vecT &strides4) +{ + const std::size_t dim = shape.size(); + if (dim != strides1.size() || dim != strides2.size() || + dim != strides3.size() || dim != strides4.size()) + { + throw Error("Shape and strides must be of equal size."); + } + vecT out_shape = shape; + vecT out_strides1 = strides1; + vecT out_strides2 = strides2; + vecT out_strides3 = strides3; + vecT out_strides4 = strides4; + T disp1(0); + T disp2(0); + T disp3(0); + T disp4(0); + + int nd = simplify_iteration_four_strides( + dim, out_shape.data(), out_strides1.data(), out_strides2.data(), + out_strides3.data(), out_strides4.data(), disp1, disp2, disp3, disp4); + out_shape.resize(nd); + out_strides1.resize(nd); + 
out_strides2.resize(nd); + out_strides3.resize(nd); + out_strides4.resize(nd); + return std::make_tuple(out_shape, out_strides1, disp1, out_strides2, disp2, + out_strides3, disp3, out_strides4, disp4); +} + +/* + For purposes of iterating over elements of an array with `shape` and + strides `strides` given as pointers `compact_iteration(nd, shape, strides)` + may modify memory and returns the new length of the array. + + The new shape and new strides `(new_shape, new_strides)` are such that + iterating over them will traverse the same elements in the same order, + possibly with reduced dimensionality. + */ +template +int compact_iteration(const int nd, ShapeTy *shape, StridesTy *strides) +{ + if (nd < 2) + return nd; + + bool contractable = true; + for (int i = 0; i < nd; ++i) { + if (strides[i] < 0) { + contractable = false; + } + } + + int nd_ = nd; + while (contractable) { + bool changed = false; + for (int i = 0; i + 1 < nd_; ++i) { + StridesTy str = strides[i + 1]; + StridesTy jump = strides[i] - (shape[i + 1] - 1) * str; + + if (jump == str) { + changed = true; + shape[i] *= shape[i + 1]; + for (int j = i; j < nd_; ++j) { + strides[j] = strides[j + 1]; + } + for (int j = i + 1; j + 1 < nd_; ++j) { + shape[j] = shape[j + 1]; + } + --nd_; + break; + } + } + if (!changed) + break; + } + + return nd_; +} +} // namespace dpctl::tensor::strides diff --git a/dpctl/tensor/libtensor/include/utils/sycl_alloc_utils.hpp b/dpctl/tensor/libtensor/include/utils/sycl_alloc_utils.hpp new file mode 100644 index 000000000000..76f0174b9fdf --- /dev/null +++ b/dpctl/tensor/libtensor/include/utils/sycl_alloc_utils.hpp @@ -0,0 +1,223 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +/// +/// \file +/// This file defines CIndexer_array, and CIndexer_vector classes, as well +/// iteration space simplifiers. 
+//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace dpctl::tensor::alloc_utils +{ +template +class usm_host_allocator : public sycl::usm_allocator +{ +public: + using baseT = sycl::usm_allocator; + using baseT::baseT; + + template + struct rebind + { + typedef usm_host_allocator other; + }; + + void deallocate(T *ptr, std::size_t n) + { + try { + baseT::deallocate(ptr, n); + } catch (const std::exception &e) { + std::cerr + << "Exception caught in `usm_host_allocator::deallocate`: " + << e.what() << std::endl; + } + } +}; + +template +void sycl_free_noexcept(T *ptr, const sycl::context &ctx) noexcept +{ + try { + sycl::free(ptr, ctx); + } catch (const std::exception &e) { + std::cerr << "Call to sycl::free caught exception: " << e.what() + << std::endl; + } +} + +template +void sycl_free_noexcept(T *ptr, const sycl::queue &q) noexcept +{ + sycl_free_noexcept(ptr, q.get_context()); +} + +class USMDeleter +{ +private: + sycl::context ctx_; + +public: + USMDeleter(const sycl::queue &q) : ctx_(q.get_context()) {} + USMDeleter(const sycl::context &ctx) : ctx_(ctx) {} + + template + void operator()(T *ptr) const + { + sycl_free_noexcept(ptr, ctx_); + } +}; + +template +std::unique_ptr + smart_malloc(std::size_t count, + const sycl::queue &q, + sycl::usm::alloc kind, + const sycl::property_list &propList = {}) +{ + T *ptr = sycl::malloc(count, q, kind, propList); + if (nullptr == ptr) { + throw std::runtime_error("Unable to allocate device_memory"); + } + + auto usm_deleter = USMDeleter(q); + return std::unique_ptr(ptr, usm_deleter); +} + +template +std::unique_ptr + smart_malloc_device(std::size_t count, + const sycl::queue &q, + const sycl::property_list &propList = {}) +{ + return smart_malloc(count, q, sycl::usm::alloc::device, propList); +} + +template +std::unique_ptr + smart_malloc_shared(std::size_t count, + const sycl::queue &q, + const sycl::property_list &propList = {}) +{ + return smart_malloc(count, q, sycl::usm::alloc::shared, propList); +} + +template +std::unique_ptr + smart_malloc_host(std::size_t count, + const sycl::queue &q, + const sycl::property_list &propList = {}) +{ + return smart_malloc(count, q, sycl::usm::alloc::host, propList); +} + +namespace detail +{ +template +struct valid_smart_ptr : public std::false_type +{ +}; + +template +struct valid_smart_ptr &> + : public std::is_same +{ +}; + +template +struct valid_smart_ptr> + : public std::is_same +{ +}; + +// base case +template +struct all_valid_smart_ptrs +{ + static constexpr bool value = true; +}; + +template +struct all_valid_smart_ptrs +{ + static constexpr bool value = valid_smart_ptr::value && + (all_valid_smart_ptrs::value); +}; +} // end of namespace detail + +/*! 
@brief Submit host_task and transfer ownership from smart pointers to it */ +template +sycl::event async_smart_free(sycl::queue &exec_q, + const std::vector &depends, + UniquePtrTs &&...unique_pointers) +{ + static constexpr std::size_t n = sizeof...(UniquePtrTs); + static_assert( + n > 0, "async_smart_free requires at least one smart pointer argument"); + + static_assert( + detail::all_valid_smart_ptrs::value, + "async_smart_free requires unique_ptr created with smart_malloc"); + + std::vector ptrs; + ptrs.reserve(n); + (ptrs.push_back(reinterpret_cast(unique_pointers.get())), ...); + + std::vector dels; + dels.reserve(n); + (dels.emplace_back(unique_pointers.get_deleter()), ...); + + sycl::event ht_e = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + + cgh.host_task([ptrs = std::move(ptrs), dels = std::move(dels)]() { + for (std::size_t i = 0; i < ptrs.size(); ++i) { + dels[i](ptrs[i]); + } + }); + }); + + // Upon successful submission of host_task, USM allocations are owned + // by the host_task. Release smart pointer ownership to avoid double + // deallocation + (unique_pointers.release(), ...); + + return ht_e; +} +} // namespace dpctl::tensor::alloc_utils diff --git a/dpctl/tensor/libtensor/include/utils/sycl_utils.hpp b/dpctl/tensor/libtensor/include/utils/sycl_utils.hpp new file mode 100644 index 000000000000..1cb70adafeec --- /dev/null +++ b/dpctl/tensor/libtensor/include/utils/sycl_utils.hpp @@ -0,0 +1,662 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +/// +/// \file +/// This file defines utilities used for kernel submission. 
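+/// It provides work-group size selection, custom work-group reduction and
+/// inclusive-scan helpers, reduction functors together with their identity
+/// values, and sub-group load/store wrappers.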
+//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +#include "math_utils.hpp" + +namespace dpctl::tensor::sycl_utils +{ +namespace detail +{ +template +struct TypeList; + +template +struct TypeList +{ + using head = Head; + using tail = TypeList; +}; + +using NullTypeList = TypeList<>; +template +struct IsNullTypeList : std::conditional_t, + std::true_type, + std::false_type> +{ +}; + +// recursively check if type is contained in given TypeList +template +struct IsContained + : std::conditional_t< + std::is_same_v>, + std::true_type, + IsContained> +{ +}; + +template <> +struct TypeList<> +{ +}; + +// std::false_type when last case has been checked for membership +template +struct IsContained : std::false_type +{ +}; + +template +struct IsComplex : std::false_type +{ +}; +template +struct IsComplex> : std::true_type +{ +}; +} // namespace detail + +template +using sycl_ops = detail::TypeList, + sycl::bit_or, + sycl::bit_xor, + sycl::bit_and, + sycl::maximum, + sycl::minimum, + sycl::multiplies>; + +template +struct IsSyclOp +{ + static constexpr bool value = + detail::IsContained>>::value || + detail::IsContained>>::value; +}; + +/*! @brief Find the smallest multiple of supported sub-group size larger than + * nelems */ +template +std::size_t choose_workgroup_size(const std::size_t nelems, + const std::vector &sg_sizes) +{ + std::vector wg_choices; + wg_choices.reserve(f * sg_sizes.size()); + + for (const auto &sg_size : sg_sizes) { +#pragma unroll + for (std::size_t i = 1; i <= f; ++i) { + wg_choices.push_back(sg_size * i); + } + } + std::sort(std::begin(wg_choices), std::end(wg_choices)); + + std::size_t wg = 1; + for (std::size_t i = 0; i < wg_choices.size(); ++i) { + if (wg_choices[i] == wg) { + continue; + } + wg = wg_choices[i]; + std::size_t n_groups = ((nelems + wg - 1) / wg); + if (n_groups == 1) + break; + } + + return wg; +} + +namespace detail +{ + +template +void _fold(LocAccT &local_mem_acc, + const std::uint32_t lid, + const std::uint32_t cutoff, + const std::uint32_t step, + const OpT &op) +{ + if (lid < cutoff) { + local_mem_acc[lid] = op(local_mem_acc[lid], local_mem_acc[step + lid]); + } +} + +template +void _fold(LocAccT &local_mem_acc, + const std::uint32_t lid, + const std::uint32_t step, + const OpT &op) +{ + if (lid < step) { + local_mem_acc[lid] = op(local_mem_acc[lid], local_mem_acc[step + lid]); + } +} + +} // end of namespace detail + +template +T custom_reduce_over_group(const GroupT &wg, + LocAccT local_mem_acc, + const T &local_val, + const OpT &op) +{ + // value experimentally tuned to achieve best runtime on Iris Xe, + // Arc A140V integrated Intel GPUs, and discrete Intel Max GPU. 
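+    // low_sz is the fold cut-off: partial values are tree-folded in local
+    // memory, halving the number of live work-items per step (separate paths
+    // below handle power-of-two and non-power-of-two work-group sizes), until
+    // roughly this many remain; the group leader then combines the remainder
+    // sequentially and the result is broadcast to the whole work-group.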
+ static constexpr std::uint32_t low_sz = 8u; + // maximal work-group size + static constexpr std::uint32_t high_sz = 1024u; + const std::uint32_t wgs = wg.get_local_linear_range(); + const std::uint32_t lid = wg.get_local_linear_id(); + + local_mem_acc[lid] = local_val; + sycl::group_barrier(wg, sycl::memory_scope::work_group); + + std::uint32_t n_witems = wgs; + if (wgs & (wgs - 1)) { + // wgs is not a power of 2 +#pragma unroll + for (std::uint32_t sz = high_sz; sz >= low_sz; sz >>= 1) { + if (n_witems >= sz) { + const std::uint32_t n_witems_ = (n_witems + 1) >> 1; + detail::_fold(local_mem_acc, lid, n_witems - n_witems_, + n_witems_, op); + sycl::group_barrier(wg, sycl::memory_scope::work_group); + n_witems = n_witems_; + } + } + } + else { + // wgs is a power of 2 +#pragma unroll + for (std::uint32_t sz = high_sz; sz >= low_sz; sz >>= 1) { + if (n_witems >= sz) { + n_witems >>= 1; + detail::_fold(local_mem_acc, lid, n_witems, op); + sycl::group_barrier(wg, sycl::memory_scope::work_group); + } + } + } + + T red_val_over_wg = local_mem_acc[0]; + if (wg.leader()) { + for (std::uint32_t i = 1; i < n_witems; ++i) { + red_val_over_wg = op(red_val_over_wg, local_mem_acc[i]); + } + } + + return sycl::group_broadcast(wg, red_val_over_wg, 0); +} + +template +T custom_inclusive_scan_over_group(GroupT &&wg, + SubGroupT &&sg, + LocAccT &&local_mem_acc, + const T &local_val, + const T &identity, + OpT &&op) +{ + const std::uint32_t local_id = wg.get_local_id(0); + const std::uint32_t wgs = wg.get_local_range(0); + + const std::uint32_t lane_id = sg.get_local_id()[0]; + const std::uint32_t sgSize = sg.get_local_range()[0]; + + T scan_val = local_val; + for (std::uint32_t step = 1; step < sgSize; step *= 2) { + const bool advanced_lane = (lane_id >= step); + const std::uint32_t src_lane_id = + (advanced_lane ? lane_id - step : lane_id); + const T modifier = sycl::select_from_group(sg, scan_val, src_lane_id); + if (advanced_lane) { + scan_val = op(scan_val, modifier); + } + } + + local_mem_acc[local_id] = scan_val; + sycl::group_barrier(wg, sycl::memory_scope::work_group); + + const std::uint32_t max_sgSize = sg.get_max_local_range()[0]; + const std::uint32_t sgr_id = sg.get_group_id()[0]; + + // now scan + const std::uint32_t n_aggregates = 1 + ((wgs - 1) / max_sgSize); + const bool large_wg = (n_aggregates > max_sgSize); + if (large_wg) { + if (wg.leader()) { + T _scan_val = identity; + for (std::uint32_t i = 1; i <= n_aggregates - max_sgSize; ++i) { + _scan_val = op(local_mem_acc[i * max_sgSize - 1], _scan_val); + local_mem_acc[i * max_sgSize - 1] = _scan_val; + } + } + sycl::group_barrier(wg, sycl::memory_scope::work_group); + } + + if (sgr_id == 0) { + const std::uint32_t offset = + (large_wg) ? n_aggregates - max_sgSize : 0u; + const bool in_range = (lane_id < n_aggregates); + const bool in_bounds = in_range && (lane_id > 0 || large_wg); + + T __scan_val = (in_bounds) + ? local_mem_acc[(offset + lane_id) * max_sgSize - 1] + : identity; + for (std::uint32_t step = 1; step < sgSize; step *= 2) { + const bool advanced_lane = (lane_id >= step); + const std::uint32_t src_lane_id = + (advanced_lane ? 
lane_id - step : lane_id); + const T modifier = + sycl::select_from_group(sg, __scan_val, src_lane_id); + if (advanced_lane && in_range) { + __scan_val = op(__scan_val, modifier); + } + } + if (in_bounds) { + local_mem_acc[(offset + lane_id) * max_sgSize - 1] = __scan_val; + } + } + sycl::group_barrier(wg, sycl::memory_scope::work_group); + + if (sgr_id > 0) { + const T modifier = local_mem_acc[sgr_id * max_sgSize - 1]; + scan_val = op(scan_val, modifier); + } + + // ensure all work-items finished reading from SLM + sycl::group_barrier(wg, sycl::memory_scope::work_group); + + return scan_val; +} + +// Reduction functors + +// Maximum + +template +struct Maximum +{ + T operator()(const T &x, const T &y) const + { + if constexpr (detail::IsComplex::value) { + using dpctl::tensor::math_utils::max_complex; + return max_complex(x, y); + } + else if constexpr (std::is_floating_point_v || + std::is_same_v) { + return (std::isnan(x) || x > y) ? x : y; + } + else if constexpr (std::is_same_v) { + return x || y; + } + else { + return (x > y) ? x : y; + } + } +}; + +// Minimum + +template +struct Minimum +{ + T operator()(const T &x, const T &y) const + { + if constexpr (detail::IsComplex::value) { + using dpctl::tensor::math_utils::min_complex; + return min_complex(x, y); + } + else if constexpr (std::is_floating_point_v || + std::is_same_v) { + return (std::isnan(x) || x < y) ? x : y; + } + else if constexpr (std::is_same_v) { + return x && y; + } + else { + return (x < y) ? x : y; + } + } +}; + +// Define identities and operator checking structs + +template +struct GetIdentity +{ +}; + +// Maximum + +template +using IsMaximum = std::bool_constant> || + std::is_same_v>>; + +template +using IsSyclMaximum = std::bool_constant>>; + +template +struct GetIdentity::value>> +{ + static constexpr T value = + static_cast(std::numeric_limits::has_infinity + ? static_cast(-std::numeric_limits::infinity()) + : std::numeric_limits::lowest()); +}; + +template +struct GetIdentity::value>> +{ + static constexpr bool value = false; +}; + +template +struct GetIdentity, + std::enable_if_t, Op>::value>> +{ + static constexpr std::complex value{-std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}; +}; + +// Minimum + +template +using IsMinimum = std::bool_constant> || + std::is_same_v>>; + +template +using IsSyclMinimum = std::bool_constant>>; + +template +struct GetIdentity::value>> +{ + static constexpr T value = + static_cast(std::numeric_limits::has_infinity + ? 
static_cast(std::numeric_limits::infinity()) + : std::numeric_limits::max()); +}; + +template +struct GetIdentity::value>> +{ + static constexpr bool value = true; +}; + +template +struct GetIdentity, + std::enable_if_t, Op>::value>> +{ + static constexpr std::complex value{std::numeric_limits::infinity(), + std::numeric_limits::infinity()}; +}; + +// Plus + +template +using IsPlus = std::bool_constant> || + std::is_same_v>>; + +template +using IsSyclPlus = std::bool_constant>>; + +// Multiplies + +template +using IsMultiplies = + std::bool_constant> || + std::is_same_v>>; + +template +using IsSyclMultiplies = + std::bool_constant>>; + +template +struct GetIdentity::value>> +{ + static constexpr T value = static_cast(1); +}; + +// LogSumExp + +template +struct LogSumExp +{ + T operator()(const T &x, const T &y) const + { + using dpctl::tensor::math_utils::logaddexp; + return logaddexp(x, y); + } +}; + +template +using IsLogSumExp = std::bool_constant>>; + +// only defined for types with infinity +template +struct GetIdentity::value>> +{ + static constexpr T value = -std::numeric_limits::infinity(); +}; + +// Hypot + +template +struct Hypot +{ + T operator()(const T &x, const T &y) const + { + return sycl::hypot(x, y); + } +}; + +template +using IsHypot = std::bool_constant>>; + +template +struct GetIdentity::value>> +{ + static constexpr T value = 0; +}; + +// Logical_And + +template +using IsLogicalAnd = + std::bool_constant> || + std::is_same_v>>; + +template +using IsSyclLogicalAnd = + std::bool_constant>>; + +template +struct GetIdentity::value>> +{ + static constexpr T value = static_cast(1); +}; + +// Logical_Or + +template +using IsLogicalOr = + std::bool_constant> || + std::is_same_v>>; + +template +using IsSyclLogicalOr = + std::bool_constant>>; + +template +struct GetIdentity::value>> +{ + static constexpr T value = static_cast(0); +}; + +// Identity + +template +struct Identity +{ +}; + +template +using UseBuiltInIdentity = + std::conjunction, sycl::has_known_identity>; + +template +struct Identity::value>> +{ + static constexpr T value = GetIdentity::value; +}; + +template +struct Identity::value>> +{ + static constexpr T value = sycl::known_identity::value; +}; + +// Sub-group load/store + +#ifndef USE_GROUP_LOAD_STORE +#if defined(SYCL_EXT_ONEAPI_GROUP_LOAD_STORE) && \ + SYCL_EXT_ONEAPI_GROUP_LOAD_STORE +#define USE_GROUP_LOAD_STORE 1 +#else +#if defined(__LIBSYCL_MAJOR_VERSION) && (__LIBSYCL_MAJOR_VERSION >= 8u) +#define USE_GROUP_LOAD_STORE 1 +#else +#define USE_GROUP_LOAD_STORE 0 +#endif +#endif +#endif + +#if (USE_GROUP_LOAD_STORE) +namespace ls_ns = sycl::ext::oneapi::experimental; +#endif + +template +auto sub_group_load(const sycl::sub_group &sg, + sycl::multi_ptr m_ptr) +{ +#if (USE_GROUP_LOAD_STORE) + using ValueT = typename std::remove_cv_t; + sycl::vec x{}; + static constexpr auto striped = + ls_ns::properties{ls_ns::data_placement_striped}; + ls_ns::group_load(sg, m_ptr, x, striped); + return x; +#else + return sg.load(m_ptr); +#endif +} + +template +auto sub_group_load(const sycl::sub_group &sg, + sycl::multi_ptr m_ptr) +{ +#if (USE_GROUP_LOAD_STORE) + using ValueT = typename std::remove_cv_t; + ValueT x{}; + static constexpr auto striped = + ls_ns::properties{ls_ns::data_placement_striped}; + ls_ns::group_load(sg, m_ptr, x, striped); + return x; +#else + return sg.load(m_ptr); +#endif +} + +template +std::enable_if_t< + std::is_same_v, std::remove_cv_t>, + void> + sub_group_store(const sycl::sub_group &sg, + const sycl::vec &val, + sycl::multi_ptr m_ptr) +{ +#if 
(USE_GROUP_LOAD_STORE) + static_assert(std::is_same_v); + static constexpr auto striped = + ls_ns::properties{ls_ns::data_placement_striped}; + ls_ns::group_store(sg, val, m_ptr, striped); + return; +#else + sg.store(m_ptr, val); + return; +#endif +} + +template +std::enable_if_t< + std::is_same_v, std::remove_cv_t>, + void> + sub_group_store(const sycl::sub_group &sg, + const VecT &val, + sycl::multi_ptr m_ptr) +{ +#if (USE_GROUP_LOAD_STORE) + static constexpr auto striped = + ls_ns::properties{ls_ns::data_placement_striped}; + ls_ns::group_store(sg, val, m_ptr, striped); + return; +#else + sg.store(m_ptr, val); + return; +#endif +} +} // namespace dpctl::tensor::sycl_utils diff --git a/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp b/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp new file mode 100644 index 000000000000..5ec84783c901 --- /dev/null +++ b/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp @@ -0,0 +1,134 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** +/// +/// \file +/// This file defines class to implement dispatch tables for pair of types +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "type_dispatch_building.hpp" + +namespace dpctl::tensor::type_dispatch +{ +struct usm_ndarray_types +{ + int typenum_to_lookup_id(int typenum) const + { + using typenum_t = ::dpctl::tensor::type_dispatch::typenum_t; + auto const &api = ::dpctl::detail::dpctl_capi::get(); + + if (typenum == api.UAR_DOUBLE_) { + return static_cast(typenum_t::DOUBLE); + } + else if (typenum == api.UAR_INT64_) { + return static_cast(typenum_t::INT64); + } + else if (typenum == api.UAR_INT32_) { + return static_cast(typenum_t::INT32); + } + else if (typenum == api.UAR_BOOL_) { + return static_cast(typenum_t::BOOL); + } + else if (typenum == api.UAR_CDOUBLE_) { + return static_cast(typenum_t::CDOUBLE); + } + else if (typenum == api.UAR_FLOAT_) { + return static_cast(typenum_t::FLOAT); + } + else if (typenum == api.UAR_INT16_) { + return static_cast(typenum_t::INT16); + } + else if (typenum == api.UAR_INT8_) { + return static_cast(typenum_t::INT8); + } + else if (typenum == api.UAR_UINT64_) { + return static_cast(typenum_t::UINT64); + } + else if (typenum == api.UAR_UINT32_) { + return static_cast(typenum_t::UINT32); + } + else if (typenum == api.UAR_UINT16_) { + return static_cast(typenum_t::UINT16); + } + else if (typenum == api.UAR_UINT8_) { + return static_cast(typenum_t::UINT8); + } + else if (typenum == api.UAR_CFLOAT_) { + return static_cast(typenum_t::CFLOAT); + } + else if (typenum == api.UAR_HALF_) { + return static_cast(typenum_t::HALF); + } + else if (typenum == api.UAR_INT_ || typenum == api.UAR_UINT_) { + switch (sizeof(int)) { + case sizeof(std::int32_t): + return ((typenum == api.UAR_INT_) + ? static_cast(typenum_t::INT32) + : static_cast(typenum_t::UINT32)); + case sizeof(std::int64_t): + return ((typenum == api.UAR_INT_) + ? static_cast(typenum_t::INT64) + : static_cast(typenum_t::UINT64)); + default: + throw_unrecognized_typenum_error(typenum); + } + } + else if (typenum == api.UAR_LONGLONG_ || typenum == api.UAR_ULONGLONG_) + { + switch (sizeof(long long)) { + case sizeof(std::int64_t): + return ((typenum == api.UAR_LONGLONG_) + ? static_cast(typenum_t::INT64) + : static_cast(typenum_t::UINT64)); + default: + throw_unrecognized_typenum_error(typenum); + } + } + else { + throw_unrecognized_typenum_error(typenum); + } + // return code signalling error, should never be reached + assert(false); + return -1; + } + +private: + void throw_unrecognized_typenum_error(int typenum) const + { + throw std::runtime_error("Unrecognized typenum " + + std::to_string(typenum) + " encountered."); + } +}; +} // namespace dpctl::tensor::type_dispatch diff --git a/dpctl/tensor/libtensor/include/utils/type_dispatch_building.hpp b/dpctl/tensor/libtensor/include/utils/type_dispatch_building.hpp new file mode 100644 index 000000000000..b1e02eb1513b --- /dev/null +++ b/dpctl/tensor/libtensor/include/utils/type_dispatch_building.hpp @@ -0,0 +1,300 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +/// +/// \file +/// This file defines class to implement dispatch tables for pair of types +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include + +#include + +namespace dpctl::tensor::type_dispatch +{ +enum class typenum_t : int +{ + BOOL = 0, + INT8, // 1 + UINT8, + INT16, + UINT16, + INT32, // 5 + UINT32, + INT64, + UINT64, + HALF, + FLOAT, // 10 + DOUBLE, + CFLOAT, + CDOUBLE, // 13 +}; +inline constexpr int num_types = 14; // number of elements in typenum_t + +template + typename factory, + int _num_types> +class DispatchTableBuilder +{ +private: + template + const std::vector row_per_dst_type() const + { + std::vector per_dstTy = { + factory{}.get(), + factory{}.get(), + factory{}.get(), + factory{}.get(), + factory{}.get(), + factory{}.get(), + factory{}.get(), + factory{}.get(), + factory{}.get(), + factory{}.get(), + factory{}.get(), + factory{}.get(), + factory>{}.get(), + factory>{}.get()}; + assert(per_dstTy.size() == _num_types); + return per_dstTy; + } + +public: + DispatchTableBuilder() = default; + ~DispatchTableBuilder() = default; + + void populate_dispatch_table(funcPtrT table[][_num_types]) const + { + const auto map_by_dst_type = {row_per_dst_type(), + row_per_dst_type(), + row_per_dst_type(), + row_per_dst_type(), + row_per_dst_type(), + row_per_dst_type(), + row_per_dst_type(), + row_per_dst_type(), + row_per_dst_type(), + row_per_dst_type(), + row_per_dst_type(), + row_per_dst_type(), + row_per_dst_type>(), + row_per_dst_type>()}; + assert(map_by_dst_type.size() == _num_types); + int dst_id = 0; + for (const auto &row : map_by_dst_type) { + int src_id = 0; + for (const auto &fn_ptr : row) { + table[dst_id][src_id] = fn_ptr; + ++src_id; + } + ++dst_id; + } + } +}; + +template + typename factory, + int _num_types> +class DispatchVectorBuilder +{ +private: + template + const funcPtrT func_per_type() const + { + funcPtrT f 
= factory{}.get(); + return f; + } + +public: + DispatchVectorBuilder() = default; + ~DispatchVectorBuilder() = default; + + void populate_dispatch_vector(funcPtrT vector[]) const + { + const auto fn_map_by_type = {func_per_type(), + func_per_type(), + func_per_type(), + func_per_type(), + func_per_type(), + func_per_type(), + func_per_type(), + func_per_type(), + func_per_type(), + func_per_type(), + func_per_type(), + func_per_type(), + func_per_type>(), + func_per_type>()}; + assert(fn_map_by_type.size() == _num_types); + int ty_id = 0; + for (const auto &fn : fn_map_by_type) { + vector[ty_id] = fn; + ++ty_id; + } + } +}; + +/*! @brief struct to define result_type typename for Ty == ArgTy */ +template +struct TypeMapResultEntry : std::is_same +{ + using result_type = ResTy; +}; + +/*! @brief struct to define result_type typename for Ty1 == ArgTy1 && Ty2 == + * ArgTy2 */ +template +struct BinaryTypeMapResultEntry + : std::conjunction, std::is_same> +{ + using result_type = ResTy; +}; + +/*! @brief fall-through struct with specified result_type, usually void */ +template +struct DefaultResultEntry : std::true_type +{ + using result_type = Ty; +}; + +/*! @brief Utility struct to convert C++ type into typeid integer */ +template +struct GetTypeid +{ + int get() + { + if constexpr (std::is_same_v) { + return static_cast(typenum_t::BOOL); + } + else if constexpr (std::is_same_v) { + return static_cast(typenum_t::INT8); + } + else if constexpr (std::is_same_v) { + return static_cast(typenum_t::UINT8); + } + else if constexpr (std::is_same_v) { + return static_cast(typenum_t::INT16); + } + else if constexpr (std::is_same_v) { + return static_cast(typenum_t::UINT16); + } + else if constexpr (std::is_same_v) { + return static_cast(typenum_t::INT32); + } + else if constexpr (std::is_same_v) { + return static_cast(typenum_t::UINT32); + } + else if constexpr (std::is_same_v) { + return static_cast(typenum_t::INT64); + } + else if constexpr (std::is_same_v) { + return static_cast(typenum_t::UINT64); + } + else if constexpr (std::is_same_v) { + return static_cast(typenum_t::HALF); + } + else if constexpr (std::is_same_v) { + return static_cast(typenum_t::FLOAT); + } + else if constexpr (std::is_same_v) { + return static_cast(typenum_t::DOUBLE); + } + else if constexpr (std::is_same_v>) { + return static_cast(typenum_t::CFLOAT); + } + else if constexpr (std::is_same_v>) { + return static_cast(typenum_t::CDOUBLE); + } + else if constexpr (std::is_same_v) { // special token + return -1; + } + + assert(("Unsupported type T", false)); + return -2; + } +}; + +/*! @brief Class to generate vector of null function pointers */ +template +struct NullPtrVector +{ + + using value_type = FunPtrT; + using const_reference = value_type const &; + + NullPtrVector() : val(nullptr) {} + + const_reference operator[](int) const + { + return val; + } + +private: + value_type val; +}; + +/*! 
@brief Class to generate table of null function pointers */ +template +struct NullPtrTable +{ + using value_type = NullPtrVector; + using const_reference = value_type const &; + + NullPtrTable() : val() {} + + const_reference operator[](int) const + { + return val; + } + +private: + value_type val; +}; + +template +struct TypePairDefinedEntry + : std::conjunction, std::is_same> +{ + static constexpr bool is_defined = true; +}; + +struct NotDefinedEntry : std::true_type +{ + static constexpr bool is_defined = false; +}; +} // namespace dpctl::tensor::type_dispatch diff --git a/dpctl/tensor/libtensor/include/utils/type_utils.hpp b/dpctl/tensor/libtensor/include/utils/type_utils.hpp new file mode 100644 index 000000000000..e5855081c727 --- /dev/null +++ b/dpctl/tensor/libtensor/include/utils/type_utils.hpp @@ -0,0 +1,164 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +/// +/// \file +/// This file defines functions for value casting. 
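+/// It also provides device capability checks for types requiring fp16/fp64
+/// support and helpers for casting between sycl::vec value types.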
+//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +namespace dpctl::tensor::type_utils +{ +template +struct is_complex : public std::false_type +{ +}; + +template +struct is_complex< + T, + std::enable_if_t, std::complex> || + std::is_same_v, std::complex>>> + : public std::true_type +{ +}; + +template +inline constexpr bool is_complex_v = is_complex::value; + +template +dstTy convert_impl(const srcTy &v) +{ + if constexpr (std::is_same_v) { + return v; + } + else if constexpr (std::is_same_v) { + if constexpr (is_complex_v) { + // bool(complex_v) == + // (complex_v.real() != 0) && (complex_v.imag() !=0) + return (convert_impl(v.real()) || + convert_impl(v.imag())); + } + else { + return static_cast(v != srcTy{0}); + } + } + else if constexpr (std::is_same_v) { + // C++ interprets a byte of storage behind bool by only + // testing is least significant bit, leading to both + // 0x00 and 0x02 interpreted as False, while 0x01 and 0xFF + // interpreted as True. NumPy's interpretation of underlying + // storage is different: any bit set is interpreted as True, + // no bits set as False, see gh-2121 + const std::uint8_t &u = sycl::bit_cast(v); + if constexpr (is_complex_v) { + return (u == 0) ? dstTy{} : dstTy{1, 0}; + } + else { + return (u == 0) ? dstTy{} : dstTy{1}; + } + } + else if constexpr (is_complex_v && !is_complex_v) { + // real_t(complex_v) == real_t(complex_v.real()) + return convert_impl(v.real()); + } + else if constexpr (!std::is_integral_v && + !std::is_same_v && + std::is_integral_v && std::is_unsigned_v) + { + // first cast to signed variant, the cast to unsigned one + using signedT = typename std::make_signed_t; + return static_cast(convert_impl(v)); + } + else { + return static_cast(v); + } +} + +template +void validate_type_for_device(const sycl::device &d) +{ + if constexpr (std::is_same_v) { + if (!d.has(sycl::aspect::fp64)) { + throw std::runtime_error("Device " + + d.get_info() + + " does not support type 'float64'"); + } + } + else if constexpr (std::is_same_v>) { + if (!d.has(sycl::aspect::fp64)) { + throw std::runtime_error("Device " + + d.get_info() + + " does not support type 'complex128'"); + } + } + else if constexpr (std::is_same_v) { + if (!d.has(sycl::aspect::fp16)) { + throw std::runtime_error("Device " + + d.get_info() + + " does not support type 'float16'"); + } + } +} + +template +void validate_type_for_device(const sycl::queue &q) +{ + validate_type_for_device(q.get_device()); +} + +template +auto vec_cast_impl(const Vec &v, std::index_sequence) +{ + return Op{v[I]...}; +} + +template > +auto vec_cast(const sycl::vec &s) +{ + if constexpr (std::is_same_v) { + return s; + } + else { + return vec_cast_impl, sycl::vec>(s, + Indices{}); + } +} +} // namespace dpctl::tensor::type_utils diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt index ddca557a08f4..433ab298d476 100644 --- a/dpnp/backend/CMakeLists.txt +++ b/dpnp/backend/CMakeLists.txt @@ -89,7 +89,6 @@ target_compile_definitions(${_trgt} PUBLIC PSTL_USE_PARALLEL_POLICIES=0) target_compile_definitions(${_trgt} PUBLIC ONEDPL_USE_PREDEFINED_POLICIES=0) target_include_directories(${_trgt} PUBLIC ${Dpctl_INCLUDE_DIR}) -target_include_directories(${_trgt} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) target_link_directories(${_trgt} PUBLIC "${Dpctl_INCLUDE_DIR}/..") target_link_libraries(${_trgt} PUBLIC DPCTLSyclInterface) diff --git 
a/dpnp/backend/extensions/blas/CMakeLists.txt b/dpnp/backend/extensions/blas/CMakeLists.txt index 267567c69e71..2da35cc695ac 100644 --- a/dpnp/backend/extensions/blas/CMakeLists.txt +++ b/dpnp/backend/extensions/blas/CMakeLists.txt @@ -71,9 +71,12 @@ target_include_directories( ${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common ) +target_include_directories( + ${python_module_name} + PRIVATE ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include +) target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS}) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) if(WIN32) target_compile_options( diff --git a/dpnp/backend/extensions/blas/dot_common.hpp b/dpnp/backend/extensions/blas/dot_common.hpp index 1672e7217cba..369e3320473c 100644 --- a/dpnp/backend/extensions/blas/dot_common.hpp +++ b/dpnp/backend/extensions/blas/dot_common.hpp @@ -29,6 +29,7 @@ #pragma once #include + #include // dpctl tensor headers diff --git a/dpnp/backend/extensions/common/ext/common.hpp b/dpnp/backend/extensions/common/ext/common.hpp index d626b56ea00c..036eb635a3bd 100644 --- a/dpnp/backend/extensions/common/ext/common.hpp +++ b/dpnp/backend/extensions/common/ext/common.hpp @@ -29,8 +29,10 @@ #pragma once #include + #include #include + #include // dpctl tensor headers diff --git a/dpnp/backend/extensions/fft/CMakeLists.txt b/dpnp/backend/extensions/fft/CMakeLists.txt index 50468857e3b9..0631b049ad72 100644 --- a/dpnp/backend/extensions/fft/CMakeLists.txt +++ b/dpnp/backend/extensions/fft/CMakeLists.txt @@ -65,9 +65,12 @@ target_include_directories( ${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src ) +target_include_directories( + ${python_module_name} + PRIVATE ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include +) target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS}) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) if(WIN32) target_compile_options( diff --git a/dpnp/backend/extensions/indexing/CMakeLists.txt b/dpnp/backend/extensions/indexing/CMakeLists.txt index a6691f31f559..28d38bc28f21 100644 --- a/dpnp/backend/extensions/indexing/CMakeLists.txt +++ b/dpnp/backend/extensions/indexing/CMakeLists.txt @@ -68,9 +68,12 @@ target_include_directories( ${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common ) +target_include_directories( + ${python_module_name} + PRIVATE ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include +) target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) if(WIN32) target_compile_options( diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt index 5e8b95963e94..aa0f6b718972 100644 --- a/dpnp/backend/extensions/lapack/CMakeLists.txt +++ b/dpnp/backend/extensions/lapack/CMakeLists.txt @@ -88,9 +88,12 @@ target_include_directories( ${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common ) +target_include_directories( + ${python_module_name} + PRIVATE ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include +) target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) if(WIN32) target_compile_options( diff --git a/dpnp/backend/extensions/statistics/CMakeLists.txt b/dpnp/backend/extensions/statistics/CMakeLists.txt index 9561daf27ce2..2d59f679793c 100644 --- 
a/dpnp/backend/extensions/statistics/CMakeLists.txt +++ b/dpnp/backend/extensions/statistics/CMakeLists.txt @@ -77,9 +77,12 @@ target_include_directories( ${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common ) +target_include_directories( + ${python_module_name} + PRIVATE ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include +) target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) if(WIN32) target_compile_options( diff --git a/dpnp/backend/extensions/ufunc/CMakeLists.txt b/dpnp/backend/extensions/ufunc/CMakeLists.txt index b24d5d131cfe..f1378bf52d88 100644 --- a/dpnp/backend/extensions/ufunc/CMakeLists.txt +++ b/dpnp/backend/extensions/ufunc/CMakeLists.txt @@ -90,9 +90,12 @@ target_include_directories( ${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common ) +target_include_directories( + ${python_module_name} + PRIVATE ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include +) target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) if(_dpnp_sycl_targets) # make fat binary diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index 0e3a17df77e0..b7181616f546 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -113,9 +113,12 @@ target_include_directories( ${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common ) +target_include_directories( + ${python_module_name} + PRIVATE ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include +) target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) if(WIN32) target_compile_options( diff --git a/dpnp/backend/extensions/window/CMakeLists.txt b/dpnp/backend/extensions/window/CMakeLists.txt index fc446f523e74..186668bb1662 100644 --- a/dpnp/backend/extensions/window/CMakeLists.txt +++ b/dpnp/backend/extensions/window/CMakeLists.txt @@ -68,9 +68,12 @@ target_include_directories( ${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common ) +target_include_directories( + ${python_module_name} + PRIVATE ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include +) target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) if(WIN32) target_compile_options( diff --git a/pyproject.toml b/pyproject.toml index d659428877fc..6394cf118dcf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -108,7 +108,7 @@ target-version = ['py310', 'py311', 'py312', 'py313', 'py314'] [tool.codespell] builtin = "clear,rare,informal,names" check-filenames = true -ignore-words-list = "amin,arange,elemt,fro,hist,ith,mone,nd,nin,sinc,vart" +ignore-words-list = "amin,arange,elemt,fro,hist,ith,mone,nd,nin,sinc,vart,GroupT" quiet-level = 3 [tool.coverage.report] From 27a36233bfed2989c2c37b87cf786a07efb38e73 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 23 Jan 2026 04:18:46 -0800 Subject: [PATCH 02/18] Remove unused include dir for building fft and statistics extensions --- dpnp/backend/extensions/fft/CMakeLists.txt | 8 -------- dpnp/backend/extensions/statistics/CMakeLists.txt | 8 -------- 2 files changed, 16 deletions(-) diff --git a/dpnp/backend/extensions/fft/CMakeLists.txt b/dpnp/backend/extensions/fft/CMakeLists.txt index 
0631b049ad72..0c2c446fe8a0 100644 --- a/dpnp/backend/extensions/fft/CMakeLists.txt +++ b/dpnp/backend/extensions/fft/CMakeLists.txt @@ -57,14 +57,6 @@ set_target_properties( PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON ) -target_include_directories( - ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include -) -target_include_directories( - ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src -) target_include_directories( ${python_module_name} PRIVATE ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include diff --git a/dpnp/backend/extensions/statistics/CMakeLists.txt b/dpnp/backend/extensions/statistics/CMakeLists.txt index 2d59f679793c..710a35346d63 100644 --- a/dpnp/backend/extensions/statistics/CMakeLists.txt +++ b/dpnp/backend/extensions/statistics/CMakeLists.txt @@ -65,14 +65,6 @@ set_target_properties( PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON ) -target_include_directories( - ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include -) -target_include_directories( - ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src -) target_include_directories( ${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common From 7c72c6eed1d423379feaee8b6d839fc6d8f2c115 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 23 Jan 2026 04:25:54 -0800 Subject: [PATCH 03/18] Add dpnp4pybind11.hpp --- dpnp/backend/include/dpnp4pybind11.hpp | 1373 ++++++++++++++++++++++++ 1 file changed, 1373 insertions(+) create mode 100644 dpnp/backend/include/dpnp4pybind11.hpp diff --git a/dpnp/backend/include/dpnp4pybind11.hpp b/dpnp/backend/include/dpnp4pybind11.hpp new file mode 100644 index 000000000000..cd287989bef2 --- /dev/null +++ b/dpnp/backend/include/dpnp4pybind11.hpp @@ -0,0 +1,1373 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include "dpctl_capi.h" + +#include +#include // for std::size_t for C++ linkage +#include +#include // for size_t for C linkage +#include +#include +#include + +#include + +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace detail +{ +// Lookup a type according to its size, and return a value corresponding to the +// NumPy typenum. +template +constexpr int platform_typeid_lookup() +{ + return -1; +} + +template +constexpr int platform_typeid_lookup(int I, Ints... Is) +{ + return sizeof(Concrete) == sizeof(T) + ? I + : platform_typeid_lookup(Is...); +} + +class dpctl_capi +{ +public: + // dpctl type objects + PyTypeObject *Py_SyclDeviceType_; + PyTypeObject *PySyclDeviceType_; + PyTypeObject *Py_SyclContextType_; + PyTypeObject *PySyclContextType_; + PyTypeObject *Py_SyclEventType_; + PyTypeObject *PySyclEventType_; + PyTypeObject *Py_SyclQueueType_; + PyTypeObject *PySyclQueueType_; + PyTypeObject *Py_MemoryType_; + PyTypeObject *PyMemoryUSMDeviceType_; + PyTypeObject *PyMemoryUSMSharedType_; + PyTypeObject *PyMemoryUSMHostType_; + PyTypeObject *PyUSMArrayType_; + PyTypeObject *PySyclProgramType_; + PyTypeObject *PySyclKernelType_; + + DPCTLSyclDeviceRef (*SyclDevice_GetDeviceRef_)(PySyclDeviceObject *); + PySyclDeviceObject *(*SyclDevice_Make_)(DPCTLSyclDeviceRef); + + DPCTLSyclContextRef (*SyclContext_GetContextRef_)(PySyclContextObject *); + PySyclContextObject *(*SyclContext_Make_)(DPCTLSyclContextRef); + + DPCTLSyclEventRef (*SyclEvent_GetEventRef_)(PySyclEventObject *); + PySyclEventObject *(*SyclEvent_Make_)(DPCTLSyclEventRef); + + DPCTLSyclQueueRef (*SyclQueue_GetQueueRef_)(PySyclQueueObject *); + PySyclQueueObject *(*SyclQueue_Make_)(DPCTLSyclQueueRef); + + // memory + DPCTLSyclUSMRef (*Memory_GetUsmPointer_)(Py_MemoryObject *); + void *(*Memory_GetOpaquePointer_)(Py_MemoryObject *); + DPCTLSyclContextRef (*Memory_GetContextRef_)(Py_MemoryObject *); + DPCTLSyclQueueRef (*Memory_GetQueueRef_)(Py_MemoryObject *); + size_t (*Memory_GetNumBytes_)(Py_MemoryObject *); + PyObject *(*Memory_Make_)(DPCTLSyclUSMRef, + size_t, + DPCTLSyclQueueRef, + PyObject *); + + // program + DPCTLSyclKernelRef (*SyclKernel_GetKernelRef_)(PySyclKernelObject *); + PySyclKernelObject *(*SyclKernel_Make_)(DPCTLSyclKernelRef, const char *); + + DPCTLSyclKernelBundleRef (*SyclProgram_GetKernelBundleRef_)( + PySyclProgramObject *); + PySyclProgramObject *(*SyclProgram_Make_)(DPCTLSyclKernelBundleRef); + + // tensor + char *(*UsmNDArray_GetData_)(PyUSMArrayObject *); + int (*UsmNDArray_GetNDim_)(PyUSMArrayObject *); + py::ssize_t *(*UsmNDArray_GetShape_)(PyUSMArrayObject *); + py::ssize_t *(*UsmNDArray_GetStrides_)(PyUSMArrayObject *); + int (*UsmNDArray_GetTypenum_)(PyUSMArrayObject *); + int (*UsmNDArray_GetElementSize_)(PyUSMArrayObject *); + int (*UsmNDArray_GetFlags_)(PyUSMArrayObject *); + DPCTLSyclQueueRef (*UsmNDArray_GetQueueRef_)(PyUSMArrayObject *); + py::ssize_t (*UsmNDArray_GetOffset_)(PyUSMArrayObject *); + PyObject *(*UsmNDArray_GetUSMData_)(PyUSMArrayObject *); + void (*UsmNDArray_SetWritableFlag_)(PyUSMArrayObject *, int); + PyObject *(*UsmNDArray_MakeSimpleFromMemory_)(int, + const py::ssize_t *, + int, + Py_MemoryObject *, + py::ssize_t, + char); + PyObject *(*UsmNDArray_MakeSimpleFromPtr_)(size_t, + int, + DPCTLSyclUSMRef, + DPCTLSyclQueueRef, + PyObject *); + PyObject *(*UsmNDArray_MakeFromPtr_)(int, + const py::ssize_t *, + int, + const py::ssize_t *, + DPCTLSyclUSMRef, + 
DPCTLSyclQueueRef, + py::ssize_t, + PyObject *); + + int USM_ARRAY_C_CONTIGUOUS_; + int USM_ARRAY_F_CONTIGUOUS_; + int USM_ARRAY_WRITABLE_; + int UAR_BOOL_, UAR_BYTE_, UAR_UBYTE_, UAR_SHORT_, UAR_USHORT_, UAR_INT_, + UAR_UINT_, UAR_LONG_, UAR_ULONG_, UAR_LONGLONG_, UAR_ULONGLONG_, + UAR_FLOAT_, UAR_DOUBLE_, UAR_CFLOAT_, UAR_CDOUBLE_, UAR_TYPE_SENTINEL_, + UAR_HALF_; + int UAR_INT8_, UAR_UINT8_, UAR_INT16_, UAR_UINT16_, UAR_INT32_, UAR_UINT32_, + UAR_INT64_, UAR_UINT64_; + + bool PySyclDevice_Check_(PyObject *obj) const + { + return PyObject_TypeCheck(obj, PySyclDeviceType_) != 0; + } + bool PySyclContext_Check_(PyObject *obj) const + { + return PyObject_TypeCheck(obj, PySyclContextType_) != 0; + } + bool PySyclEvent_Check_(PyObject *obj) const + { + return PyObject_TypeCheck(obj, PySyclEventType_) != 0; + } + bool PySyclQueue_Check_(PyObject *obj) const + { + return PyObject_TypeCheck(obj, PySyclQueueType_) != 0; + } + bool PySyclKernel_Check_(PyObject *obj) const + { + return PyObject_TypeCheck(obj, PySyclKernelType_) != 0; + } + bool PySyclProgram_Check_(PyObject *obj) const + { + return PyObject_TypeCheck(obj, PySyclProgramType_) != 0; + } + + ~dpctl_capi() + { + as_usm_memory_.reset(); + default_usm_ndarray_.reset(); + default_usm_memory_.reset(); + default_sycl_queue_.reset(); + }; + + static auto &get() + { + static dpctl_capi api{}; + return api; + } + + py::object default_sycl_queue_pyobj() + { + return *default_sycl_queue_; + } + py::object default_usm_memory_pyobj() + { + return *default_usm_memory_; + } + py::object default_usm_ndarray_pyobj() + { + return *default_usm_ndarray_; + } + py::object as_usm_memory_pyobj() + { + return *as_usm_memory_; + } + +private: + struct Deleter + { + void operator()(py::object *p) const + { + const bool initialized = Py_IsInitialized(); +#if PY_VERSION_HEX < 0x30d0000 + const bool finalizing = _Py_IsFinalizing(); +#else + const bool finalizing = Py_IsFinalizing(); +#endif + const bool guard = initialized && !finalizing; + + if (guard) { + delete p; + } + } + }; + + std::shared_ptr default_sycl_queue_; + std::shared_ptr default_usm_memory_; + std::shared_ptr default_usm_ndarray_; + std::shared_ptr as_usm_memory_; + + dpctl_capi() + : Py_SyclDeviceType_(nullptr), PySyclDeviceType_(nullptr), + Py_SyclContextType_(nullptr), PySyclContextType_(nullptr), + Py_SyclEventType_(nullptr), PySyclEventType_(nullptr), + Py_SyclQueueType_(nullptr), PySyclQueueType_(nullptr), + Py_MemoryType_(nullptr), PyMemoryUSMDeviceType_(nullptr), + PyMemoryUSMSharedType_(nullptr), PyMemoryUSMHostType_(nullptr), + PyUSMArrayType_(nullptr), PySyclProgramType_(nullptr), + PySyclKernelType_(nullptr), SyclDevice_GetDeviceRef_(nullptr), + SyclDevice_Make_(nullptr), SyclContext_GetContextRef_(nullptr), + SyclContext_Make_(nullptr), SyclEvent_GetEventRef_(nullptr), + SyclEvent_Make_(nullptr), SyclQueue_GetQueueRef_(nullptr), + SyclQueue_Make_(nullptr), Memory_GetUsmPointer_(nullptr), + Memory_GetOpaquePointer_(nullptr), Memory_GetContextRef_(nullptr), + Memory_GetQueueRef_(nullptr), Memory_GetNumBytes_(nullptr), + Memory_Make_(nullptr), SyclKernel_GetKernelRef_(nullptr), + SyclKernel_Make_(nullptr), SyclProgram_GetKernelBundleRef_(nullptr), + SyclProgram_Make_(nullptr), UsmNDArray_GetData_(nullptr), + UsmNDArray_GetNDim_(nullptr), UsmNDArray_GetShape_(nullptr), + UsmNDArray_GetStrides_(nullptr), UsmNDArray_GetTypenum_(nullptr), + UsmNDArray_GetElementSize_(nullptr), UsmNDArray_GetFlags_(nullptr), + UsmNDArray_GetQueueRef_(nullptr), UsmNDArray_GetOffset_(nullptr), + 
UsmNDArray_GetUSMData_(nullptr), UsmNDArray_SetWritableFlag_(nullptr), + UsmNDArray_MakeSimpleFromMemory_(nullptr), + UsmNDArray_MakeSimpleFromPtr_(nullptr), + UsmNDArray_MakeFromPtr_(nullptr), USM_ARRAY_C_CONTIGUOUS_(0), + USM_ARRAY_F_CONTIGUOUS_(0), USM_ARRAY_WRITABLE_(0), UAR_BOOL_(-1), + UAR_BYTE_(-1), UAR_UBYTE_(-1), UAR_SHORT_(-1), UAR_USHORT_(-1), + UAR_INT_(-1), UAR_UINT_(-1), UAR_LONG_(-1), UAR_ULONG_(-1), + UAR_LONGLONG_(-1), UAR_ULONGLONG_(-1), UAR_FLOAT_(-1), + UAR_DOUBLE_(-1), UAR_CFLOAT_(-1), UAR_CDOUBLE_(-1), + UAR_TYPE_SENTINEL_(-1), UAR_HALF_(-1), UAR_INT8_(-1), UAR_UINT8_(-1), + UAR_INT16_(-1), UAR_UINT16_(-1), UAR_INT32_(-1), UAR_UINT32_(-1), + UAR_INT64_(-1), UAR_UINT64_(-1), default_sycl_queue_{}, + default_usm_memory_{}, default_usm_ndarray_{}, as_usm_memory_{} + + { + // Import Cython-generated C-API for dpctl + // This imports python modules and initializes + // static variables such as function pointers for C-API, + // e.g. SyclDevice_GetDeviceRef, etc. + // pointers to Python types, i.e. PySyclDeviceType, etc. + // and exported constants, i.e. USM_ARRAY_C_CONTIGUOUS, etc. + import_dpctl(); + + // Python type objects for classes implemented by dpctl + this->Py_SyclDeviceType_ = &Py_SyclDeviceType; + this->PySyclDeviceType_ = &PySyclDeviceType; + this->Py_SyclContextType_ = &Py_SyclContextType; + this->PySyclContextType_ = &PySyclContextType; + this->Py_SyclEventType_ = &Py_SyclEventType; + this->PySyclEventType_ = &PySyclEventType; + this->Py_SyclQueueType_ = &Py_SyclQueueType; + this->PySyclQueueType_ = &PySyclQueueType; + this->Py_MemoryType_ = &Py_MemoryType; + this->PyMemoryUSMDeviceType_ = &PyMemoryUSMDeviceType; + this->PyMemoryUSMSharedType_ = &PyMemoryUSMSharedType; + this->PyMemoryUSMHostType_ = &PyMemoryUSMHostType; + this->PyUSMArrayType_ = &PyUSMArrayType; + this->PySyclProgramType_ = &PySyclProgramType; + this->PySyclKernelType_ = &PySyclKernelType; + + // SyclDevice API + this->SyclDevice_GetDeviceRef_ = SyclDevice_GetDeviceRef; + this->SyclDevice_Make_ = SyclDevice_Make; + + // SyclContext API + this->SyclContext_GetContextRef_ = SyclContext_GetContextRef; + this->SyclContext_Make_ = SyclContext_Make; + + // SyclEvent API + this->SyclEvent_GetEventRef_ = SyclEvent_GetEventRef; + this->SyclEvent_Make_ = SyclEvent_Make; + + // SyclQueue API + this->SyclQueue_GetQueueRef_ = SyclQueue_GetQueueRef; + this->SyclQueue_Make_ = SyclQueue_Make; + + // dpctl.memory API + this->Memory_GetUsmPointer_ = Memory_GetUsmPointer; + this->Memory_GetOpaquePointer_ = Memory_GetOpaquePointer; + this->Memory_GetContextRef_ = Memory_GetContextRef; + this->Memory_GetQueueRef_ = Memory_GetQueueRef; + this->Memory_GetNumBytes_ = Memory_GetNumBytes; + this->Memory_Make_ = Memory_Make; + + // dpctl.program API + this->SyclKernel_GetKernelRef_ = SyclKernel_GetKernelRef; + this->SyclKernel_Make_ = SyclKernel_Make; + this->SyclProgram_GetKernelBundleRef_ = SyclProgram_GetKernelBundleRef; + this->SyclProgram_Make_ = SyclProgram_Make; + + // dpctl.tensor.usm_ndarray API + this->UsmNDArray_GetData_ = UsmNDArray_GetData; + this->UsmNDArray_GetNDim_ = UsmNDArray_GetNDim; + this->UsmNDArray_GetShape_ = UsmNDArray_GetShape; + this->UsmNDArray_GetStrides_ = UsmNDArray_GetStrides; + this->UsmNDArray_GetTypenum_ = UsmNDArray_GetTypenum; + this->UsmNDArray_GetElementSize_ = UsmNDArray_GetElementSize; + this->UsmNDArray_GetFlags_ = UsmNDArray_GetFlags; + this->UsmNDArray_GetQueueRef_ = UsmNDArray_GetQueueRef; + this->UsmNDArray_GetOffset_ = UsmNDArray_GetOffset; + this->UsmNDArray_GetUSMData_ = 
UsmNDArray_GetUSMData; + this->UsmNDArray_SetWritableFlag_ = UsmNDArray_SetWritableFlag; + this->UsmNDArray_MakeSimpleFromMemory_ = + UsmNDArray_MakeSimpleFromMemory; + this->UsmNDArray_MakeSimpleFromPtr_ = UsmNDArray_MakeSimpleFromPtr; + this->UsmNDArray_MakeFromPtr_ = UsmNDArray_MakeFromPtr; + + // constants + this->USM_ARRAY_C_CONTIGUOUS_ = USM_ARRAY_C_CONTIGUOUS; + this->USM_ARRAY_F_CONTIGUOUS_ = USM_ARRAY_F_CONTIGUOUS; + this->USM_ARRAY_WRITABLE_ = USM_ARRAY_WRITABLE; + this->UAR_BOOL_ = UAR_BOOL; + this->UAR_BYTE_ = UAR_BYTE; + this->UAR_UBYTE_ = UAR_UBYTE; + this->UAR_SHORT_ = UAR_SHORT; + this->UAR_USHORT_ = UAR_USHORT; + this->UAR_INT_ = UAR_INT; + this->UAR_UINT_ = UAR_UINT; + this->UAR_LONG_ = UAR_LONG; + this->UAR_ULONG_ = UAR_ULONG; + this->UAR_LONGLONG_ = UAR_LONGLONG; + this->UAR_ULONGLONG_ = UAR_ULONGLONG; + this->UAR_FLOAT_ = UAR_FLOAT; + this->UAR_DOUBLE_ = UAR_DOUBLE; + this->UAR_CFLOAT_ = UAR_CFLOAT; + this->UAR_CDOUBLE_ = UAR_CDOUBLE; + this->UAR_TYPE_SENTINEL_ = UAR_TYPE_SENTINEL; + this->UAR_HALF_ = UAR_HALF; + + // deduced disjoint types + this->UAR_INT8_ = UAR_BYTE; + this->UAR_UINT8_ = UAR_UBYTE; + this->UAR_INT16_ = UAR_SHORT; + this->UAR_UINT16_ = UAR_USHORT; + this->UAR_INT32_ = + platform_typeid_lookup( + UAR_LONG, UAR_INT, UAR_SHORT); + this->UAR_UINT32_ = + platform_typeid_lookup(UAR_ULONG, UAR_UINT, + UAR_USHORT); + this->UAR_INT64_ = + platform_typeid_lookup( + UAR_LONG, UAR_LONGLONG, UAR_INT); + this->UAR_UINT64_ = + platform_typeid_lookup( + UAR_ULONG, UAR_ULONGLONG, UAR_UINT); + + // create shared pointers to python objects used in type-casters + // for dpctl::memory::usm_memory and dpctl::tensor::usm_ndarray + sycl::queue q_{}; + PySyclQueueObject *py_q_tmp = + SyclQueue_Make(reinterpret_cast(&q_)); + const py::object &py_sycl_queue = py::reinterpret_steal( + reinterpret_cast(py_q_tmp)); + + default_sycl_queue_ = std::shared_ptr( + new py::object(py_sycl_queue), Deleter{}); + + py::module_ mod_memory = py::module_::import("dpctl.memory"); + const py::object &py_as_usm_memory = mod_memory.attr("as_usm_memory"); + as_usm_memory_ = std::shared_ptr( + new py::object{py_as_usm_memory}, Deleter{}); + + auto mem_kl = mod_memory.attr("MemoryUSMHost"); + const py::object &py_default_usm_memory = + mem_kl(1, py::arg("queue") = py_sycl_queue); + default_usm_memory_ = std::shared_ptr( + new py::object{py_default_usm_memory}, Deleter{}); + + py::module_ mod_usmarray = + py::module_::import("dpctl.tensor._usmarray"); + auto tensor_kl = mod_usmarray.attr("usm_ndarray"); + + const py::object &py_default_usm_ndarray = + tensor_kl(py::tuple(), py::arg("dtype") = py::str("u1"), + py::arg("buffer") = py_default_usm_memory); + + default_usm_ndarray_ = std::shared_ptr( + new py::object{py_default_usm_ndarray}, Deleter{}); + } + + dpctl_capi(dpctl_capi const &) = default; + dpctl_capi &operator=(dpctl_capi const &) = default; + dpctl_capi &operator=(dpctl_capi &&) = default; + +}; // struct dpctl_capi +} // namespace detail +} // namespace dpctl + +namespace pybind11::detail +{ +#define DPCTL_TYPE_CASTER(type, py_name) \ +protected: \ + std::unique_ptr value; \ + \ +public: \ + static constexpr auto name = py_name; \ + template < \ + typename T_, \ + ::pybind11::detail::enable_if_t< \ + std::is_same>::value, \ + int> = 0> \ + static ::pybind11::handle cast(T_ *src, \ + ::pybind11::return_value_policy policy, \ + ::pybind11::handle parent) \ + { \ + if (!src) \ + return ::pybind11::none().release(); \ + if (policy == ::pybind11::return_value_policy::take_ownership) { \ + auto 
h = cast(std::move(*src), policy, parent); \ + delete src; \ + return h; \ + } \ + return cast(*src, policy, parent); \ + } \ + operator type *() \ + { \ + return value.get(); \ + } /* NOLINT(bugprone-macro-parentheses) */ \ + operator type &() \ + { \ + return *value; \ + } /* NOLINT(bugprone-macro-parentheses) */ \ + operator type &&() && \ + { \ + return std::move(*value); \ + } /* NOLINT(bugprone-macro-parentheses) */ \ + template \ + using cast_op_type = ::pybind11::detail::movable_cast_op_type + +/* This type caster associates ``sycl::queue`` C++ class with + * :class:`dpctl.SyclQueue` for the purposes of generation of + * Python bindings by pybind11. + */ +template <> +struct type_caster +{ +public: + bool load(handle src, bool) + { + PyObject *source = src.ptr(); + auto const &api = ::dpctl::detail::dpctl_capi::get(); + if (api.PySyclQueue_Check_(source)) { + DPCTLSyclQueueRef QRef = api.SyclQueue_GetQueueRef_( + reinterpret_cast(source)); + value = std::make_unique( + *(reinterpret_cast(QRef))); + return true; + } + else { + throw py::type_error( + "Input is of unexpected type, expected dpctl.SyclQueue"); + } + } + + static handle cast(sycl::queue src, return_value_policy, handle) + { + auto const &api = ::dpctl::detail::dpctl_capi::get(); + auto tmp = + api.SyclQueue_Make_(reinterpret_cast(&src)); + return handle(reinterpret_cast(tmp)); + } + + DPCTL_TYPE_CASTER(sycl::queue, _("dpctl.SyclQueue")); +}; + +/* This type caster associates ``sycl::device`` C++ class with + * :class:`dpctl.SyclDevice` for the purposes of generation of + * Python bindings by pybind11. + */ +template <> +struct type_caster +{ +public: + bool load(handle src, bool) + { + PyObject *source = src.ptr(); + auto const &api = ::dpctl::detail::dpctl_capi::get(); + if (api.PySyclDevice_Check_(source)) { + DPCTLSyclDeviceRef DRef = api.SyclDevice_GetDeviceRef_( + reinterpret_cast(source)); + value = std::make_unique( + *(reinterpret_cast(DRef))); + return true; + } + else { + throw py::type_error( + "Input is of unexpected type, expected dpctl.SyclDevice"); + } + } + + static handle cast(sycl::device src, return_value_policy, handle) + { + auto const &api = ::dpctl::detail::dpctl_capi::get(); + auto tmp = + api.SyclDevice_Make_(reinterpret_cast(&src)); + return handle(reinterpret_cast(tmp)); + } + + DPCTL_TYPE_CASTER(sycl::device, _("dpctl.SyclDevice")); +}; + +/* This type caster associates ``sycl::context`` C++ class with + * :class:`dpctl.SyclContext` for the purposes of generation of + * Python bindings by pybind11. + */ +template <> +struct type_caster +{ +public: + bool load(handle src, bool) + { + PyObject *source = src.ptr(); + auto const &api = ::dpctl::detail::dpctl_capi::get(); + if (api.PySyclContext_Check_(source)) { + DPCTLSyclContextRef CRef = api.SyclContext_GetContextRef_( + reinterpret_cast(source)); + value = std::make_unique( + *(reinterpret_cast(CRef))); + return true; + } + else { + throw py::type_error( + "Input is of unexpected type, expected dpctl.SyclContext"); + } + } + + static handle cast(sycl::context src, return_value_policy, handle) + { + auto const &api = ::dpctl::detail::dpctl_capi::get(); + auto tmp = + api.SyclContext_Make_(reinterpret_cast(&src)); + return handle(reinterpret_cast(tmp)); + } + + DPCTL_TYPE_CASTER(sycl::context, _("dpctl.SyclContext")); +}; + +/* This type caster associates ``sycl::event`` C++ class with + * :class:`dpctl.SyclEvent` for the purposes of generation of + * Python bindings by pybind11. 
+ */ +template <> +struct type_caster +{ +public: + bool load(handle src, bool) + { + PyObject *source = src.ptr(); + auto const &api = ::dpctl::detail::dpctl_capi::get(); + if (api.PySyclEvent_Check_(source)) { + DPCTLSyclEventRef ERef = api.SyclEvent_GetEventRef_( + reinterpret_cast(source)); + value = std::make_unique( + *(reinterpret_cast(ERef))); + return true; + } + else { + throw py::type_error( + "Input is of unexpected type, expected dpctl.SyclEvent"); + } + } + + static handle cast(sycl::event src, return_value_policy, handle) + { + auto const &api = ::dpctl::detail::dpctl_capi::get(); + auto tmp = + api.SyclEvent_Make_(reinterpret_cast(&src)); + return handle(reinterpret_cast(tmp)); + } + + DPCTL_TYPE_CASTER(sycl::event, _("dpctl.SyclEvent")); +}; + +/* This type caster associates ``sycl::kernel`` C++ class with + * :class:`dpctl.program.SyclKernel` for the purposes of generation of + * Python bindings by pybind11. + */ +template <> +struct type_caster +{ +public: + bool load(handle src, bool) + { + PyObject *source = src.ptr(); + auto const &api = ::dpctl::detail::dpctl_capi::get(); + if (api.PySyclKernel_Check_(source)) { + DPCTLSyclKernelRef KRef = api.SyclKernel_GetKernelRef_( + reinterpret_cast(source)); + value = std::make_unique( + *(reinterpret_cast(KRef))); + return true; + } + else { + throw py::type_error("Input is of unexpected type, expected " + "dpctl.program.SyclKernel"); + } + } + + static handle cast(sycl::kernel src, return_value_policy, handle) + { + auto const &api = ::dpctl::detail::dpctl_capi::get(); + auto tmp = + api.SyclKernel_Make_(reinterpret_cast(&src), + "dpctl4pybind11_kernel"); + return handle(reinterpret_cast(tmp)); + } + + DPCTL_TYPE_CASTER(sycl::kernel, _("dpctl.program.SyclKernel")); +}; + +/* This type caster associates + * ``sycl::kernel_bundle`` C++ class with + * :class:`dpctl.program.SyclProgram` for the purposes of generation of + * Python bindings by pybind11. + */ +template <> +struct type_caster> +{ +public: + bool load(handle src, bool) + { + PyObject *source = src.ptr(); + auto const &api = ::dpctl::detail::dpctl_capi::get(); + if (api.PySyclProgram_Check_(source)) { + DPCTLSyclKernelBundleRef KBRef = + api.SyclProgram_GetKernelBundleRef_( + reinterpret_cast(source)); + value = std::make_unique< + sycl::kernel_bundle>( + *(reinterpret_cast< + sycl::kernel_bundle *>( + KBRef))); + return true; + } + else { + throw py::type_error("Input is of unexpected type, expected " + "dpctl.program.SyclProgram"); + } + } + + static handle cast(sycl::kernel_bundle src, + return_value_policy, + handle) + { + auto const &api = ::dpctl::detail::dpctl_capi::get(); + auto tmp = api.SyclProgram_Make_( + reinterpret_cast(&src)); + return handle(reinterpret_cast(tmp)); + } + + DPCTL_TYPE_CASTER(sycl::kernel_bundle, + _("dpctl.program.SyclProgram")); +}; + +/* This type caster associates + * ``sycl::half`` C++ class with Python :class:`float` for the purposes + * of generation of Python bindings by pybind11. 
+ */ +template <> +struct type_caster +{ +public: + bool load(handle src, bool convert) + { + double py_value; + + if (!src) { + return false; + } + + PyObject *source = src.ptr(); + + if (convert || PyFloat_Check(source)) { + py_value = PyFloat_AsDouble(source); + } + else { + return false; + } + + bool py_err = (py_value == double(-1)) && PyErr_Occurred(); + + if (py_err) { + PyErr_Clear(); + if (convert && (PyNumber_Check(source) != 0)) { + auto tmp = reinterpret_steal(PyNumber_Float(source)); + return load(tmp, false); + } + return false; + } + value = static_cast(py_value); + return true; + } + + static handle cast(sycl::half src, return_value_policy, handle) + { + return PyFloat_FromDouble(static_cast(src)); + } + + PYBIND11_TYPE_CASTER(sycl::half, _("float")); +}; +} // namespace pybind11::detail + +namespace dpctl +{ +namespace memory +{ +// since PYBIND11_OBJECT_CVT uses error_already_set without namespace, +// this allows to avoid compilation error +using pybind11::error_already_set; + +class usm_memory : public py::object +{ +public: + PYBIND11_OBJECT_CVT( + usm_memory, + py::object, + [](PyObject *o) -> bool { + return PyObject_TypeCheck( + o, ::dpctl::detail::dpctl_capi::get().Py_MemoryType_) != + 0; + }, + [](PyObject *o) -> PyObject * { return as_usm_memory(o); }) + + usm_memory() + : py::object( + ::dpctl::detail::dpctl_capi::get().default_usm_memory_pyobj(), + borrowed_t{}) + { + if (!m_ptr) + throw py::error_already_set(); + } + + /*! @brief Create usm_memory object from shared pointer that manages + * lifetime of the USM allocation. + */ + usm_memory(void *usm_ptr, + std::size_t nbytes, + const sycl::queue &q, + std::shared_ptr shptr) + { + auto const &api = ::dpctl::detail::dpctl_capi::get(); + DPCTLSyclUSMRef usm_ref = reinterpret_cast(usm_ptr); + auto q_uptr = std::make_unique(q); + DPCTLSyclQueueRef QRef = + reinterpret_cast(q_uptr.get()); + + auto vacuous_destructor = []() {}; + py::capsule mock_owner(vacuous_destructor); + + // create memory object owned by mock_owner, it is a new reference + PyObject *_memory = + api.Memory_Make_(usm_ref, nbytes, QRef, mock_owner.ptr()); + auto ref_count_decrementer = [](PyObject *o) noexcept { Py_DECREF(o); }; + + using py_uptrT = + std::unique_ptr; + + if (!_memory) { + throw py::error_already_set(); + } + + auto memory_uptr = py_uptrT(_memory, ref_count_decrementer); + std::shared_ptr *opaque_ptr = new std::shared_ptr(shptr); + + Py_MemoryObject *memobj = reinterpret_cast(_memory); + // replace mock_owner capsule as the owner + memobj->refobj = Py_None; + // set opaque ptr field, usm_memory now knowns that USM is managed + // by smart pointer + memobj->_opaque_ptr = reinterpret_cast(opaque_ptr); + + // _memory will delete created copies of sycl::queue, and + // std::shared_ptr and the deleter of the shared_ptr is + // supposed to free the USM allocation + m_ptr = _memory; + q_uptr.release(); + memory_uptr.release(); + } + + sycl::queue get_queue() const + { + Py_MemoryObject *mem_obj = reinterpret_cast(m_ptr); + auto const &api = ::dpctl::detail::dpctl_capi::get(); + DPCTLSyclQueueRef QRef = api.Memory_GetQueueRef_(mem_obj); + sycl::queue *obj_q = reinterpret_cast(QRef); + return *obj_q; + } + + char *get_pointer() const + { + Py_MemoryObject *mem_obj = reinterpret_cast(m_ptr); + auto const &api = ::dpctl::detail::dpctl_capi::get(); + DPCTLSyclUSMRef MRef = api.Memory_GetUsmPointer_(mem_obj); + return reinterpret_cast(MRef); + } + + std::size_t get_nbytes() const + { + auto const &api = ::dpctl::detail::dpctl_capi::get(); + 
Py_MemoryObject *mem_obj = reinterpret_cast(m_ptr); + return api.Memory_GetNumBytes_(mem_obj); + } + + bool is_managed_by_smart_ptr() const + { + auto const &api = ::dpctl::detail::dpctl_capi::get(); + Py_MemoryObject *mem_obj = reinterpret_cast(m_ptr); + const void *opaque_ptr = api.Memory_GetOpaquePointer_(mem_obj); + + return bool(opaque_ptr); + } + + const std::shared_ptr &get_smart_ptr_owner() const + { + auto const &api = ::dpctl::detail::dpctl_capi::get(); + Py_MemoryObject *mem_obj = reinterpret_cast(m_ptr); + void *opaque_ptr = api.Memory_GetOpaquePointer_(mem_obj); + + if (opaque_ptr) { + auto shptr_ptr = + reinterpret_cast *>(opaque_ptr); + return *shptr_ptr; + } + else { + throw std::runtime_error( + "Memory object does not have smart pointer " + "managing lifetime of USM allocation"); + } + } + +protected: + static PyObject *as_usm_memory(PyObject *o) + { + if (o == nullptr) { + PyErr_SetString(PyExc_ValueError, + "cannot create a usm_memory from a nullptr"); + return nullptr; + } + + auto converter = + ::dpctl::detail::dpctl_capi::get().as_usm_memory_pyobj(); + + py::object res; + try { + res = converter(py::handle(o)); + } catch (const py::error_already_set &e) { + return nullptr; + } + return res.ptr(); + } +}; +} // end namespace memory + +namespace tensor +{ +inline std::vector + c_contiguous_strides(int nd, + const py::ssize_t *shape, + py::ssize_t element_size = 1) +{ + if (nd > 0) { + std::vector c_strides(nd, element_size); + for (int ic = nd - 1; ic > 0;) { + py::ssize_t next_v = c_strides[ic] * shape[ic]; + c_strides[--ic] = next_v; + } + return c_strides; + } + else { + return std::vector(); + } +} + +inline std::vector + f_contiguous_strides(int nd, + const py::ssize_t *shape, + py::ssize_t element_size = 1) +{ + if (nd > 0) { + std::vector f_strides(nd, element_size); + for (int i = 0; i < nd - 1;) { + py::ssize_t next_v = f_strides[i] * shape[i]; + f_strides[++i] = next_v; + } + return f_strides; + } + else { + return std::vector(); + } +} + +inline std::vector + c_contiguous_strides(const std::vector &shape, + py::ssize_t element_size = 1) +{ + return c_contiguous_strides(shape.size(), shape.data(), element_size); +} + +inline std::vector + f_contiguous_strides(const std::vector &shape, + py::ssize_t element_size = 1) +{ + return f_contiguous_strides(shape.size(), shape.data(), element_size); +} + +class usm_ndarray : public py::object +{ +public: + PYBIND11_OBJECT(usm_ndarray, py::object, [](PyObject *o) -> bool { + return PyObject_TypeCheck( + o, ::dpctl::detail::dpctl_capi::get().PyUSMArrayType_) != 0; + }) + + usm_ndarray() + : py::object( + ::dpctl::detail::dpctl_capi::get().default_usm_ndarray_pyobj(), + borrowed_t{}) + { + if (!m_ptr) + throw py::error_already_set(); + } + + char *get_data() const + { + PyUSMArrayObject *raw_ar = usm_array_ptr(); + + auto const &api = ::dpctl::detail::dpctl_capi::get(); + return api.UsmNDArray_GetData_(raw_ar); + } + + template + T *get_data() const + { + return reinterpret_cast(get_data()); + } + + int get_ndim() const + { + PyUSMArrayObject *raw_ar = usm_array_ptr(); + + auto const &api = ::dpctl::detail::dpctl_capi::get(); + return api.UsmNDArray_GetNDim_(raw_ar); + } + + const py::ssize_t *get_shape_raw() const + { + PyUSMArrayObject *raw_ar = usm_array_ptr(); + + auto const &api = ::dpctl::detail::dpctl_capi::get(); + return api.UsmNDArray_GetShape_(raw_ar); + } + + std::vector get_shape_vector() const + { + auto raw_sh = get_shape_raw(); + auto nd = get_ndim(); + + std::vector shape_vector(raw_sh, raw_sh + nd); + 
return shape_vector; + } + + py::ssize_t get_shape(int i) const + { + auto shape_ptr = get_shape_raw(); + return shape_ptr[i]; + } + + const py::ssize_t *get_strides_raw() const + { + PyUSMArrayObject *raw_ar = usm_array_ptr(); + + auto const &api = ::dpctl::detail::dpctl_capi::get(); + return api.UsmNDArray_GetStrides_(raw_ar); + } + + std::vector get_strides_vector() const + { + auto raw_st = get_strides_raw(); + auto nd = get_ndim(); + + if (raw_st == nullptr) { + auto is_c_contig = is_c_contiguous(); + auto is_f_contig = is_f_contiguous(); + auto raw_sh = get_shape_raw(); + if (is_c_contig) { + const auto &contig_strides = c_contiguous_strides(nd, raw_sh); + return contig_strides; + } + else if (is_f_contig) { + const auto &contig_strides = f_contiguous_strides(nd, raw_sh); + return contig_strides; + } + else { + throw std::runtime_error("Invalid array encountered when " + "building strides"); + } + } + else { + std::vector st_vec(raw_st, raw_st + nd); + return st_vec; + } + } + + py::ssize_t get_size() const + { + PyUSMArrayObject *raw_ar = usm_array_ptr(); + + auto const &api = ::dpctl::detail::dpctl_capi::get(); + int ndim = api.UsmNDArray_GetNDim_(raw_ar); + const py::ssize_t *shape = api.UsmNDArray_GetShape_(raw_ar); + + py::ssize_t nelems = 1; + for (int i = 0; i < ndim; ++i) { + nelems *= shape[i]; + } + + assert(nelems >= 0); + return nelems; + } + + std::pair get_minmax_offsets() const + { + PyUSMArrayObject *raw_ar = usm_array_ptr(); + + auto const &api = ::dpctl::detail::dpctl_capi::get(); + int nd = api.UsmNDArray_GetNDim_(raw_ar); + const py::ssize_t *shape = api.UsmNDArray_GetShape_(raw_ar); + const py::ssize_t *strides = api.UsmNDArray_GetStrides_(raw_ar); + + py::ssize_t offset_min = 0; + py::ssize_t offset_max = 0; + if (strides == nullptr) { + py::ssize_t stride(1); + for (int i = 0; i < nd; ++i) { + offset_max += stride * (shape[i] - 1); + stride *= shape[i]; + } + } + else { + for (int i = 0; i < nd; ++i) { + py::ssize_t delta = strides[i] * (shape[i] - 1); + if (strides[i] > 0) { + offset_max += delta; + } + else { + offset_min += delta; + } + } + } + return std::make_pair(offset_min, offset_max); + } + + sycl::queue get_queue() const + { + PyUSMArrayObject *raw_ar = usm_array_ptr(); + + auto const &api = ::dpctl::detail::dpctl_capi::get(); + DPCTLSyclQueueRef QRef = api.UsmNDArray_GetQueueRef_(raw_ar); + return *(reinterpret_cast(QRef)); + } + + sycl::device get_device() const + { + PyUSMArrayObject *raw_ar = usm_array_ptr(); + + auto const &api = ::dpctl::detail::dpctl_capi::get(); + DPCTLSyclQueueRef QRef = api.UsmNDArray_GetQueueRef_(raw_ar); + return reinterpret_cast(QRef)->get_device(); + } + + int get_typenum() const + { + PyUSMArrayObject *raw_ar = usm_array_ptr(); + + auto const &api = ::dpctl::detail::dpctl_capi::get(); + return api.UsmNDArray_GetTypenum_(raw_ar); + } + + int get_flags() const + { + PyUSMArrayObject *raw_ar = usm_array_ptr(); + + auto const &api = ::dpctl::detail::dpctl_capi::get(); + return api.UsmNDArray_GetFlags_(raw_ar); + } + + int get_elemsize() const + { + PyUSMArrayObject *raw_ar = usm_array_ptr(); + + auto const &api = ::dpctl::detail::dpctl_capi::get(); + return api.UsmNDArray_GetElementSize_(raw_ar); + } + + bool is_c_contiguous() const + { + int flags = get_flags(); + auto const &api = ::dpctl::detail::dpctl_capi::get(); + return static_cast(flags & api.USM_ARRAY_C_CONTIGUOUS_); + } + + bool is_f_contiguous() const + { + int flags = get_flags(); + auto const &api = ::dpctl::detail::dpctl_capi::get(); + return static_cast(flags 
& api.USM_ARRAY_F_CONTIGUOUS_); + } + + bool is_writable() const + { + int flags = get_flags(); + auto const &api = ::dpctl::detail::dpctl_capi::get(); + return static_cast(flags & api.USM_ARRAY_WRITABLE_); + } + + /*! @brief Get usm_data property of array */ + py::object get_usm_data() const + { + PyUSMArrayObject *raw_ar = usm_array_ptr(); + + auto const &api = ::dpctl::detail::dpctl_capi::get(); + // UsmNDArray_GetUSMData_ gives a new reference + PyObject *usm_data = api.UsmNDArray_GetUSMData_(raw_ar); + + // pass reference ownership to py::object + return py::reinterpret_steal(usm_data); + } + + bool is_managed_by_smart_ptr() const + { + PyUSMArrayObject *raw_ar = usm_array_ptr(); + + auto const &api = ::dpctl::detail::dpctl_capi::get(); + PyObject *usm_data = api.UsmNDArray_GetUSMData_(raw_ar); + + if (!PyObject_TypeCheck(usm_data, api.Py_MemoryType_)) { + Py_DECREF(usm_data); + return false; + } + + Py_MemoryObject *mem_obj = + reinterpret_cast(usm_data); + const void *opaque_ptr = api.Memory_GetOpaquePointer_(mem_obj); + + Py_DECREF(usm_data); + return bool(opaque_ptr); + } + + const std::shared_ptr &get_smart_ptr_owner() const + { + PyUSMArrayObject *raw_ar = usm_array_ptr(); + + auto const &api = ::dpctl::detail::dpctl_capi::get(); + + PyObject *usm_data = api.UsmNDArray_GetUSMData_(raw_ar); + + if (!PyObject_TypeCheck(usm_data, api.Py_MemoryType_)) { + Py_DECREF(usm_data); + throw std::runtime_error( + "usm_ndarray object does not have Memory object " + "managing lifetime of USM allocation"); + } + + Py_MemoryObject *mem_obj = + reinterpret_cast(usm_data); + void *opaque_ptr = api.Memory_GetOpaquePointer_(mem_obj); + Py_DECREF(usm_data); + + if (opaque_ptr) { + auto shptr_ptr = + reinterpret_cast *>(opaque_ptr); + return *shptr_ptr; + } + else { + throw std::runtime_error( + "Memory object underlying usm_ndarray does not have " + "smart pointer managing lifetime of USM allocation"); + } + } + +private: + PyUSMArrayObject *usm_array_ptr() const + { + return reinterpret_cast(m_ptr); + } +}; +} // end namespace tensor + +namespace utils +{ +namespace detail +{ +struct ManagedMemory +{ + + static bool is_usm_managed_by_shared_ptr(const py::object &h) + { + if (py::isinstance(h)) { + const auto &usm_memory_inst = + py::cast(h); + return usm_memory_inst.is_managed_by_smart_ptr(); + } + else if (py::isinstance(h)) { + const auto &usm_array_inst = + py::cast(h); + return usm_array_inst.is_managed_by_smart_ptr(); + } + + return false; + } + + static const std::shared_ptr &extract_shared_ptr(const py::object &h) + { + if (py::isinstance(h)) { + const auto &usm_memory_inst = + py::cast(h); + return usm_memory_inst.get_smart_ptr_owner(); + } + else if (py::isinstance(h)) { + const auto &usm_array_inst = + py::cast(h); + return usm_array_inst.get_smart_ptr_owner(); + } + + throw std::runtime_error( + "Attempted extraction of shared_ptr on an unrecognized type"); + } +}; +} // end of namespace detail + +template +sycl::event keep_args_alive(sycl::queue &q, + const py::object (&py_objs)[num], + const std::vector &depends = {}) +{ + std::size_t n_objects_held = 0; + std::array, num> shp_arr{}; + + std::size_t n_usm_owners_held = 0; + std::array, num> shp_usm{}; + + for (std::size_t i = 0; i < num; ++i) { + const auto &py_obj_i = py_objs[i]; + if (detail::ManagedMemory::is_usm_managed_by_shared_ptr(py_obj_i)) { + const auto &shp = + detail::ManagedMemory::extract_shared_ptr(py_obj_i); + shp_usm[n_usm_owners_held] = shp; + ++n_usm_owners_held; + } + else { + shp_arr[n_objects_held] = 
std::make_shared(py_obj_i); + shp_arr[n_objects_held]->inc_ref(); + ++n_objects_held; + } + } + + bool use_depends = true; + sycl::event host_task_ev; + + if (n_usm_owners_held > 0) { + host_task_ev = q.submit([&](sycl::handler &cgh) { + if (use_depends) { + cgh.depends_on(depends); + use_depends = false; + } + else { + cgh.depends_on(host_task_ev); + } + cgh.host_task([shp_usm = std::move(shp_usm)]() { + // no body, but shared pointers are captured in + // the lambda, ensuring that USM allocation is + // kept alive + }); + }); + } + + if (n_objects_held > 0) { + host_task_ev = q.submit([&](sycl::handler &cgh) { + if (use_depends) { + cgh.depends_on(depends); + use_depends = false; + } + else { + cgh.depends_on(host_task_ev); + } + cgh.host_task([n_objects_held, shp_arr = std::move(shp_arr)]() { + py::gil_scoped_acquire acquire; + + for (std::size_t i = 0; i < n_objects_held; ++i) { + shp_arr[i]->dec_ref(); + } + }); + }); + } + + return host_task_ev; +} + +/*! @brief Check if all allocation queues are the same as the + execution queue */ +template +bool queues_are_compatible(const sycl::queue &exec_q, + const sycl::queue (&alloc_qs)[num]) +{ + for (std::size_t i = 0; i < num; ++i) { + + if (exec_q != alloc_qs[i]) { + return false; + } + } + return true; +} + +/*! @brief Check if all allocation queues of usm_ndarays are the same as + the execution queue */ +template +bool queues_are_compatible(const sycl::queue &exec_q, + const ::dpctl::tensor::usm_ndarray (&arrs)[num]) +{ + for (std::size_t i = 0; i < num; ++i) { + + if (exec_q != arrs[i].get_queue()) { + return false; + } + } + return true; +} +} // end namespace utils +} // end namespace dpctl From 1eef13fd5b556d010696360dce39df05d341199d Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 23 Jan 2026 04:27:03 -0800 Subject: [PATCH 04/18] Update windows extension to work with dpnp4pybind11.hpp --- .../libtensor/include/utils/memory_overlap.hpp | 4 ++++ .../libtensor/include/utils/output_validation.hpp | 4 ++++ .../libtensor/include/utils/type_dispatch.hpp | 4 ++++ dpnp/backend/extensions/window/CMakeLists.txt | 15 +++++---------- dpnp/backend/extensions/window/common.hpp | 4 ++-- dpnp/backend/extensions/window/kaiser.hpp | 4 ++-- 6 files changed, 21 insertions(+), 14 deletions(-) diff --git a/dpctl/tensor/libtensor/include/utils/memory_overlap.hpp b/dpctl/tensor/libtensor/include/utils/memory_overlap.hpp index 3b1bc772b514..db9dfc30eb46 100644 --- a/dpctl/tensor/libtensor/include/utils/memory_overlap.hpp +++ b/dpctl/tensor/libtensor/include/utils/memory_overlap.hpp @@ -38,7 +38,11 @@ #include +#if __has_include() +#include "dpnp4pybind11.hpp" +#else #include "dpctl4pybind11.hpp" +#endif /* @brief check for overlap of memory regions behind arrays. 
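The `__has_include` guard added above to memory_overlap.hpp (and, per the diffstat, to output_validation.hpp and type_dispatch.hpp) follows the C++17 include-fallback idiom: prefer the dpnp-provided pybind11 glue header when it is reachable on the include path, otherwise fall back to the stock dpctl header. Below is a hedged sketch of that idiom as it might appear in a consuming extension header; the exact spelling of the `__has_include` argument is not fully visible in the hunk above, and the `example_op` declaration is hypothetical, shown only to illustrate how either glue header provides the dpctl::tensor::usm_ndarray wrapper used by extension entry points.

#pragma once

#include <vector>
#include <sycl/sycl.hpp>

// Prefer dpnp's bundled glue header; fall back to the header shipped with
// dpctl. Both provide the pybind11 integration (usm_ndarray wrapper, queue
// and device conversions) that the declaration below relies on.
#if __has_include(<dpnp4pybind11.hpp>)
#include "dpnp4pybind11.hpp"
#else
#include "dpctl4pybind11.hpp"
#endif

// Hypothetical extension entry point: the usm_ndarray arguments are converted
// from Python dpctl.tensor.usm_ndarray objects by the glue header's pybind11
// integration; the include directories are assumed to match the CMake changes
// in this patch series.
sycl::event example_op(sycl::queue &exec_q,
                       const dpctl::tensor::usm_ndarray &src,
                       const dpctl::tensor::usm_ndarray &dst,
                       const std::vector<sycl::event> &depends = {});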
diff --git a/dpctl/tensor/libtensor/include/utils/output_validation.hpp b/dpctl/tensor/libtensor/include/utils/output_validation.hpp index 1397efdee230..7a70f395dfe1 100644 --- a/dpctl/tensor/libtensor/include/utils/output_validation.hpp +++ b/dpctl/tensor/libtensor/include/utils/output_validation.hpp @@ -37,7 +37,11 @@ #include +#if __has_include() +#include "dpnp4pybind11.hpp" +#else #include "dpctl4pybind11.hpp" +#endif namespace dpctl::tensor::validation { diff --git a/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp b/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp index 5ec84783c901..38b5b43ca696 100644 --- a/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp +++ b/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp @@ -36,7 +36,11 @@ #include #include +#if __has_include() +#include "dpnp4pybind11.hpp" +#else #include "dpctl4pybind11.hpp" +#endif #include "type_dispatch_building.hpp" diff --git a/dpnp/backend/extensions/window/CMakeLists.txt b/dpnp/backend/extensions/window/CMakeLists.txt index 186668bb1662..6fe04e334f42 100644 --- a/dpnp/backend/extensions/window/CMakeLists.txt +++ b/dpnp/backend/extensions/window/CMakeLists.txt @@ -62,17 +62,12 @@ set_target_properties( target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../ + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../../ + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${CMAKE_SOURCE_DIR}/dpnp/backend/include + ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include ) -target_include_directories( - ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common -) -target_include_directories( - ${python_module_name} - PRIVATE ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include -) - target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) if(WIN32) diff --git a/dpnp/backend/extensions/window/common.hpp b/dpnp/backend/extensions/window/common.hpp index cb084e972d78..b95aea6259e3 100644 --- a/dpnp/backend/extensions/window/common.hpp +++ b/dpnp/backend/extensions/window/common.hpp @@ -30,9 +30,10 @@ #include #include + #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" // dpctl tensor headers #include "utils/output_validation.hpp" @@ -41,7 +42,6 @@ namespace dpnp::extensions::window { - namespace dpctl_td_ns = dpctl::tensor::type_dispatch; namespace py = pybind11; diff --git a/dpnp/backend/extensions/window/kaiser.hpp b/dpnp/backend/extensions/window/kaiser.hpp index 0a4712cc594e..46227a60669f 100644 --- a/dpnp/backend/extensions/window/kaiser.hpp +++ b/dpnp/backend/extensions/window/kaiser.hpp @@ -28,9 +28,10 @@ #pragma once -#include #include +#include "dpnp4pybind11.hpp" + namespace dpnp::extensions::window { extern std::pair @@ -40,5 +41,4 @@ extern std::pair const std::vector &depends); extern void init_kaiser_dispatch_vectors(void); - } // namespace dpnp::extensions::window From 4dd62e8c655d16314df719f8d71267883fb41624 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 23 Jan 2026 04:31:51 -0800 Subject: [PATCH 05/18] Update blas extension to work with dpnp4pybind11.hpp --- dpnp/backend/extensions/blas/CMakeLists.txt | 16 +++++----------- dpnp/backend/extensions/blas/gemm.hpp | 2 +- dpnp/backend/extensions/blas/gemv.hpp | 2 +- dpnp/backend/extensions/blas/syrk.hpp | 2 +- 4 files changed, 8 insertions(+), 14 deletions(-) diff --git a/dpnp/backend/extensions/blas/CMakeLists.txt b/dpnp/backend/extensions/blas/CMakeLists.txt index 2da35cc695ac..0015eda84843 100644 --- a/dpnp/backend/extensions/blas/CMakeLists.txt +++ 
b/dpnp/backend/extensions/blas/CMakeLists.txt @@ -65,18 +65,12 @@ set_target_properties( target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../ + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${CMAKE_SOURCE_DIR}/dpnp/backend/include + ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include ) -target_include_directories( - ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common -) -target_include_directories( - ${python_module_name} - PRIVATE ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include -) - -target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS}) +target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) if(WIN32) target_compile_options( diff --git a/dpnp/backend/extensions/blas/gemm.hpp b/dpnp/backend/extensions/blas/gemm.hpp index 997d515f98a0..59a3d911d885 100644 --- a/dpnp/backend/extensions/blas/gemm.hpp +++ b/dpnp/backend/extensions/blas/gemm.hpp @@ -31,7 +31,7 @@ #include #include -#include +#include "dpnp4pybind11.hpp" namespace dpnp::extensions::blas { diff --git a/dpnp/backend/extensions/blas/gemv.hpp b/dpnp/backend/extensions/blas/gemv.hpp index afe0c6387aa9..6da71ed0964f 100644 --- a/dpnp/backend/extensions/blas/gemv.hpp +++ b/dpnp/backend/extensions/blas/gemv.hpp @@ -31,7 +31,7 @@ #include #include -#include +#include "dpnp4pybind11.hpp" namespace dpnp::extensions::blas { diff --git a/dpnp/backend/extensions/blas/syrk.hpp b/dpnp/backend/extensions/blas/syrk.hpp index 580239b28008..f6cec189489a 100644 --- a/dpnp/backend/extensions/blas/syrk.hpp +++ b/dpnp/backend/extensions/blas/syrk.hpp @@ -31,7 +31,7 @@ #include #include -#include +#include "dpnp4pybind11.hpp" namespace dpnp::extensions::blas { From 6157a52ecc836702016c27201f667e3fa31a58da Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 23 Jan 2026 04:46:22 -0800 Subject: [PATCH 06/18] Update fft extension to work with dpnp4pybind11.hpp --- dpnp/backend/extensions/fft/CMakeLists.txt | 7 ++++--- dpnp/backend/extensions/fft/in_place.hpp | 5 ++++- dpnp/backend/extensions/fft/in_place.tpp | 10 +++++++++- dpnp/backend/extensions/fft/out_of_place.hpp | 5 ++++- dpnp/backend/extensions/fft/out_of_place.tpp | 12 +++++++++++- 5 files changed, 32 insertions(+), 7 deletions(-) diff --git a/dpnp/backend/extensions/fft/CMakeLists.txt b/dpnp/backend/extensions/fft/CMakeLists.txt index 0c2c446fe8a0..0569ecc8bca4 100644 --- a/dpnp/backend/extensions/fft/CMakeLists.txt +++ b/dpnp/backend/extensions/fft/CMakeLists.txt @@ -59,10 +59,11 @@ set_target_properties( target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include + PRIVATE + ${CMAKE_SOURCE_DIR}/dpnp/backend/include + ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include ) - -target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS}) +target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) if(WIN32) target_compile_options( diff --git a/dpnp/backend/extensions/fft/in_place.hpp b/dpnp/backend/extensions/fft/in_place.hpp index 7eed11565b9e..bc35201b9b6e 100644 --- a/dpnp/backend/extensions/fft/in_place.hpp +++ b/dpnp/backend/extensions/fft/in_place.hpp @@ -28,10 +28,13 @@ #pragma once +#include +#include + #include #include -#include +#include "dpnp4pybind11.hpp" namespace dpnp::extensions::fft { diff --git a/dpnp/backend/extensions/fft/in_place.tpp b/dpnp/backend/extensions/fft/in_place.tpp index 4bc166b0e7ae..ace535284de6 100644 --- a/dpnp/backend/extensions/fft/in_place.tpp +++ 
b/dpnp/backend/extensions/fft/in_place.tpp @@ -27,15 +27,23 @@ //***************************************************************************** #pragma once + #include +#include +#include +#include + +#include #include #include -#include +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "fft_utils.hpp" +#include "in_place.hpp" + // dpctl tensor headers #include "utils/output_validation.hpp" diff --git a/dpnp/backend/extensions/fft/out_of_place.hpp b/dpnp/backend/extensions/fft/out_of_place.hpp index 811a2bd6d1c4..55ca9383baaf 100644 --- a/dpnp/backend/extensions/fft/out_of_place.hpp +++ b/dpnp/backend/extensions/fft/out_of_place.hpp @@ -28,10 +28,13 @@ #pragma once +#include +#include + #include #include -#include +#include "dpnp4pybind11.hpp" namespace dpnp::extensions::fft { diff --git a/dpnp/backend/extensions/fft/out_of_place.tpp b/dpnp/backend/extensions/fft/out_of_place.tpp index 290408dc60bc..e468246ea7af 100644 --- a/dpnp/backend/extensions/fft/out_of_place.tpp +++ b/dpnp/backend/extensions/fft/out_of_place.tpp @@ -27,15 +27,25 @@ //***************************************************************************** #pragma once + +#include +#include #include +#include +#include +#include #include #include -#include +#include + +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "fft_utils.hpp" +#include "out_of_place.hpp" + // dpctl tensor headers #include "utils/memory_overlap.hpp" #include "utils/output_validation.hpp" From 156d3b78a37f06cf58ff9f5486a9bf3637a0cf41 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 23 Jan 2026 04:52:26 -0800 Subject: [PATCH 07/18] Update indexing extension to work with dpnp4pybind11.hpp --- dpnp/backend/extensions/indexing/CMakeLists.txt | 14 ++++---------- dpnp/backend/extensions/indexing/choose.cpp | 12 ++++++------ 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/dpnp/backend/extensions/indexing/CMakeLists.txt b/dpnp/backend/extensions/indexing/CMakeLists.txt index 28d38bc28f21..c0de75ae3146 100644 --- a/dpnp/backend/extensions/indexing/CMakeLists.txt +++ b/dpnp/backend/extensions/indexing/CMakeLists.txt @@ -62,17 +62,11 @@ set_target_properties( target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../ + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${CMAKE_SOURCE_DIR}/dpnp/backend/include + ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include ) -target_include_directories( - ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common -) -target_include_directories( - ${python_module_name} - PRIVATE ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include -) - target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) if(WIN32) diff --git a/dpnp/backend/extensions/indexing/choose.cpp b/dpnp/backend/extensions/indexing/choose.cpp index 99d91744366f..05d1bfe15385 100644 --- a/dpnp/backend/extensions/indexing/choose.cpp +++ b/dpnp/backend/extensions/indexing/choose.cpp @@ -30,15 +30,17 @@ #include #include #include -#include -#include -#include #include #include #include +#include + +#include + +#include "dpnp4pybind11.hpp" + #include "choose_kernel.hpp" -#include "dpctl4pybind11.hpp" // utils extension header #include "ext/common.hpp" @@ -52,7 +54,6 @@ namespace dpnp::extensions::indexing { - namespace td_ns = dpctl::tensor::type_dispatch; static kernels::choose_fn_ptr_t choose_clip_dispatch_table[td_ns::num_types] @@ -459,5 +460,4 @@ void init_choose(py::module_ m) return; } - } // namespace dpnp::extensions::indexing From 
d9d912d8a257222e759e90196a2e5d3867de50da Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 23 Jan 2026 04:57:00 -0800 Subject: [PATCH 08/18] Update lapack extension to work with dpnp4pybind11.hpp --- dpnp/backend/extensions/lapack/CMakeLists.txt | 15 +++++---------- dpnp/backend/extensions/lapack/geqrf.hpp | 2 +- dpnp/backend/extensions/lapack/gesv.hpp | 2 +- dpnp/backend/extensions/lapack/gesvd.hpp | 2 +- dpnp/backend/extensions/lapack/getrf.hpp | 2 +- dpnp/backend/extensions/lapack/getri.hpp | 2 +- dpnp/backend/extensions/lapack/getrs.hpp | 2 +- dpnp/backend/extensions/lapack/orgqr.hpp | 2 +- dpnp/backend/extensions/lapack/potrf.hpp | 2 +- dpnp/backend/extensions/lapack/ungqr.hpp | 2 +- 10 files changed, 14 insertions(+), 19 deletions(-) diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt index aa0f6b718972..76b25c3a6d10 100644 --- a/dpnp/backend/extensions/lapack/CMakeLists.txt +++ b/dpnp/backend/extensions/lapack/CMakeLists.txt @@ -82,17 +82,12 @@ set_target_properties( target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../ + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../../ + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${CMAKE_SOURCE_DIR}/dpnp/backend/include + ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include ) -target_include_directories( - ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common -) -target_include_directories( - ${python_module_name} - PRIVATE ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include -) - target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) if(WIN32) diff --git a/dpnp/backend/extensions/lapack/geqrf.hpp b/dpnp/backend/extensions/lapack/geqrf.hpp index 522006ace8ab..7be1fee971cf 100644 --- a/dpnp/backend/extensions/lapack/geqrf.hpp +++ b/dpnp/backend/extensions/lapack/geqrf.hpp @@ -31,7 +31,7 @@ #include #include -#include +#include "dpnp4pybind11.hpp" namespace dpnp::extensions::lapack { diff --git a/dpnp/backend/extensions/lapack/gesv.hpp b/dpnp/backend/extensions/lapack/gesv.hpp index d4198efae62e..a86039c9b72e 100644 --- a/dpnp/backend/extensions/lapack/gesv.hpp +++ b/dpnp/backend/extensions/lapack/gesv.hpp @@ -31,7 +31,7 @@ #include #include -#include +#include "dpnp4pybind11.hpp" namespace dpnp::extensions::lapack { diff --git a/dpnp/backend/extensions/lapack/gesvd.hpp b/dpnp/backend/extensions/lapack/gesvd.hpp index 116348e01d9f..b2fea5e47299 100644 --- a/dpnp/backend/extensions/lapack/gesvd.hpp +++ b/dpnp/backend/extensions/lapack/gesvd.hpp @@ -31,7 +31,7 @@ #include #include -#include +#include "dpnp4pybind11.hpp" namespace dpnp::extensions::lapack { diff --git a/dpnp/backend/extensions/lapack/getrf.hpp b/dpnp/backend/extensions/lapack/getrf.hpp index 24ec473f4dc7..ce6dc3e788b5 100644 --- a/dpnp/backend/extensions/lapack/getrf.hpp +++ b/dpnp/backend/extensions/lapack/getrf.hpp @@ -31,7 +31,7 @@ #include #include -#include +#include "dpnp4pybind11.hpp" namespace dpnp::extensions::lapack { diff --git a/dpnp/backend/extensions/lapack/getri.hpp b/dpnp/backend/extensions/lapack/getri.hpp index d8c8e58f3fcb..728af4a77e01 100644 --- a/dpnp/backend/extensions/lapack/getri.hpp +++ b/dpnp/backend/extensions/lapack/getri.hpp @@ -31,7 +31,7 @@ #include #include -#include +#include "dpnp4pybind11.hpp" namespace dpnp::extensions::lapack { diff --git a/dpnp/backend/extensions/lapack/getrs.hpp b/dpnp/backend/extensions/lapack/getrs.hpp index f5a47c69c9ec..2728b0c4e04a 100644 --- a/dpnp/backend/extensions/lapack/getrs.hpp +++ 
b/dpnp/backend/extensions/lapack/getrs.hpp @@ -31,7 +31,7 @@ #include #include -#include +#include "dpnp4pybind11.hpp" namespace dpnp::extensions::lapack { diff --git a/dpnp/backend/extensions/lapack/orgqr.hpp b/dpnp/backend/extensions/lapack/orgqr.hpp index 962edc7b668f..2502fe567a1f 100644 --- a/dpnp/backend/extensions/lapack/orgqr.hpp +++ b/dpnp/backend/extensions/lapack/orgqr.hpp @@ -31,7 +31,7 @@ #include #include -#include +#include "dpnp4pybind11.hpp" namespace dpnp::extensions::lapack { diff --git a/dpnp/backend/extensions/lapack/potrf.hpp b/dpnp/backend/extensions/lapack/potrf.hpp index d5df48a9ddf4..02faf2c04fde 100644 --- a/dpnp/backend/extensions/lapack/potrf.hpp +++ b/dpnp/backend/extensions/lapack/potrf.hpp @@ -31,7 +31,7 @@ #include #include -#include +#include "dpnp4pybind11.hpp" namespace dpnp::extensions::lapack { diff --git a/dpnp/backend/extensions/lapack/ungqr.hpp b/dpnp/backend/extensions/lapack/ungqr.hpp index a149af1e24e1..8c9a36b3f4a6 100644 --- a/dpnp/backend/extensions/lapack/ungqr.hpp +++ b/dpnp/backend/extensions/lapack/ungqr.hpp @@ -31,7 +31,7 @@ #include #include -#include +#include "dpnp4pybind11.hpp" namespace dpnp::extensions::lapack { From 35681bbab6cb098a83b71cfed826c4316877fcf9 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 23 Jan 2026 05:06:00 -0800 Subject: [PATCH 09/18] Update statistics extension to work with dpnp4pybind11.hpp --- .../extensions/common/ext/validation_utils.hpp | 7 +++++++ dpnp/backend/extensions/statistics/CMakeLists.txt | 10 ++++------ dpnp/backend/extensions/statistics/bincount.hpp | 3 ++- dpnp/backend/extensions/statistics/histogram.cpp | 5 ++--- dpnp/backend/extensions/statistics/histogram.hpp | 4 +++- .../extensions/statistics/histogram_common.cpp | 10 ++++++---- dpnp/backend/extensions/statistics/histogramdd.hpp | 4 +++- .../extensions/statistics/sliding_dot_product1d.cpp | 8 +++++--- .../extensions/statistics/sliding_window1d.cpp | 11 +++++++---- 9 files changed, 39 insertions(+), 23 deletions(-) diff --git a/dpnp/backend/extensions/common/ext/validation_utils.hpp b/dpnp/backend/extensions/common/ext/validation_utils.hpp index d41db8d5ca5a..0bb32c9f876a 100644 --- a/dpnp/backend/extensions/common/ext/validation_utils.hpp +++ b/dpnp/backend/extensions/common/ext/validation_utils.hpp @@ -32,7 +32,14 @@ #include #include +#if __has_include() +#include "dpnp4pybind11.hpp" +#else #include "dpctl4pybind11.hpp" +#endif + +// dpctl tensor headers +#include "utils/type_dispatch.hpp" namespace ext::validation { diff --git a/dpnp/backend/extensions/statistics/CMakeLists.txt b/dpnp/backend/extensions/statistics/CMakeLists.txt index 710a35346d63..e04279b75e49 100644 --- a/dpnp/backend/extensions/statistics/CMakeLists.txt +++ b/dpnp/backend/extensions/statistics/CMakeLists.txt @@ -67,13 +67,11 @@ set_target_properties( target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${CMAKE_SOURCE_DIR}/dpnp/backend/include + ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include ) -target_include_directories( - ${python_module_name} - PRIVATE ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include -) - target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) if(WIN32) diff --git a/dpnp/backend/extensions/statistics/bincount.hpp b/dpnp/backend/extensions/statistics/bincount.hpp index 5e42952349b0..2fc477e71edc 100644 --- a/dpnp/backend/extensions/statistics/bincount.hpp +++ b/dpnp/backend/extensions/statistics/bincount.hpp @@ -31,7 +31,8 
@@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" + #include "ext/dispatch_table.hpp" namespace dpctl_td_ns = dpctl::tensor::type_dispatch; diff --git a/dpnp/backend/extensions/statistics/histogram.cpp b/dpnp/backend/extensions/statistics/histogram.cpp index 6d7da6836f60..621aa36cfbd1 100644 --- a/dpnp/backend/extensions/statistics/histogram.cpp +++ b/dpnp/backend/extensions/statistics/histogram.cpp @@ -33,10 +33,10 @@ #include #include -#include + +#include "dpnp4pybind11.hpp" // dpctl tensor headers -#include "dpctl4pybind11.hpp" #include "utils/type_dispatch.hpp" #include "histogram.hpp" @@ -50,7 +50,6 @@ using namespace ext::common; namespace { - template struct HistogramEdges { diff --git a/dpnp/backend/extensions/statistics/histogram.hpp b/dpnp/backend/extensions/statistics/histogram.hpp index c6a79ec24ee3..d04d8edbf02b 100644 --- a/dpnp/backend/extensions/statistics/histogram.hpp +++ b/dpnp/backend/extensions/statistics/histogram.hpp @@ -31,7 +31,9 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" + +// utils extension header #include "ext/dispatch_table.hpp" namespace statistics::histogram diff --git a/dpnp/backend/extensions/statistics/histogram_common.cpp b/dpnp/backend/extensions/statistics/histogram_common.cpp index 82afa2bd965d..252e1cd7c7cc 100644 --- a/dpnp/backend/extensions/statistics/histogram_common.cpp +++ b/dpnp/backend/extensions/statistics/histogram_common.cpp @@ -31,15 +31,18 @@ #include #include -#include "dpctl4pybind11.hpp" -#include "utils/type_dispatch.hpp" - #include +#include "dpnp4pybind11.hpp" + #include "histogram_common.hpp" +// utils extension header #include "ext/validation_utils.hpp" +// dpctl tensor headers +#include "utils/type_dispatch.hpp" + namespace dpctl_td_ns = dpctl::tensor::type_dispatch; using dpctl::tensor::usm_ndarray; using dpctl_td_ns::typenum_t; @@ -57,7 +60,6 @@ using ext::validation::name_of; namespace statistics::histogram { - void validate(const usm_ndarray &sample, const std::optional &bins, const std::optional &weights, diff --git a/dpnp/backend/extensions/statistics/histogramdd.hpp b/dpnp/backend/extensions/statistics/histogramdd.hpp index 327e9941dbc6..d7c46ae34b7d 100644 --- a/dpnp/backend/extensions/statistics/histogramdd.hpp +++ b/dpnp/backend/extensions/statistics/histogramdd.hpp @@ -31,7 +31,9 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" + +// utils extension header #include "ext/dispatch_table.hpp" namespace statistics::histogram diff --git a/dpnp/backend/extensions/statistics/sliding_dot_product1d.cpp b/dpnp/backend/extensions/statistics/sliding_dot_product1d.cpp index b8f679f1030e..6c0e39a11a19 100644 --- a/dpnp/backend/extensions/statistics/sliding_dot_product1d.cpp +++ b/dpnp/backend/extensions/statistics/sliding_dot_product1d.cpp @@ -33,11 +33,14 @@ #include #include +#include "dpnp4pybind11.hpp" + +// utils extension header +#include "ext/common.hpp" + // dpctl tensor headers -#include "dpctl4pybind11.hpp" #include "utils/type_dispatch.hpp" -#include "ext/common.hpp" #include "sliding_dot_product1d.hpp" #include "sliding_window1d.hpp" @@ -51,7 +54,6 @@ using namespace ext::common; namespace { - template struct SlidingDotProductF { diff --git a/dpnp/backend/extensions/statistics/sliding_window1d.cpp b/dpnp/backend/extensions/statistics/sliding_window1d.cpp index 3ae66daa332b..81f8ae40104e 100644 --- a/dpnp/backend/extensions/statistics/sliding_window1d.cpp +++ b/dpnp/backend/extensions/statistics/sliding_window1d.cpp 
@@ -29,11 +29,16 @@ #include #include -#include "dpctl4pybind11.hpp" -#include "utils/type_dispatch.hpp" #include +#include "dpnp4pybind11.hpp" + +// utils extension header #include "ext/validation_utils.hpp" + +// dpctl tensor headers +#include "utils/type_dispatch.hpp" + #include "sliding_window1d.hpp" namespace dpctl_td_ns = dpctl::tensor::type_dispatch; @@ -48,7 +53,6 @@ using ext::validation::name_of; namespace statistics::sliding_window1d { - void validate(const usm_ndarray &a, const usm_ndarray &v, const usm_ndarray &out, @@ -89,5 +93,4 @@ void validate(const usm_ndarray &a, std::to_string(expected_output_size) + ")"); } } - } // namespace statistics::sliding_window1d From c35428e3aee3b14abadc550f8ef85e71860bc6e9 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 23 Jan 2026 05:16:45 -0800 Subject: [PATCH 10/18] Update ufunc extension to work with dpnp4pybind11.hpp --- .../elementwise_functions.hpp | 8 +++++--- .../elementwise_functions_type_utils.cpp | 9 ++++++--- .../elementwise_functions_type_utils.hpp | 8 ++++++-- dpnp/backend/extensions/ufunc/CMakeLists.txt | 15 +++++---------- .../ufunc/elementwise_functions/bitwise_count.cpp | 2 +- .../ufunc/elementwise_functions/degrees.cpp | 2 +- .../ufunc/elementwise_functions/divmod.cpp | 2 +- .../ufunc/elementwise_functions/erf_funcs.cpp | 2 +- .../ufunc/elementwise_functions/fabs.cpp | 2 +- .../ufunc/elementwise_functions/float_power.cpp | 2 +- .../ufunc/elementwise_functions/fmax.cpp | 2 +- .../ufunc/elementwise_functions/fmin.cpp | 2 +- .../ufunc/elementwise_functions/fmod.cpp | 2 +- .../ufunc/elementwise_functions/frexp.cpp | 2 +- .../ufunc/elementwise_functions/gcd.cpp | 2 +- .../ufunc/elementwise_functions/heaviside.cpp | 2 +- .../extensions/ufunc/elementwise_functions/i0.cpp | 2 +- .../ufunc/elementwise_functions/interpolate.cpp | 2 +- .../ufunc/elementwise_functions/isclose.cpp | 2 +- .../ufunc/elementwise_functions/lcm.cpp | 2 +- .../ufunc/elementwise_functions/ldexp.cpp | 2 +- .../ufunc/elementwise_functions/logaddexp2.cpp | 2 +- .../ufunc/elementwise_functions/modf.cpp | 2 +- .../ufunc/elementwise_functions/nan_to_num.cpp | 2 +- .../ufunc/elementwise_functions/radians.cpp | 2 +- .../ufunc/elementwise_functions/sinc.cpp | 2 +- .../ufunc/elementwise_functions/spacing.cpp | 2 +- 27 files changed, 45 insertions(+), 41 deletions(-) diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp index c996ac07df02..9e8d98d875a3 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp @@ -34,12 +34,14 @@ #include #include +#include #include +#if __has_include() +#include "dpnp4pybind11.hpp" +#else #include "dpctl4pybind11.hpp" -#include -#include -#include +#endif #include "elementwise_functions_type_utils.hpp" #include "simplify_iteration_space.hpp" diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp index 62f7584a3e0c..c126428f0558 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp @@ -26,12 +26,15 @@ // THE POSSIBILITY OF SUCH DAMAGE. 
//***************************************************************************** -#include "dpctl4pybind11.hpp" - -#include #include #include +#if __has_include() +#include "dpnp4pybind11.hpp" +#else +#include "dpctl4pybind11.hpp" +#endif + #include "elementwise_functions_type_utils.hpp" // dpctl tensor headers diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp index 1bb6fedd7027..129a89a49dbe 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp @@ -28,10 +28,14 @@ #pragma once -#include "dpctl4pybind11.hpp" #include #include -#include + +#if __has_include() +#include "dpnp4pybind11.hpp" +#else +#include "dpctl4pybind11.hpp" +#endif // dpctl tensor headers #include "utils/type_dispatch.hpp" diff --git a/dpnp/backend/extensions/ufunc/CMakeLists.txt b/dpnp/backend/extensions/ufunc/CMakeLists.txt index f1378bf52d88..55a750f8423f 100644 --- a/dpnp/backend/extensions/ufunc/CMakeLists.txt +++ b/dpnp/backend/extensions/ufunc/CMakeLists.txt @@ -84,17 +84,12 @@ set_target_properties( target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../ + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../../ + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${CMAKE_SOURCE_DIR}/dpnp/backend/include + ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include ) -target_include_directories( - ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common -) -target_include_directories( - ${python_module_name} - PRIVATE ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include -) - target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) if(_dpnp_sycl_targets) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/bitwise_count.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/bitwise_count.cpp index a0842f4ef259..14e8b7b5ed35 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/bitwise_count.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/bitwise_count.cpp @@ -32,7 +32,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "bitwise_count.hpp" #include "kernels/elementwise_functions/bitwise_count.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/degrees.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/degrees.cpp index 77452a6b777f..511ea759ae35 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/degrees.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/degrees.cpp @@ -31,7 +31,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "degrees.hpp" #include "kernels/elementwise_functions/degrees.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/divmod.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/divmod.cpp index af87dcc85f53..93d04ed7940e 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/divmod.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/divmod.cpp @@ -32,7 +32,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "divmod.hpp" #include "kernels/elementwise_functions/divmod.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/erf_funcs.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/erf_funcs.cpp index 5254e50d3faf..e209f72a83b2 100644 --- 
a/dpnp/backend/extensions/ufunc/elementwise_functions/erf_funcs.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/erf_funcs.cpp @@ -31,7 +31,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "erf_funcs.hpp" #include "kernels/elementwise_functions/erf.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp index d2b6ae24ac4b..d673533e599b 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp @@ -31,7 +31,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "fabs.hpp" #include "kernels/elementwise_functions/fabs.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/float_power.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/float_power.cpp index 0994afc7c738..9d42630fd00c 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/float_power.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/float_power.cpp @@ -32,7 +32,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "float_power.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fmax.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fmax.cpp index 5e1a9f33444b..70e8a434e7ac 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/fmax.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fmax.cpp @@ -30,7 +30,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "fmax.hpp" #include "kernels/elementwise_functions/fmax.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fmin.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fmin.cpp index c0e1db654317..d9c94109fdd0 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/fmin.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fmin.cpp @@ -30,7 +30,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "fmin.hpp" #include "kernels/elementwise_functions/fmin.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp index 5b83595b3f7c..9db1f7873f5b 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp @@ -32,7 +32,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "fmod.hpp" #include "kernels/elementwise_functions/fmod.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.cpp index 4439f1e76993..0c4d1b1b9252 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.cpp @@ -33,7 +33,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "frexp.hpp" #include "kernels/elementwise_functions/frexp.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/gcd.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/gcd.cpp index ec10504fa15e..1a570488cc1f 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/gcd.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/gcd.cpp @@ -32,7 +32,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "gcd.hpp" #include 
"kernels/elementwise_functions/gcd.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/heaviside.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/heaviside.cpp index e3212de86f7f..69db72c7142b 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/heaviside.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/heaviside.cpp @@ -31,7 +31,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "heaviside.hpp" #include "kernels/elementwise_functions/heaviside.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/i0.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/i0.cpp index 4d120a56e837..82c1c7cb27ad 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/i0.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/i0.cpp @@ -31,7 +31,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "i0.hpp" #include "kernels/elementwise_functions/i0.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp index 33c7ab19b9ab..f8ce8f007369 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp @@ -37,7 +37,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp index b8179feb9263..34577aa7ba68 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp @@ -34,7 +34,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/lcm.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/lcm.cpp index 4276ceb6b246..c2d2e801fed8 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/lcm.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/lcm.cpp @@ -32,7 +32,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "kernels/elementwise_functions/lcm.hpp" #include "lcm.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/ldexp.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/ldexp.cpp index 3e2c4f3d0149..5e413b30735d 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/ldexp.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/ldexp.cpp @@ -32,7 +32,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "kernels/elementwise_functions/ldexp.hpp" #include "ldexp.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/logaddexp2.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/logaddexp2.cpp index 57c7c60ca9cf..4f215c8b98a1 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/logaddexp2.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/logaddexp2.cpp @@ -30,7 +30,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "kernels/elementwise_functions/logaddexp2.hpp" #include "logaddexp2.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/modf.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/modf.cpp index f8aab23d5630..7885e26217f0 100644 --- 
a/dpnp/backend/extensions/ufunc/elementwise_functions/modf.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/modf.cpp @@ -33,7 +33,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "kernels/elementwise_functions/modf.hpp" #include "modf.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/nan_to_num.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/nan_to_num.cpp index 2490f1921a98..b430dc51f974 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/nan_to_num.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/nan_to_num.cpp @@ -38,7 +38,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include #include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/radians.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/radians.cpp index 7fc8ae5331dd..96c1fc2f601a 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/radians.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/radians.cpp @@ -31,7 +31,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "kernels/elementwise_functions/radians.hpp" #include "populate.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/sinc.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/sinc.cpp index abd02e1e6282..afba8db01bb2 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/sinc.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/sinc.cpp @@ -32,7 +32,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "kernels/elementwise_functions/sinc.hpp" #include "populate.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/spacing.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/spacing.cpp index 6e401c5388dd..ca4e9b8661b2 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/spacing.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/spacing.cpp @@ -31,7 +31,7 @@ #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "kernels/elementwise_functions/spacing.hpp" #include "populate.hpp" From db83aeed4d32575f80333400ebc9ed8c1724202c Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 23 Jan 2026 05:20:37 -0800 Subject: [PATCH 11/18] Update vm extension to work with dpnp4pybind11.hpp --- dpnp/backend/extensions/vm/CMakeLists.txt | 14 ++++---------- dpnp/backend/extensions/vm/abs.cpp | 2 +- dpnp/backend/extensions/vm/acos.cpp | 2 +- dpnp/backend/extensions/vm/acosh.cpp | 2 +- dpnp/backend/extensions/vm/add.cpp | 2 +- dpnp/backend/extensions/vm/arg.cpp | 2 +- dpnp/backend/extensions/vm/asin.cpp | 2 +- dpnp/backend/extensions/vm/asinh.cpp | 2 +- dpnp/backend/extensions/vm/atan.cpp | 2 +- dpnp/backend/extensions/vm/atan2.cpp | 2 +- dpnp/backend/extensions/vm/atanh.cpp | 2 +- dpnp/backend/extensions/vm/cbrt.cpp | 2 +- dpnp/backend/extensions/vm/ceil.cpp | 2 +- dpnp/backend/extensions/vm/common.hpp | 4 ++-- dpnp/backend/extensions/vm/conj.cpp | 2 +- dpnp/backend/extensions/vm/copysign.cpp | 2 +- dpnp/backend/extensions/vm/cos.cpp | 2 +- dpnp/backend/extensions/vm/cosh.cpp | 2 +- dpnp/backend/extensions/vm/div.cpp | 2 +- dpnp/backend/extensions/vm/erf_funcs.cpp | 2 +- dpnp/backend/extensions/vm/exp.cpp | 2 +- dpnp/backend/extensions/vm/exp2.cpp | 2 +- dpnp/backend/extensions/vm/expm1.cpp | 2 +- dpnp/backend/extensions/vm/floor.cpp | 2 +- dpnp/backend/extensions/vm/fmax.cpp | 2 +- dpnp/backend/extensions/vm/fmin.cpp | 2 +- 
dpnp/backend/extensions/vm/fmod.cpp | 2 +- dpnp/backend/extensions/vm/hypot.cpp | 2 +- dpnp/backend/extensions/vm/i0.cpp | 2 +- dpnp/backend/extensions/vm/inv.cpp | 2 +- dpnp/backend/extensions/vm/ln.cpp | 2 +- dpnp/backend/extensions/vm/log10.cpp | 2 +- dpnp/backend/extensions/vm/log1p.cpp | 2 +- dpnp/backend/extensions/vm/log2.cpp | 2 +- dpnp/backend/extensions/vm/modf.cpp | 2 +- dpnp/backend/extensions/vm/mul.cpp | 2 +- dpnp/backend/extensions/vm/nextafter.cpp | 2 +- dpnp/backend/extensions/vm/pow.cpp | 2 +- dpnp/backend/extensions/vm/rint.cpp | 2 +- dpnp/backend/extensions/vm/sin.cpp | 2 +- dpnp/backend/extensions/vm/sinh.cpp | 2 +- dpnp/backend/extensions/vm/sqr.cpp | 2 +- dpnp/backend/extensions/vm/sqrt.cpp | 2 +- dpnp/backend/extensions/vm/sub.cpp | 2 +- dpnp/backend/extensions/vm/tan.cpp | 2 +- dpnp/backend/extensions/vm/tanh.cpp | 2 +- dpnp/backend/extensions/vm/trunc.cpp | 2 +- 47 files changed, 51 insertions(+), 57 deletions(-) diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index b7181616f546..32d6a6765a00 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -107,17 +107,11 @@ set_target_properties( target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../ + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${CMAKE_SOURCE_DIR}/dpnp/backend/include + ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include ) -target_include_directories( - ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common -) -target_include_directories( - ${python_module_name} - PRIVATE ${CMAKE_SOURCE_DIR}/dpctl/tensor/libtensor/include -) - target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) if(WIN32) diff --git a/dpnp/backend/extensions/vm/abs.cpp b/dpnp/backend/extensions/vm/abs.cpp index 133f3077ac43..a2432f5bedc6 100644 --- a/dpnp/backend/extensions/vm/abs.cpp +++ b/dpnp/backend/extensions/vm/abs.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "abs.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/acos.cpp b/dpnp/backend/extensions/vm/acos.cpp index 0cb9bb32f4b8..01e4d5ab35f9 100644 --- a/dpnp/backend/extensions/vm/acos.cpp +++ b/dpnp/backend/extensions/vm/acos.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "acos.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/acosh.cpp b/dpnp/backend/extensions/vm/acosh.cpp index fa25ecf5cc1e..b1136163d684 100644 --- a/dpnp/backend/extensions/vm/acosh.cpp +++ b/dpnp/backend/extensions/vm/acosh.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "acosh.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/add.cpp b/dpnp/backend/extensions/vm/add.cpp index 165671c93415..572eadb83cec 100644 --- a/dpnp/backend/extensions/vm/add.cpp +++ b/dpnp/backend/extensions/vm/add.cpp @@ -36,7 +36,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "add.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/arg.cpp b/dpnp/backend/extensions/vm/arg.cpp index e062f1f2ee06..40a15082f0ec 100644 --- a/dpnp/backend/extensions/vm/arg.cpp +++ b/dpnp/backend/extensions/vm/arg.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "arg.hpp" #include "common.hpp" diff --git 
a/dpnp/backend/extensions/vm/asin.cpp b/dpnp/backend/extensions/vm/asin.cpp index 8a2e1c079ed8..8cf73f3fe572 100644 --- a/dpnp/backend/extensions/vm/asin.cpp +++ b/dpnp/backend/extensions/vm/asin.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "asin.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/asinh.cpp b/dpnp/backend/extensions/vm/asinh.cpp index 176bacdb92a8..a3404d2c5415 100644 --- a/dpnp/backend/extensions/vm/asinh.cpp +++ b/dpnp/backend/extensions/vm/asinh.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "asinh.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/atan.cpp b/dpnp/backend/extensions/vm/atan.cpp index 21c8c8f1c9d5..a89cb8f9a308 100644 --- a/dpnp/backend/extensions/vm/atan.cpp +++ b/dpnp/backend/extensions/vm/atan.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "atan.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/atan2.cpp b/dpnp/backend/extensions/vm/atan2.cpp index 1d4e5c333e68..bcdf1daae1b3 100644 --- a/dpnp/backend/extensions/vm/atan2.cpp +++ b/dpnp/backend/extensions/vm/atan2.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "atan2.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/atanh.cpp b/dpnp/backend/extensions/vm/atanh.cpp index 7097fabf602f..d4ef24663d02 100644 --- a/dpnp/backend/extensions/vm/atanh.cpp +++ b/dpnp/backend/extensions/vm/atanh.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "atanh.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/cbrt.cpp b/dpnp/backend/extensions/vm/cbrt.cpp index db3cdfcebd8d..47584c8f6811 100644 --- a/dpnp/backend/extensions/vm/cbrt.cpp +++ b/dpnp/backend/extensions/vm/cbrt.cpp @@ -34,7 +34,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "cbrt.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/ceil.cpp b/dpnp/backend/extensions/vm/ceil.cpp index 6f5aeba16f99..d170b66d7d2c 100644 --- a/dpnp/backend/extensions/vm/ceil.cpp +++ b/dpnp/backend/extensions/vm/ceil.cpp @@ -34,7 +34,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "ceil.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/common.hpp b/dpnp/backend/extensions/vm/common.hpp index 6ee73504ce96..81e113771def 100644 --- a/dpnp/backend/extensions/vm/common.hpp +++ b/dpnp/backend/extensions/vm/common.hpp @@ -34,10 +34,10 @@ #include #include +#include #include -#include -#include +#include "dpnp4pybind11.hpp" // utils extension header #include "ext/common.hpp" diff --git a/dpnp/backend/extensions/vm/conj.cpp b/dpnp/backend/extensions/vm/conj.cpp index 36710104750a..ee000d5ee40d 100644 --- a/dpnp/backend/extensions/vm/conj.cpp +++ b/dpnp/backend/extensions/vm/conj.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "conj.hpp" diff --git a/dpnp/backend/extensions/vm/copysign.cpp b/dpnp/backend/extensions/vm/copysign.cpp index cd90abf65a06..8b6714865204 100644 --- a/dpnp/backend/extensions/vm/copysign.cpp +++ b/dpnp/backend/extensions/vm/copysign.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "copysign.hpp" 
diff --git a/dpnp/backend/extensions/vm/cos.cpp b/dpnp/backend/extensions/vm/cos.cpp index 76db72594763..62ecff7ea6a8 100644 --- a/dpnp/backend/extensions/vm/cos.cpp +++ b/dpnp/backend/extensions/vm/cos.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "cos.hpp" diff --git a/dpnp/backend/extensions/vm/cosh.cpp b/dpnp/backend/extensions/vm/cosh.cpp index 464410b1accc..ec81142eb6ce 100644 --- a/dpnp/backend/extensions/vm/cosh.cpp +++ b/dpnp/backend/extensions/vm/cosh.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "cosh.hpp" diff --git a/dpnp/backend/extensions/vm/div.cpp b/dpnp/backend/extensions/vm/div.cpp index ad96f9acf083..6b8c1f781955 100644 --- a/dpnp/backend/extensions/vm/div.cpp +++ b/dpnp/backend/extensions/vm/div.cpp @@ -36,7 +36,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "div.hpp" diff --git a/dpnp/backend/extensions/vm/erf_funcs.cpp b/dpnp/backend/extensions/vm/erf_funcs.cpp index 4e84403eb061..2d4be369dc13 100644 --- a/dpnp/backend/extensions/vm/erf_funcs.cpp +++ b/dpnp/backend/extensions/vm/erf_funcs.cpp @@ -34,7 +34,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "erf_funcs.hpp" diff --git a/dpnp/backend/extensions/vm/exp.cpp b/dpnp/backend/extensions/vm/exp.cpp index acd265d191f7..de5f34c404a8 100644 --- a/dpnp/backend/extensions/vm/exp.cpp +++ b/dpnp/backend/extensions/vm/exp.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "exp.hpp" diff --git a/dpnp/backend/extensions/vm/exp2.cpp b/dpnp/backend/extensions/vm/exp2.cpp index 82c6c32fb6c5..1f1aa6ab90a8 100644 --- a/dpnp/backend/extensions/vm/exp2.cpp +++ b/dpnp/backend/extensions/vm/exp2.cpp @@ -34,7 +34,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "exp2.hpp" diff --git a/dpnp/backend/extensions/vm/expm1.cpp b/dpnp/backend/extensions/vm/expm1.cpp index 93cef7b3272d..5f803622b1a3 100644 --- a/dpnp/backend/extensions/vm/expm1.cpp +++ b/dpnp/backend/extensions/vm/expm1.cpp @@ -34,7 +34,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "expm1.hpp" diff --git a/dpnp/backend/extensions/vm/floor.cpp b/dpnp/backend/extensions/vm/floor.cpp index fb1a86eda7bf..a12bdb18c719 100644 --- a/dpnp/backend/extensions/vm/floor.cpp +++ b/dpnp/backend/extensions/vm/floor.cpp @@ -34,7 +34,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "floor.hpp" diff --git a/dpnp/backend/extensions/vm/fmax.cpp b/dpnp/backend/extensions/vm/fmax.cpp index 32786a3e8fc2..db4ca265ec42 100644 --- a/dpnp/backend/extensions/vm/fmax.cpp +++ b/dpnp/backend/extensions/vm/fmax.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "fmax.hpp" diff --git a/dpnp/backend/extensions/vm/fmin.cpp b/dpnp/backend/extensions/vm/fmin.cpp index d923b8c7ddfb..ca933a9f1869 100644 --- a/dpnp/backend/extensions/vm/fmin.cpp +++ b/dpnp/backend/extensions/vm/fmin.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "fmin.hpp" diff --git 
a/dpnp/backend/extensions/vm/fmod.cpp b/dpnp/backend/extensions/vm/fmod.cpp index 6c8a4ac705e4..83337dc1f7fd 100644 --- a/dpnp/backend/extensions/vm/fmod.cpp +++ b/dpnp/backend/extensions/vm/fmod.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "fmod.hpp" diff --git a/dpnp/backend/extensions/vm/hypot.cpp b/dpnp/backend/extensions/vm/hypot.cpp index 92b7c78f8ad6..bf01b8fb42b6 100644 --- a/dpnp/backend/extensions/vm/hypot.cpp +++ b/dpnp/backend/extensions/vm/hypot.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "hypot.hpp" diff --git a/dpnp/backend/extensions/vm/i0.cpp b/dpnp/backend/extensions/vm/i0.cpp index 5db3ef9d9669..afdf34e8cabc 100644 --- a/dpnp/backend/extensions/vm/i0.cpp +++ b/dpnp/backend/extensions/vm/i0.cpp @@ -34,7 +34,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "i0.hpp" diff --git a/dpnp/backend/extensions/vm/inv.cpp b/dpnp/backend/extensions/vm/inv.cpp index 1adeb1be23d0..6be886c0b0f2 100644 --- a/dpnp/backend/extensions/vm/inv.cpp +++ b/dpnp/backend/extensions/vm/inv.cpp @@ -34,7 +34,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "inv.hpp" diff --git a/dpnp/backend/extensions/vm/ln.cpp b/dpnp/backend/extensions/vm/ln.cpp index e60a0545005b..c6bfb930524d 100644 --- a/dpnp/backend/extensions/vm/ln.cpp +++ b/dpnp/backend/extensions/vm/ln.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "ln.hpp" diff --git a/dpnp/backend/extensions/vm/log10.cpp b/dpnp/backend/extensions/vm/log10.cpp index d26ec57ab9ce..7e6e611d01c8 100644 --- a/dpnp/backend/extensions/vm/log10.cpp +++ b/dpnp/backend/extensions/vm/log10.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "log10.hpp" diff --git a/dpnp/backend/extensions/vm/log1p.cpp b/dpnp/backend/extensions/vm/log1p.cpp index 861804f8f6e0..579546f6b3f7 100644 --- a/dpnp/backend/extensions/vm/log1p.cpp +++ b/dpnp/backend/extensions/vm/log1p.cpp @@ -34,7 +34,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "log1p.hpp" diff --git a/dpnp/backend/extensions/vm/log2.cpp b/dpnp/backend/extensions/vm/log2.cpp index e75e96c32fe9..7c3ecb0731d7 100644 --- a/dpnp/backend/extensions/vm/log2.cpp +++ b/dpnp/backend/extensions/vm/log2.cpp @@ -34,7 +34,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "log2.hpp" diff --git a/dpnp/backend/extensions/vm/modf.cpp b/dpnp/backend/extensions/vm/modf.cpp index ef68c79d8b42..283cfadb9b78 100644 --- a/dpnp/backend/extensions/vm/modf.cpp +++ b/dpnp/backend/extensions/vm/modf.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "modf.hpp" diff --git a/dpnp/backend/extensions/vm/mul.cpp b/dpnp/backend/extensions/vm/mul.cpp index 0c9cf7fb79cc..a689e88ae0e1 100644 --- a/dpnp/backend/extensions/vm/mul.cpp +++ b/dpnp/backend/extensions/vm/mul.cpp @@ -36,7 +36,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "mul.hpp" diff --git a/dpnp/backend/extensions/vm/nextafter.cpp 
b/dpnp/backend/extensions/vm/nextafter.cpp index 59b205b3d62a..03b19529fc72 100644 --- a/dpnp/backend/extensions/vm/nextafter.cpp +++ b/dpnp/backend/extensions/vm/nextafter.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "nextafter.hpp" diff --git a/dpnp/backend/extensions/vm/pow.cpp b/dpnp/backend/extensions/vm/pow.cpp index 5969a4862730..1d8e8fe8afca 100644 --- a/dpnp/backend/extensions/vm/pow.cpp +++ b/dpnp/backend/extensions/vm/pow.cpp @@ -36,7 +36,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "pow.hpp" diff --git a/dpnp/backend/extensions/vm/rint.cpp b/dpnp/backend/extensions/vm/rint.cpp index 41cd20a944a0..f3d37b92a59a 100644 --- a/dpnp/backend/extensions/vm/rint.cpp +++ b/dpnp/backend/extensions/vm/rint.cpp @@ -34,7 +34,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "rint.hpp" diff --git a/dpnp/backend/extensions/vm/sin.cpp b/dpnp/backend/extensions/vm/sin.cpp index 9263c3c4ffcf..39258ceb60b9 100644 --- a/dpnp/backend/extensions/vm/sin.cpp +++ b/dpnp/backend/extensions/vm/sin.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "sin.hpp" diff --git a/dpnp/backend/extensions/vm/sinh.cpp b/dpnp/backend/extensions/vm/sinh.cpp index a1bae13a5281..5aa5a31a8f84 100644 --- a/dpnp/backend/extensions/vm/sinh.cpp +++ b/dpnp/backend/extensions/vm/sinh.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "sinh.hpp" diff --git a/dpnp/backend/extensions/vm/sqr.cpp b/dpnp/backend/extensions/vm/sqr.cpp index 88c2e833b483..bf008a68a68f 100644 --- a/dpnp/backend/extensions/vm/sqr.cpp +++ b/dpnp/backend/extensions/vm/sqr.cpp @@ -34,7 +34,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "sqr.hpp" diff --git a/dpnp/backend/extensions/vm/sqrt.cpp b/dpnp/backend/extensions/vm/sqrt.cpp index 98cf2eea9253..8bd26c0fe1a9 100644 --- a/dpnp/backend/extensions/vm/sqrt.cpp +++ b/dpnp/backend/extensions/vm/sqrt.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "sqrt.hpp" diff --git a/dpnp/backend/extensions/vm/sub.cpp b/dpnp/backend/extensions/vm/sub.cpp index 5ee01f239c06..b0503754194f 100644 --- a/dpnp/backend/extensions/vm/sub.cpp +++ b/dpnp/backend/extensions/vm/sub.cpp @@ -36,7 +36,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "sub.hpp" diff --git a/dpnp/backend/extensions/vm/tan.cpp b/dpnp/backend/extensions/vm/tan.cpp index 46555ebd0178..9fe4cb64d41c 100644 --- a/dpnp/backend/extensions/vm/tan.cpp +++ b/dpnp/backend/extensions/vm/tan.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "tan.hpp" diff --git a/dpnp/backend/extensions/vm/tanh.cpp b/dpnp/backend/extensions/vm/tanh.cpp index 04d2febfac1d..70f4ef6142d5 100644 --- a/dpnp/backend/extensions/vm/tanh.cpp +++ b/dpnp/backend/extensions/vm/tanh.cpp @@ -35,7 +35,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "tanh.hpp" diff --git a/dpnp/backend/extensions/vm/trunc.cpp b/dpnp/backend/extensions/vm/trunc.cpp index 
c23a9a8180fb..c6cc4f9e8265 100644 --- a/dpnp/backend/extensions/vm/trunc.cpp +++ b/dpnp/backend/extensions/vm/trunc.cpp @@ -34,7 +34,7 @@ #include #include -#include "dpctl4pybind11.hpp" +#include "dpnp4pybind11.hpp" #include "common.hpp" #include "trunc.hpp" From 00f6c70003c671c7ea3aa52cfcd91723346e796b Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 23 Jan 2026 05:24:13 -0800 Subject: [PATCH 12/18] remove conditional include for dpnp4pybind11.hpp --- dpctl/tensor/libtensor/include/utils/memory_overlap.hpp | 4 ---- dpctl/tensor/libtensor/include/utils/output_validation.hpp | 4 ---- dpctl/tensor/libtensor/include/utils/type_dispatch.hpp | 4 ---- dpnp/backend/extensions/common/ext/validation_utils.hpp | 4 ---- .../elementwise_functions/elementwise_functions.hpp | 4 ---- .../elementwise_functions_type_utils.cpp | 4 ---- .../elementwise_functions_type_utils.hpp | 4 ---- 7 files changed, 28 deletions(-) diff --git a/dpctl/tensor/libtensor/include/utils/memory_overlap.hpp b/dpctl/tensor/libtensor/include/utils/memory_overlap.hpp index db9dfc30eb46..b534e55b3192 100644 --- a/dpctl/tensor/libtensor/include/utils/memory_overlap.hpp +++ b/dpctl/tensor/libtensor/include/utils/memory_overlap.hpp @@ -38,11 +38,7 @@ #include -#if __has_include() #include "dpnp4pybind11.hpp" -#else -#include "dpctl4pybind11.hpp" -#endif /* @brief check for overlap of memory regions behind arrays. diff --git a/dpctl/tensor/libtensor/include/utils/output_validation.hpp b/dpctl/tensor/libtensor/include/utils/output_validation.hpp index 7a70f395dfe1..26f1b29bd3d8 100644 --- a/dpctl/tensor/libtensor/include/utils/output_validation.hpp +++ b/dpctl/tensor/libtensor/include/utils/output_validation.hpp @@ -37,11 +37,7 @@ #include -#if __has_include() #include "dpnp4pybind11.hpp" -#else -#include "dpctl4pybind11.hpp" -#endif namespace dpctl::tensor::validation { diff --git a/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp b/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp index 38b5b43ca696..242c2cf8724a 100644 --- a/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp +++ b/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp @@ -36,11 +36,7 @@ #include #include -#if __has_include() #include "dpnp4pybind11.hpp" -#else -#include "dpctl4pybind11.hpp" -#endif #include "type_dispatch_building.hpp" diff --git a/dpnp/backend/extensions/common/ext/validation_utils.hpp b/dpnp/backend/extensions/common/ext/validation_utils.hpp index 0bb32c9f876a..03e0718d4450 100644 --- a/dpnp/backend/extensions/common/ext/validation_utils.hpp +++ b/dpnp/backend/extensions/common/ext/validation_utils.hpp @@ -32,11 +32,7 @@ #include #include -#if __has_include() #include "dpnp4pybind11.hpp" -#else -#include "dpctl4pybind11.hpp" -#endif // dpctl tensor headers #include "utils/type_dispatch.hpp" diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp index 9e8d98d875a3..b1634eafef5a 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp @@ -37,11 +37,7 @@ #include #include -#if __has_include() #include "dpnp4pybind11.hpp" -#else -#include "dpctl4pybind11.hpp" -#endif #include "elementwise_functions_type_utils.hpp" #include "simplify_iteration_space.hpp" diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp 
b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp index c126428f0558..6798cb4f3154 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp @@ -29,11 +29,7 @@ #include #include -#if __has_include() #include "dpnp4pybind11.hpp" -#else -#include "dpctl4pybind11.hpp" -#endif #include "elementwise_functions_type_utils.hpp" diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp index 129a89a49dbe..58fe43c01589 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp @@ -31,11 +31,7 @@ #include #include -#if __has_include() #include "dpnp4pybind11.hpp" -#else -#include "dpctl4pybind11.hpp" -#endif // dpctl tensor headers #include "utils/type_dispatch.hpp" From 6944e0ace8fc5f18c5b4740f76d9b6d567a56f60 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 23 Jan 2026 08:22:15 -0800 Subject: [PATCH 13/18] Add missing pybind11 include --- dpnp/backend/extensions/indexing/choose.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dpnp/backend/extensions/indexing/choose.cpp b/dpnp/backend/extensions/indexing/choose.cpp index 05d1bfe15385..7b5284418b00 100644 --- a/dpnp/backend/extensions/indexing/choose.cpp +++ b/dpnp/backend/extensions/indexing/choose.cpp @@ -37,6 +37,7 @@ #include #include +#include #include "dpnp4pybind11.hpp" From 135590e76c3afea27d7e985cb4d7982f933d0ace Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Sat, 24 Jan 2026 04:17:34 -0800 Subject: [PATCH 14/18] py::dtype requies to include --- .../extensions/common/ext/details/common_internal.hpp | 4 +++- .../elementwise_functions/elementwise_functions.hpp | 2 ++ .../elementwise_functions_type_utils.cpp | 2 ++ .../extensions/ufunc/elementwise_functions/bitwise_count.cpp | 3 +++ .../extensions/ufunc/elementwise_functions/degrees.cpp | 3 +++ .../extensions/ufunc/elementwise_functions/divmod.cpp | 3 +++ .../extensions/ufunc/elementwise_functions/erf_funcs.cpp | 3 +++ dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp | 3 +++ .../extensions/ufunc/elementwise_functions/float_power.cpp | 3 +++ dpnp/backend/extensions/ufunc/elementwise_functions/fmax.cpp | 3 +++ dpnp/backend/extensions/ufunc/elementwise_functions/fmin.cpp | 3 +++ dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp | 3 +++ .../backend/extensions/ufunc/elementwise_functions/frexp.cpp | 3 +++ dpnp/backend/extensions/ufunc/elementwise_functions/gcd.cpp | 3 +++ .../extensions/ufunc/elementwise_functions/heaviside.cpp | 3 +++ dpnp/backend/extensions/ufunc/elementwise_functions/i0.cpp | 3 +++ .../extensions/ufunc/elementwise_functions/interpolate.cpp | 5 +++-- .../extensions/ufunc/elementwise_functions/isclose.cpp | 5 +++-- dpnp/backend/extensions/ufunc/elementwise_functions/lcm.cpp | 3 +++ .../backend/extensions/ufunc/elementwise_functions/ldexp.cpp | 3 +++ .../extensions/ufunc/elementwise_functions/logaddexp2.cpp | 3 +++ dpnp/backend/extensions/ufunc/elementwise_functions/modf.cpp | 3 +++ .../extensions/ufunc/elementwise_functions/radians.cpp | 3 +++ dpnp/backend/extensions/ufunc/elementwise_functions/sinc.cpp | 3 +++ .../extensions/ufunc/elementwise_functions/spacing.cpp | 3 +++ 25 files changed, 73 insertions(+), 5 deletions(-) diff --git 
a/dpnp/backend/extensions/common/ext/details/common_internal.hpp b/dpnp/backend/extensions/common/ext/details/common_internal.hpp index 31d9671a0a43..8db72ce32318 100644 --- a/dpnp/backend/extensions/common/ext/details/common_internal.hpp +++ b/dpnp/backend/extensions/common/ext/details/common_internal.hpp @@ -30,9 +30,11 @@ #include +#include +#include + #include "ext/common.hpp" #include "utils/type_dispatch.hpp" -#include namespace dpctl_td_ns = dpctl::tensor::type_dispatch; diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp index b1634eafef5a..e23f74a678dc 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp @@ -34,7 +34,9 @@ #include #include +#include #include + #include #include "dpnp4pybind11.hpp" diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp index 6798cb4f3154..7300f938eabb 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp @@ -26,7 +26,9 @@ // THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************** +#include #include + #include #include "dpnp4pybind11.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/bitwise_count.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/bitwise_count.cpp index 14e8b7b5ed35..9fe5d5d43c7c 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/bitwise_count.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/bitwise_count.cpp @@ -30,6 +30,9 @@ #include #include +#include +#include + #include #include "dpnp4pybind11.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/degrees.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/degrees.cpp index 511ea759ae35..9b15f8b29895 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/degrees.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/degrees.cpp @@ -29,6 +29,9 @@ #include #include +#include +#include + #include #include "dpnp4pybind11.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/divmod.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/divmod.cpp index 93d04ed7940e..599a5eca1518 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/divmod.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/divmod.cpp @@ -30,6 +30,9 @@ #include #include +#include +#include + #include #include "dpnp4pybind11.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/erf_funcs.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/erf_funcs.cpp index e209f72a83b2..c739cd5f119d 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/erf_funcs.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/erf_funcs.cpp @@ -29,6 +29,9 @@ #include #include +#include +#include + #include #include "dpnp4pybind11.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp index d673533e599b..640d8629edbd 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp @@ -29,6 +29,9 
@@ #include #include +#include +#include + #include #include "dpnp4pybind11.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/float_power.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/float_power.cpp index 9d42630fd00c..0f065e6dad0d 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/float_power.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/float_power.cpp @@ -30,6 +30,9 @@ #include #include +#include +#include + #include #include "dpnp4pybind11.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fmax.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fmax.cpp index 70e8a434e7ac..3882f24611a5 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/fmax.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fmax.cpp @@ -28,6 +28,9 @@ #include +#include +#include + #include #include "dpnp4pybind11.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fmin.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fmin.cpp index d9c94109fdd0..1fd8798572e5 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/fmin.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fmin.cpp @@ -28,6 +28,9 @@ #include +#include +#include + #include #include "dpnp4pybind11.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp index 9db1f7873f5b..1dca65e622cf 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp @@ -30,6 +30,9 @@ #include #include +#include +#include + #include #include "dpnp4pybind11.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.cpp index 0c4d1b1b9252..b1367bd82540 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.cpp @@ -31,6 +31,9 @@ #include #include +#include +#include + #include #include "dpnp4pybind11.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/gcd.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/gcd.cpp index 1a570488cc1f..d21ef703fecf 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/gcd.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/gcd.cpp @@ -30,6 +30,9 @@ #include #include +#include +#include + #include #include "dpnp4pybind11.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/heaviside.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/heaviside.cpp index 69db72c7142b..29b8a475a8a9 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/heaviside.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/heaviside.cpp @@ -29,6 +29,9 @@ #include #include +#include +#include + #include #include "dpnp4pybind11.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/i0.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/i0.cpp index 82c1c7cb27ad..9e7aaba5c90c 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/i0.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/i0.cpp @@ -29,6 +29,9 @@ #include #include +#include +#include + #include #include "dpnp4pybind11.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp index f8ce8f007369..d22c51c29fde 100644 --- 
a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp @@ -35,11 +35,12 @@ #include #include +#include +#include + #include #include "dpnp4pybind11.hpp" -#include -#include // dpctl tensor headers #include "utils/type_dispatch.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp index 34577aa7ba68..37949016c905 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp @@ -32,11 +32,12 @@ #include #include +#include +#include + #include #include "dpnp4pybind11.hpp" -#include -#include #include "kernels/elementwise_functions/isclose.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/lcm.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/lcm.cpp index c2d2e801fed8..3d5d34aae4ab 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/lcm.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/lcm.cpp @@ -30,6 +30,9 @@ #include #include +#include +#include + #include #include "dpnp4pybind11.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/ldexp.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/ldexp.cpp index 5e413b30735d..15ceb91fbd78 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/ldexp.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/ldexp.cpp @@ -30,6 +30,9 @@ #include #include +#include +#include + #include #include "dpnp4pybind11.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/logaddexp2.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/logaddexp2.cpp index 4f215c8b98a1..a63b3e3431f0 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/logaddexp2.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/logaddexp2.cpp @@ -28,6 +28,9 @@ #include +#include +#include + #include #include "dpnp4pybind11.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/modf.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/modf.cpp index 7885e26217f0..784c83b66cd5 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/modf.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/modf.cpp @@ -31,6 +31,9 @@ #include #include +#include +#include + #include #include "dpnp4pybind11.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/radians.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/radians.cpp index 96c1fc2f601a..ea21ad42e140 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/radians.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/radians.cpp @@ -29,6 +29,9 @@ #include #include +#include +#include + #include #include "dpnp4pybind11.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/sinc.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/sinc.cpp index afba8db01bb2..eebeb82b7124 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/sinc.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/sinc.cpp @@ -30,6 +30,9 @@ #include #include +#include +#include + #include #include "dpnp4pybind11.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/spacing.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/spacing.cpp index ca4e9b8661b2..ec81be1bbe03 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/spacing.cpp +++ 
b/dpnp/backend/extensions/ufunc/elementwise_functions/spacing.cpp @@ -29,6 +29,9 @@ #include #include +#include +#include + #include #include "dpnp4pybind11.hpp" From 2884939ce327db01d152fb8fc92ed2a2069b6302 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Sat, 24 Jan 2026 06:15:26 -0800 Subject: [PATCH 15/18] Include is required to provide conversion from std::vector to python list --- dpnp/backend/extensions/lapack/heevd.cpp | 1 + dpnp/backend/extensions/lapack/heevd_batch.cpp | 1 + dpnp/backend/extensions/lapack/syevd.cpp | 1 + dpnp/backend/extensions/lapack/syevd_batch.cpp | 1 + dpnp/backend/extensions/statistics/histogram.cpp | 1 + .../extensions/ufunc/elementwise_functions/bitwise_count.cpp | 1 + .../backend/extensions/ufunc/elementwise_functions/degrees.cpp | 1 + dpnp/backend/extensions/ufunc/elementwise_functions/divmod.cpp | 1 + .../extensions/ufunc/elementwise_functions/erf_funcs.cpp | 1 + dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp | 1 + .../extensions/ufunc/elementwise_functions/float_power.cpp | 1 + dpnp/backend/extensions/ufunc/elementwise_functions/fmax.cpp | 1 + dpnp/backend/extensions/ufunc/elementwise_functions/fmin.cpp | 1 + dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp | 1 + dpnp/backend/extensions/ufunc/elementwise_functions/frexp.cpp | 1 + dpnp/backend/extensions/ufunc/elementwise_functions/gcd.cpp | 1 + .../extensions/ufunc/elementwise_functions/heaviside.cpp | 1 + dpnp/backend/extensions/ufunc/elementwise_functions/i0.cpp | 1 + .../extensions/ufunc/elementwise_functions/interpolate.cpp | 1 + .../backend/extensions/ufunc/elementwise_functions/isclose.cpp | 1 + dpnp/backend/extensions/ufunc/elementwise_functions/lcm.cpp | 1 + dpnp/backend/extensions/ufunc/elementwise_functions/ldexp.cpp | 1 + .../extensions/ufunc/elementwise_functions/logaddexp2.cpp | 1 + dpnp/backend/extensions/ufunc/elementwise_functions/modf.cpp | 1 + .../extensions/ufunc/elementwise_functions/nan_to_num.cpp | 3 ++- .../backend/extensions/ufunc/elementwise_functions/radians.cpp | 1 + dpnp/backend/extensions/ufunc/elementwise_functions/sinc.cpp | 1 + .../backend/extensions/ufunc/elementwise_functions/spacing.cpp | 1 + dpnp/backend/extensions/vm/abs.cpp | 3 +++ dpnp/backend/extensions/vm/acos.cpp | 3 +++ dpnp/backend/extensions/vm/acosh.cpp | 3 +++ dpnp/backend/extensions/vm/add.cpp | 3 +++ dpnp/backend/extensions/vm/arg.cpp | 3 +++ dpnp/backend/extensions/vm/asin.cpp | 3 +++ dpnp/backend/extensions/vm/asinh.cpp | 3 +++ dpnp/backend/extensions/vm/atan.cpp | 3 +++ dpnp/backend/extensions/vm/atan2.cpp | 3 +++ dpnp/backend/extensions/vm/atanh.cpp | 3 +++ dpnp/backend/extensions/vm/cbrt.cpp | 3 +++ dpnp/backend/extensions/vm/ceil.cpp | 3 +++ dpnp/backend/extensions/vm/conj.cpp | 3 +++ dpnp/backend/extensions/vm/copysign.cpp | 3 +++ dpnp/backend/extensions/vm/cos.cpp | 3 +++ dpnp/backend/extensions/vm/cosh.cpp | 3 +++ dpnp/backend/extensions/vm/div.cpp | 3 +++ dpnp/backend/extensions/vm/erf_funcs.cpp | 3 +++ dpnp/backend/extensions/vm/exp.cpp | 3 +++ dpnp/backend/extensions/vm/exp2.cpp | 3 +++ dpnp/backend/extensions/vm/expm1.cpp | 3 +++ dpnp/backend/extensions/vm/floor.cpp | 3 +++ dpnp/backend/extensions/vm/fmax.cpp | 3 +++ dpnp/backend/extensions/vm/fmin.cpp | 3 +++ dpnp/backend/extensions/vm/fmod.cpp | 3 +++ dpnp/backend/extensions/vm/hypot.cpp | 3 +++ dpnp/backend/extensions/vm/i0.cpp | 3 +++ dpnp/backend/extensions/vm/inv.cpp | 3 +++ dpnp/backend/extensions/vm/ln.cpp | 3 +++ dpnp/backend/extensions/vm/log10.cpp | 3 +++ dpnp/backend/extensions/vm/log1p.cpp | 3 +++ 
dpnp/backend/extensions/vm/log2.cpp | 3 +++ dpnp/backend/extensions/vm/modf.cpp | 3 +++ dpnp/backend/extensions/vm/mul.cpp | 3 +++ dpnp/backend/extensions/vm/nextafter.cpp | 3 +++ dpnp/backend/extensions/vm/pow.cpp | 3 +++ dpnp/backend/extensions/vm/rint.cpp | 3 +++ dpnp/backend/extensions/vm/sin.cpp | 3 +++ dpnp/backend/extensions/vm/sinh.cpp | 3 +++ dpnp/backend/extensions/vm/sqr.cpp | 3 +++ dpnp/backend/extensions/vm/sqrt.cpp | 3 +++ dpnp/backend/extensions/vm/sub.cpp | 3 +++ dpnp/backend/extensions/vm/tan.cpp | 3 +++ dpnp/backend/extensions/vm/tanh.cpp | 3 +++ dpnp/backend/extensions/vm/trunc.cpp | 3 +++ 73 files changed, 164 insertions(+), 1 deletion(-) diff --git a/dpnp/backend/extensions/lapack/heevd.cpp b/dpnp/backend/extensions/lapack/heevd.cpp index 5990e5344a17..923e950b1383 100644 --- a/dpnp/backend/extensions/lapack/heevd.cpp +++ b/dpnp/backend/extensions/lapack/heevd.cpp @@ -28,6 +28,7 @@ #include +#include #include #include "evd_common.hpp" diff --git a/dpnp/backend/extensions/lapack/heevd_batch.cpp b/dpnp/backend/extensions/lapack/heevd_batch.cpp index e1c1a96bc320..9d7c3300dbf7 100644 --- a/dpnp/backend/extensions/lapack/heevd_batch.cpp +++ b/dpnp/backend/extensions/lapack/heevd_batch.cpp @@ -28,6 +28,7 @@ #include +#include #include #include "common_helpers.hpp" diff --git a/dpnp/backend/extensions/lapack/syevd.cpp b/dpnp/backend/extensions/lapack/syevd.cpp index af69cf9e6b7e..3c09ca4f587b 100644 --- a/dpnp/backend/extensions/lapack/syevd.cpp +++ b/dpnp/backend/extensions/lapack/syevd.cpp @@ -28,6 +28,7 @@ #include +#include #include #include "evd_common.hpp" diff --git a/dpnp/backend/extensions/lapack/syevd_batch.cpp b/dpnp/backend/extensions/lapack/syevd_batch.cpp index 0c326e5d79bb..36d1c820f00d 100644 --- a/dpnp/backend/extensions/lapack/syevd_batch.cpp +++ b/dpnp/backend/extensions/lapack/syevd_batch.cpp @@ -28,6 +28,7 @@ #include +#include #include #include "common_helpers.hpp" diff --git a/dpnp/backend/extensions/statistics/histogram.cpp b/dpnp/backend/extensions/statistics/histogram.cpp index 621aa36cfbd1..afc5d9638f48 100644 --- a/dpnp/backend/extensions/statistics/histogram.cpp +++ b/dpnp/backend/extensions/statistics/histogram.cpp @@ -33,6 +33,7 @@ #include #include +#include #include "dpnp4pybind11.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/bitwise_count.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/bitwise_count.cpp index 9fe5d5d43c7c..761bd330a326 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/bitwise_count.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/bitwise_count.cpp @@ -32,6 +32,7 @@ #include #include +#include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/degrees.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/degrees.cpp index 9b15f8b29895..729fcb576c77 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/degrees.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/degrees.cpp @@ -31,6 +31,7 @@ #include #include +#include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/divmod.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/divmod.cpp index 599a5eca1518..1bb3859a39f4 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/divmod.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/divmod.cpp @@ -32,6 +32,7 @@ #include #include +#include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/erf_funcs.cpp 
b/dpnp/backend/extensions/ufunc/elementwise_functions/erf_funcs.cpp index c739cd5f119d..fff0118d06aa 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/erf_funcs.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/erf_funcs.cpp @@ -31,6 +31,7 @@ #include #include +#include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp index 640d8629edbd..f7c2183633af 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp @@ -31,6 +31,7 @@ #include #include +#include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/float_power.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/float_power.cpp index 0f065e6dad0d..43927eb93806 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/float_power.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/float_power.cpp @@ -32,6 +32,7 @@ #include #include +#include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fmax.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fmax.cpp index 3882f24611a5..9471feaf2166 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/fmax.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fmax.cpp @@ -30,6 +30,7 @@ #include #include +#include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fmin.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fmin.cpp index 1fd8798572e5..8e279897f414 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/fmin.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fmin.cpp @@ -30,6 +30,7 @@ #include #include +#include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp index 1dca65e622cf..83fb750b6907 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp @@ -32,6 +32,7 @@ #include #include +#include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.cpp index b1367bd82540..17e09f3ee816 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.cpp @@ -33,6 +33,7 @@ #include #include +#include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/gcd.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/gcd.cpp index d21ef703fecf..0481365356ca 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/gcd.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/gcd.cpp @@ -32,6 +32,7 @@ #include #include +#include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/heaviside.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/heaviside.cpp index 29b8a475a8a9..62affd206420 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/heaviside.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/heaviside.cpp @@ -31,6 +31,7 @@ #include #include +#include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/i0.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/i0.cpp index 9e7aaba5c90c..53ded341b58b 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/i0.cpp +++ 
b/dpnp/backend/extensions/ufunc/elementwise_functions/i0.cpp @@ -31,6 +31,7 @@ #include #include +#include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp index d22c51c29fde..82e96ab732de 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp @@ -37,6 +37,7 @@ #include #include +#include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp index 37949016c905..3025cbf16586 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp @@ -34,6 +34,7 @@ #include #include +#include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/lcm.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/lcm.cpp index 3d5d34aae4ab..35138e903eac 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/lcm.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/lcm.cpp @@ -32,6 +32,7 @@ #include #include +#include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/ldexp.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/ldexp.cpp index 15ceb91fbd78..44ef51726a6a 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/ldexp.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/ldexp.cpp @@ -32,6 +32,7 @@ #include #include +#include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/logaddexp2.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/logaddexp2.cpp index a63b3e3431f0..e37f13b119d6 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/logaddexp2.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/logaddexp2.cpp @@ -30,6 +30,7 @@ #include #include +#include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/modf.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/modf.cpp index 784c83b66cd5..266103248521 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/modf.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/modf.cpp @@ -33,6 +33,7 @@ #include #include +#include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/nan_to_num.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/nan_to_num.cpp index b430dc51f974..c30d388f8afd 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/nan_to_num.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/nan_to_num.cpp @@ -38,11 +38,12 @@ #include -#include "dpnp4pybind11.hpp" #include #include #include +#include "dpnp4pybind11.hpp" + #include "kernels/elementwise_functions/nan_to_num.hpp" #include "../../elementwise_functions/simplify_iteration_space.hpp" diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/radians.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/radians.cpp index ea21ad42e140..0a481fd33d11 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/radians.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/radians.cpp @@ -31,6 +31,7 @@ #include #include +#include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/sinc.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/sinc.cpp index eebeb82b7124..87a911472db2 100644 --- 
a/dpnp/backend/extensions/ufunc/elementwise_functions/sinc.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/sinc.cpp @@ -32,6 +32,7 @@ #include #include +#include #include diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/spacing.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/spacing.cpp index ec81be1bbe03..4c14582f30ae 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/spacing.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/spacing.cpp @@ -31,6 +31,7 @@ #include #include +#include #include diff --git a/dpnp/backend/extensions/vm/abs.cpp b/dpnp/backend/extensions/vm/abs.cpp index a2432f5bedc6..1dc8143dd5ff 100644 --- a/dpnp/backend/extensions/vm/abs.cpp +++ b/dpnp/backend/extensions/vm/abs.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "abs.hpp" diff --git a/dpnp/backend/extensions/vm/acos.cpp b/dpnp/backend/extensions/vm/acos.cpp index 01e4d5ab35f9..15b4ce80cc3c 100644 --- a/dpnp/backend/extensions/vm/acos.cpp +++ b/dpnp/backend/extensions/vm/acos.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "acos.hpp" diff --git a/dpnp/backend/extensions/vm/acosh.cpp b/dpnp/backend/extensions/vm/acosh.cpp index b1136163d684..eed835b78e10 100644 --- a/dpnp/backend/extensions/vm/acosh.cpp +++ b/dpnp/backend/extensions/vm/acosh.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "acosh.hpp" diff --git a/dpnp/backend/extensions/vm/add.cpp b/dpnp/backend/extensions/vm/add.cpp index 572eadb83cec..a58aac727cd1 100644 --- a/dpnp/backend/extensions/vm/add.cpp +++ b/dpnp/backend/extensions/vm/add.cpp @@ -36,6 +36,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "add.hpp" diff --git a/dpnp/backend/extensions/vm/arg.cpp b/dpnp/backend/extensions/vm/arg.cpp index 40a15082f0ec..c50c4a33dee1 100644 --- a/dpnp/backend/extensions/vm/arg.cpp +++ b/dpnp/backend/extensions/vm/arg.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "arg.hpp" diff --git a/dpnp/backend/extensions/vm/asin.cpp b/dpnp/backend/extensions/vm/asin.cpp index 8cf73f3fe572..5af7033fed21 100644 --- a/dpnp/backend/extensions/vm/asin.cpp +++ b/dpnp/backend/extensions/vm/asin.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "asin.hpp" diff --git a/dpnp/backend/extensions/vm/asinh.cpp b/dpnp/backend/extensions/vm/asinh.cpp index a3404d2c5415..5b0f8ed13106 100644 --- a/dpnp/backend/extensions/vm/asinh.cpp +++ b/dpnp/backend/extensions/vm/asinh.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "asinh.hpp" diff --git a/dpnp/backend/extensions/vm/atan.cpp b/dpnp/backend/extensions/vm/atan.cpp index a89cb8f9a308..2255000c1c4b 100644 --- a/dpnp/backend/extensions/vm/atan.cpp +++ b/dpnp/backend/extensions/vm/atan.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "atan.hpp" diff --git a/dpnp/backend/extensions/vm/atan2.cpp b/dpnp/backend/extensions/vm/atan2.cpp index bcdf1daae1b3..bf29e2921a1d 100644 --- a/dpnp/backend/extensions/vm/atan2.cpp +++ b/dpnp/backend/extensions/vm/atan2.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "atan2.hpp" diff --git a/dpnp/backend/extensions/vm/atanh.cpp b/dpnp/backend/extensions/vm/atanh.cpp index d4ef24663d02..9daab09980e6 100644 
--- a/dpnp/backend/extensions/vm/atanh.cpp +++ b/dpnp/backend/extensions/vm/atanh.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "atanh.hpp" diff --git a/dpnp/backend/extensions/vm/cbrt.cpp b/dpnp/backend/extensions/vm/cbrt.cpp index 47584c8f6811..34ff8dd913ac 100644 --- a/dpnp/backend/extensions/vm/cbrt.cpp +++ b/dpnp/backend/extensions/vm/cbrt.cpp @@ -34,6 +34,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "cbrt.hpp" diff --git a/dpnp/backend/extensions/vm/ceil.cpp b/dpnp/backend/extensions/vm/ceil.cpp index d170b66d7d2c..e76a30d28317 100644 --- a/dpnp/backend/extensions/vm/ceil.cpp +++ b/dpnp/backend/extensions/vm/ceil.cpp @@ -34,6 +34,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "ceil.hpp" diff --git a/dpnp/backend/extensions/vm/conj.cpp b/dpnp/backend/extensions/vm/conj.cpp index ee000d5ee40d..f77020cf1d55 100644 --- a/dpnp/backend/extensions/vm/conj.cpp +++ b/dpnp/backend/extensions/vm/conj.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/copysign.cpp b/dpnp/backend/extensions/vm/copysign.cpp index 8b6714865204..15c0fceec413 100644 --- a/dpnp/backend/extensions/vm/copysign.cpp +++ b/dpnp/backend/extensions/vm/copysign.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/cos.cpp b/dpnp/backend/extensions/vm/cos.cpp index 62ecff7ea6a8..7c9b0c35d6ca 100644 --- a/dpnp/backend/extensions/vm/cos.cpp +++ b/dpnp/backend/extensions/vm/cos.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/cosh.cpp b/dpnp/backend/extensions/vm/cosh.cpp index ec81142eb6ce..a95c7075ba61 100644 --- a/dpnp/backend/extensions/vm/cosh.cpp +++ b/dpnp/backend/extensions/vm/cosh.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/div.cpp b/dpnp/backend/extensions/vm/div.cpp index 6b8c1f781955..6e0cb4d0439f 100644 --- a/dpnp/backend/extensions/vm/div.cpp +++ b/dpnp/backend/extensions/vm/div.cpp @@ -36,6 +36,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/erf_funcs.cpp b/dpnp/backend/extensions/vm/erf_funcs.cpp index 2d4be369dc13..7be7f691edcf 100644 --- a/dpnp/backend/extensions/vm/erf_funcs.cpp +++ b/dpnp/backend/extensions/vm/erf_funcs.cpp @@ -34,6 +34,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/exp.cpp b/dpnp/backend/extensions/vm/exp.cpp index de5f34c404a8..31f50f36171d 100644 --- a/dpnp/backend/extensions/vm/exp.cpp +++ b/dpnp/backend/extensions/vm/exp.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/exp2.cpp b/dpnp/backend/extensions/vm/exp2.cpp index 1f1aa6ab90a8..41f18351fa7d 100644 --- a/dpnp/backend/extensions/vm/exp2.cpp +++ b/dpnp/backend/extensions/vm/exp2.cpp @@ -34,6 +34,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/expm1.cpp b/dpnp/backend/extensions/vm/expm1.cpp index 5f803622b1a3..37440cab9b0c 100644 --- 
a/dpnp/backend/extensions/vm/expm1.cpp +++ b/dpnp/backend/extensions/vm/expm1.cpp @@ -34,6 +34,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/floor.cpp b/dpnp/backend/extensions/vm/floor.cpp index a12bdb18c719..771d141e7f6a 100644 --- a/dpnp/backend/extensions/vm/floor.cpp +++ b/dpnp/backend/extensions/vm/floor.cpp @@ -34,6 +34,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/fmax.cpp b/dpnp/backend/extensions/vm/fmax.cpp index db4ca265ec42..d01b3ef3dc42 100644 --- a/dpnp/backend/extensions/vm/fmax.cpp +++ b/dpnp/backend/extensions/vm/fmax.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/fmin.cpp b/dpnp/backend/extensions/vm/fmin.cpp index ca933a9f1869..6fbebba556f8 100644 --- a/dpnp/backend/extensions/vm/fmin.cpp +++ b/dpnp/backend/extensions/vm/fmin.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/fmod.cpp b/dpnp/backend/extensions/vm/fmod.cpp index 83337dc1f7fd..1330453d6f84 100644 --- a/dpnp/backend/extensions/vm/fmod.cpp +++ b/dpnp/backend/extensions/vm/fmod.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/hypot.cpp b/dpnp/backend/extensions/vm/hypot.cpp index bf01b8fb42b6..a9b3d3c12288 100644 --- a/dpnp/backend/extensions/vm/hypot.cpp +++ b/dpnp/backend/extensions/vm/hypot.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/i0.cpp b/dpnp/backend/extensions/vm/i0.cpp index afdf34e8cabc..50f692ebd958 100644 --- a/dpnp/backend/extensions/vm/i0.cpp +++ b/dpnp/backend/extensions/vm/i0.cpp @@ -34,6 +34,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/inv.cpp b/dpnp/backend/extensions/vm/inv.cpp index 6be886c0b0f2..eda08a6d0cd5 100644 --- a/dpnp/backend/extensions/vm/inv.cpp +++ b/dpnp/backend/extensions/vm/inv.cpp @@ -34,6 +34,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/ln.cpp b/dpnp/backend/extensions/vm/ln.cpp index c6bfb930524d..a5365e4d5a8b 100644 --- a/dpnp/backend/extensions/vm/ln.cpp +++ b/dpnp/backend/extensions/vm/ln.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/log10.cpp b/dpnp/backend/extensions/vm/log10.cpp index 7e6e611d01c8..c04fb602f63d 100644 --- a/dpnp/backend/extensions/vm/log10.cpp +++ b/dpnp/backend/extensions/vm/log10.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/log1p.cpp b/dpnp/backend/extensions/vm/log1p.cpp index 579546f6b3f7..04416bf37185 100644 --- a/dpnp/backend/extensions/vm/log1p.cpp +++ b/dpnp/backend/extensions/vm/log1p.cpp @@ -34,6 +34,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/log2.cpp b/dpnp/backend/extensions/vm/log2.cpp index 7c3ecb0731d7..752caa261977 100644 --- 
a/dpnp/backend/extensions/vm/log2.cpp +++ b/dpnp/backend/extensions/vm/log2.cpp @@ -34,6 +34,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/modf.cpp b/dpnp/backend/extensions/vm/modf.cpp index 283cfadb9b78..418e4e44f7f7 100644 --- a/dpnp/backend/extensions/vm/modf.cpp +++ b/dpnp/backend/extensions/vm/modf.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/mul.cpp b/dpnp/backend/extensions/vm/mul.cpp index a689e88ae0e1..557cfb8882b3 100644 --- a/dpnp/backend/extensions/vm/mul.cpp +++ b/dpnp/backend/extensions/vm/mul.cpp @@ -36,6 +36,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/nextafter.cpp b/dpnp/backend/extensions/vm/nextafter.cpp index 03b19529fc72..a8ff710bda77 100644 --- a/dpnp/backend/extensions/vm/nextafter.cpp +++ b/dpnp/backend/extensions/vm/nextafter.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/pow.cpp b/dpnp/backend/extensions/vm/pow.cpp index 1d8e8fe8afca..f0db87d1ef48 100644 --- a/dpnp/backend/extensions/vm/pow.cpp +++ b/dpnp/backend/extensions/vm/pow.cpp @@ -36,6 +36,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/rint.cpp b/dpnp/backend/extensions/vm/rint.cpp index f3d37b92a59a..86931f259a04 100644 --- a/dpnp/backend/extensions/vm/rint.cpp +++ b/dpnp/backend/extensions/vm/rint.cpp @@ -34,6 +34,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/sin.cpp b/dpnp/backend/extensions/vm/sin.cpp index 39258ceb60b9..7bb6ec321d2a 100644 --- a/dpnp/backend/extensions/vm/sin.cpp +++ b/dpnp/backend/extensions/vm/sin.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/sinh.cpp b/dpnp/backend/extensions/vm/sinh.cpp index 5aa5a31a8f84..5c351afd3b82 100644 --- a/dpnp/backend/extensions/vm/sinh.cpp +++ b/dpnp/backend/extensions/vm/sinh.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/sqr.cpp b/dpnp/backend/extensions/vm/sqr.cpp index bf008a68a68f..9d5cb8af5f2c 100644 --- a/dpnp/backend/extensions/vm/sqr.cpp +++ b/dpnp/backend/extensions/vm/sqr.cpp @@ -34,6 +34,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/sqrt.cpp b/dpnp/backend/extensions/vm/sqrt.cpp index 8bd26c0fe1a9..5ab3489c1288 100644 --- a/dpnp/backend/extensions/vm/sqrt.cpp +++ b/dpnp/backend/extensions/vm/sqrt.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/sub.cpp b/dpnp/backend/extensions/vm/sub.cpp index b0503754194f..401588d4b65f 100644 --- a/dpnp/backend/extensions/vm/sub.cpp +++ b/dpnp/backend/extensions/vm/sub.cpp @@ -36,6 +36,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/tan.cpp b/dpnp/backend/extensions/vm/tan.cpp index 9fe4cb64d41c..590320034934 100644 --- a/dpnp/backend/extensions/vm/tan.cpp 
+++ b/dpnp/backend/extensions/vm/tan.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/tanh.cpp b/dpnp/backend/extensions/vm/tanh.cpp index 70f4ef6142d5..8febd94f2ec8 100644 --- a/dpnp/backend/extensions/vm/tanh.cpp +++ b/dpnp/backend/extensions/vm/tanh.cpp @@ -35,6 +35,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" diff --git a/dpnp/backend/extensions/vm/trunc.cpp b/dpnp/backend/extensions/vm/trunc.cpp index c6cc4f9e8265..4ec788ccf949 100644 --- a/dpnp/backend/extensions/vm/trunc.cpp +++ b/dpnp/backend/extensions/vm/trunc.cpp @@ -34,6 +34,9 @@ #include #include +#include +#include + #include "dpnp4pybind11.hpp" #include "common.hpp" From 617f6e1b8d84e968455b48cceea3c0ed15a014be Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 26 Jan 2026 05:29:21 -0800 Subject: [PATCH 16/18] Extend isort configuration to add known third party lib as dpctl --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 6394cf118dcf..e88c44053dc0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -134,8 +134,10 @@ source = [ ensure_newline_before_comments = true force_grid_wrap = 0 include_trailing_comma = true +known_third_party = ["dpctl"] line_length = 80 multi_line_output = 3 +profile = "black" skip = ["dpnp/__init__.py"] split_on_trailing_comma = true use_parentheses = true From 2d3f41d566518ee9f83983167a083ff111969e15 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 26 Jan 2026 05:44:20 -0800 Subject: [PATCH 17/18] =?UTF-8?q?Disable=20pylint=E2=80=99s=20import-order?= =?UTF-8?q?=20checks=20and=20fully=20rely=20on=20isort?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index e88c44053dc0..2f6fcebc0e4d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -143,6 +143,11 @@ split_on_trailing_comma = true use_parentheses = true [tool.pylint.basic] +disable = [ + "wrong-import-order", + "ungrouped-imports", + "wrong-import-position" +] include-naming-hint = true [tool.pylint.classes] From e2eb3cc6c061a6147d32f2a69599ca23c2d5628c Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 26 Jan 2026 05:53:44 -0800 Subject: [PATCH 18/18] Update pylint configuration to skip checking dpctl toplevel package and all dpctl submodules --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 2f6fcebc0e4d..cdf592535d11 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -148,6 +148,7 @@ disable = [ "ungrouped-imports", "wrong-import-position" ] +ignored-modules = ["dpctl", "dpctl.*"] include-naming-hint = true [tool.pylint.classes]
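
Note on PATCH 15: the commit adds one extra pybind11 include to every translation unit that hands a std::vector back to Python. Assuming the header in question is <pybind11/stl.h> (the pybind11 header that registers type casters for STL containers), the minimal sketch below illustrates the effect; the module and function names are hypothetical and do not appear in the patches above.

    // Minimal sketch, assuming <pybind11/stl.h> is the include added by PATCH 15.
    // Without it, pybind11 has no type caster for std::vector<T>, so a bound
    // function returning one cannot hand its result to Python; with it, the
    // vector is copied into a Python list at the call boundary.
    #include <vector>

    #include <pybind11/pybind11.h>
    #include <pybind11/stl.h> // std::vector <-> Python list conversion

    static std::vector<int> make_shape()
    {
        return {2, 3, 4};
    }

    PYBIND11_MODULE(stl_example, m)
    {
        // Returns [2, 3, 4] as a Python list thanks to the stl.h caster.
        m.def("make_shape", &make_shape);
    }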