diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index a80a6669ffdd..926d10859af4 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -47,7 +47,7 @@ jobs: steps: - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa # 0.12.1 + uses: styfle/cancel-workflow-action@3155a141048f8f89c06b4cdae32e7853e97536bc # 0.13.0 with: access_token: ${{ github.token }} diff --git a/.github/workflows/check-onemath.yaml b/.github/workflows/check-onemath.yaml index c6b12a4454f3..80bcccc1b5ab 100644 --- a/.github/workflows/check-onemath.yaml +++ b/.github/workflows/check-onemath.yaml @@ -34,7 +34,7 @@ jobs: steps: - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa # 0.12.1 + uses: styfle/cancel-workflow-action@3155a141048f8f89c06b4cdae32e7853e97536bc # 0.13.0 with: access_token: ${{ github.token }} diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index d195b51ae80b..e56c36d58782 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -49,7 +49,7 @@ jobs: steps: - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa # 0.12.1 + uses: styfle/cancel-workflow-action@3155a141048f8f89c06b4cdae32e7853e97536bc # 0.13.0 with: access_token: ${{ github.token }} diff --git a/.github/workflows/cron-run-tests.yaml b/.github/workflows/cron-run-tests.yaml index ff3fe6780700..4db380c73775 100644 --- a/.github/workflows/cron-run-tests.yaml +++ b/.github/workflows/cron-run-tests.yaml @@ -43,7 +43,7 @@ jobs: steps: - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa # 0.12.1 + uses: styfle/cancel-workflow-action@3155a141048f8f89c06b4cdae32e7853e97536bc # 0.13.0 with: access_token: ${{ github.token }} diff --git 
a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml index 734f006906f2..8149b63150d7 100644 --- a/.github/workflows/generate_coverage.yaml +++ b/.github/workflows/generate_coverage.yaml @@ -33,7 +33,7 @@ jobs: steps: - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa # 0.12.1 + uses: styfle/cancel-workflow-action@3155a141048f8f89c06b4cdae32e7853e97536bc # 0.13.0 with: access_token: ${{ github.token }} diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml index 62e31b249cbf..7fa391d342d0 100644 --- a/.github/workflows/openssf-scorecard.yml +++ b/.github/workflows/openssf-scorecard.yml @@ -72,6 +72,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 # v4.31.9 + uses: github/codeql-action/upload-sarif@cdefb33c0f6224e58673d9004f47f7cb3e328b89 # v4.31.10 with: sarif_file: results.sarif diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 83eae28e76b3..50b1175ffebc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/PyCQA/bandit - rev: '1.9.2' + rev: '1.9.3' hooks: - id: bandit pass_filenames: false @@ -64,7 +64,7 @@ repos: additional_dependencies: - tomli - repo: https://github.com/psf/black - rev: 25.12.0 + rev: 26.1.0 hooks: - id: black exclude: "dpnp/_version.py" @@ -118,7 +118,7 @@ repos: ] files: '^dpnp/(dpnp_iface.*|fft|linalg|scipy|dpnp_array)' - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks - rev: v2.15.0 + rev: v2.16.0 hooks: - id: pretty-format-toml args: [--autofix] @@ -127,7 +127,7 @@ repos: hooks: - id: actionlint - repo: https://github.com/BlankSpruce/gersemi - rev: 0.25.0 + rev: 0.25.1 hooks: - id: gersemi exclude: 
"dpnp/backend/cmake/Modules/" diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bd28a21d194..69b06cb64bf8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,23 +42,31 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum * Unified `dpnp` public API exports by consolidating function exports in `__init__.py` and removing wildcard imports [#2665](https://github.com/IntelPython/dpnp/pull/2665) [#2666](https://github.com/IntelPython/dpnp/pull/2666) * Updated tests to reflect the new scalar conversion rules for non-0D `usm_ndarray` [#2694](https://github.com/IntelPython/dpnp/pull/2694) * Compile indexing extension with `-fno-sycl-id-queries-fit-in-int` to support huge arrays [#2721](https://github.com/IntelPython/dpnp/pull/2721) +* Updated `dpnp.fix` to reuse `dpnp.trunc` internally [#2722](https://github.com/IntelPython/dpnp/pull/2722) +* Changed the build scripts and documentation due to `python setup.py develop` deprecation notice [#2716](https://github.com/IntelPython/dpnp/pull/2716) +* Clarified behavior on repeated `axes` in `dpnp.tensordot` and `dpnp.linalg.tensordot` functions [#2733](https://github.com/IntelPython/dpnp/pull/2733) ### Deprecated * `dpnp.asfarray` is deprecated. Use `dpnp.asarray` with an appropriate dtype instead [#2650](https://github.com/IntelPython/dpnp/pull/2650) * Passing the output array ``out`` positionally to `dpnp.minimum` and `dpnp.maximum` is deprecated. Pass the output with the keyword form, e.g. ``dpnp.minimum(a, b, out=c)`` [#2659](https://github.com/IntelPython/dpnp/pull/2659) * `dpnp.ndarray.T` property is deprecated for not two-dimensional array to be compatible with the Python array API standard. To achieve a similar behavior when ``a.ndim != 2``, either ``a.transpose()``, or ``a.mT`` (swaps the last two axes only), or ``dpnp.permute_dims(a, range(a.ndim)[::-1])`` can be used [#2681](https://github.com/IntelPython/dpnp/pull/2681) +* `dpnp.fix` is deprecated. 
Use `dpnp.trunc` instead, which provides identical functionality [#2730](https://github.com/IntelPython/dpnp/pull/2730) ### Removed * Dropped support for Python 3.9 [#2626](https://github.com/IntelPython/dpnp/pull/2626) * Removed the obsolete interface from DPNP to Numba JIT [#2647](https://github.com/IntelPython/dpnp/pull/2647) * Removed the `newshape` parameter from `dpnp.reshape`, which has been deprecated since dpnp 0.17.0. Pass it positionally or use `shape=` on newer versions [#2670](https://github.com/IntelPython/dpnp/pull/2670) +* Removed unused `pytest` configuration from `pyproject.toml` [#2729](https://github.com/IntelPython/dpnp/pull/2729) ### Fixed * Suppressed a potential deprecation warning triggered during import of the `dpctl.tensor` module [#2709](https://github.com/IntelPython/dpnp/pull/2709) * Corrected a phonetic spelling issue due to incorrect using of `a nd` in docstrings [#2719](https://github.com/IntelPython/dpnp/pull/2719) +* Resolved an issue causing `dpnp.linspace` to return an incorrect output shape when inputs were passed as arrays [#2712](https://github.com/IntelPython/dpnp/pull/2712) +* Resolved an issue where `dpnp` always returns the base allocation pointer, when the view start is expected [#2651](https://github.com/IntelPython/dpnp/pull/2651) +* Fixed an issue causing an exception in `dpnp.geomspace` and `dpnp.logspace` when called with explicit `device` keyword but any input array is allocated on another device [#2723](https://github.com/IntelPython/dpnp/pull/2723) ### Security diff --git a/CMakeLists.txt b/CMakeLists.txt index 66f5c776b52d..9d676232f08e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -107,7 +107,7 @@ set(DPNP_TARGET_CUDA Set to a truthy value (e.g., ON, TRUE) to use default architecture (sm_50), \ or to a specific architecture like sm_80." 
) -set(HIP_TARGETS "" CACHE STRING "HIP architecture for target") +set(DPNP_TARGET_HIP "" CACHE STRING "HIP architecture for target") set(_dpnp_sycl_targets) set(_use_onemath OFF) @@ -135,18 +135,28 @@ if("x${DPNP_SYCL_TARGETS}" STREQUAL "x") set(_use_onemath_cuda ON) endif() - if(HIP_TARGETS) - if(HIP_TARGETS MATCHES "^gfx") + if(DPNP_TARGET_HIP) + if(DPNP_TARGET_HIP MATCHES "^gfx") if("x${_dpnp_sycl_targets}" STREQUAL "x") - set(_dpnp_sycl_targets "amd_gpu_${HIP_TARGETS},spir64-unknown-unknown") + set(_dpnp_sycl_targets + "amd_gpu_${DPNP_TARGET_HIP},spir64-unknown-unknown" + ) else() - set(_dpnp_sycl_targets "amd_gpu_${HIP_TARGETS},${_dpnp_sycl_targets}") + set(_dpnp_sycl_targets + "amd_gpu_${DPNP_TARGET_HIP},${_dpnp_sycl_targets}" + ) endif() + set(_use_onemath_hip ON) + set(HIP_TARGETS + ${DPNP_TARGET_HIP} + CACHE STRING + "HIP GPU targets for oneMath" + ) else() message( FATAL_ERROR - "Invalid value for HIP_TARGETS: \"${HIP_TARGETS}\". " + "Invalid value for DPNP_TARGET_HIP: \"${DPNP_TARGET_HIP}\". " "Expected an architecture name starting with 'gfx', e.g. 'gfx1030'." 
) endif() @@ -159,11 +169,15 @@ else() endif() if("${DPNP_SYCL_TARGETS}" MATCHES "amd_gpu_") - set(_use_onemath_hip ON) - - if("x${HIP_TARGETS}" STREQUAL "x") - message(FATAL_ERROR "HIP_TARGETS must be specified when using HIP backend") + if("x${DPNP_TARGET_HIP}" STREQUAL "x") + message( + FATAL_ERROR + "DPNP_TARGET_HIP must be specified when using HIP backend" + ) endif() + + set(_use_onemath_hip ON) + set(HIP_TARGETS ${DPNP_TARGET_HIP} CACHE STRING "HIP GPU targets for oneMath") endif() if("${DPNP_SYCL_TARGETS}" MATCHES "amdgcn-amd-amdhsa") @@ -295,6 +309,16 @@ else() message(FATAL_ERROR "Unsupported system.") endif() +# Define flags for CMAKE_BUILD_TYPE=Coverage +set(CMAKE_C_FLAGS_COVERAGE "${CMAKE_C_FLAGS_DEBUG} -O1 -g1 -DDEBUG") +set(CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_DEBUG} -O1 -g1 -DDEBUG") +set(CMAKE_MODULE_LINKER_FLAGS_COVERAGE "${CMAKE_MODULE_LINKER_FLAGS_DEBUG}") +mark_as_advanced( + CMAKE_C_FLAGS_COVERAGE + CMAKE_CXX_FLAGS_COVERAGE + CMAKE_MODULE_LINKER_FLAGS_COVERAGE +) + if(DPNP_GENERATE_COVERAGE) string( CONCAT PROFILE_FLAGS diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 2fd55e07eb4b..661f44b50ed9 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -1,6 +1,6 @@ {% set max_compiler_and_mkl_version = environ.get("MAX_BUILD_CMPL_MKL_VERSION", "2026.0a0") %} {% set required_compiler_and_mkl_version = "2025.0" %} -{% set required_dpctl_version = "0.21.0" %} +{% set required_dpctl_version = "0.22.0*" %} {% set pyproject = load_file_data('pyproject.toml') %} {% set py_build_deps = pyproject.get('build-system', {}).get('requires', []) %} diff --git a/doc/0.builddoc.sh b/doc/0.builddoc.sh deleted file mode 100755 index f10b4a5cc22d..000000000000 --- a/doc/0.builddoc.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -BUILDDOCDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") -ROOTDIR=$BUILDDOCDIR/.. 
- -cd "$ROOTDIR" || exit 1 -python setup.py develop - -cd "$BUILDDOCDIR" || exit 2 -make clean -make html diff --git a/doc/quick_start_guide.rst b/doc/quick_start_guide.rst index 92c506c0fd81..6226a655c333 100644 --- a/doc/quick_start_guide.rst +++ b/doc/quick_start_guide.rst @@ -112,13 +112,15 @@ To build and install the package on Linux OS, run: .. code-block:: bash - python setup.py install -- -G Ninja -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icpx + python setup.py build_ext --inplace -- -G Ninja -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icpx + python -m pip install -e . To build and install the package on Windows OS, run: .. code-block:: bash - python setup.py install -- -G Ninja -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icx + python setup.py build_ext --inplace -- -G Ninja -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icx + python -m pip install -e . Alternatively, to develop on Linux OS, you can use the driver script: diff --git a/dpnp/__init__.py b/dpnp/__init__.py index 5ec230977f98..02420107972f 100644 --- a/dpnp/__init__.py +++ b/dpnp/__init__.py @@ -566,7 +566,6 @@ kaiser, ) - # ============================================================================= # Helper functions # ============================================================================= diff --git a/dpnp/backend/extensions/ufunc/CMakeLists.txt b/dpnp/backend/extensions/ufunc/CMakeLists.txt index 5609522f58a4..b24d5d131cfe 100644 --- a/dpnp/backend/extensions/ufunc/CMakeLists.txt +++ b/dpnp/backend/extensions/ufunc/CMakeLists.txt @@ -34,7 +34,6 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/divmod.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/erf_funcs.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fabs.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fix.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/float_power.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fmax.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fmin.cpp diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp index df409464a5c2..9254e87e52c4 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp @@ -33,7 +33,6 @@ #include "divmod.hpp" #include "erf_funcs.hpp" #include "fabs.hpp" -#include "fix.hpp" #include "float_power.hpp" #include "fmax.hpp" #include "fmin.hpp" @@ -67,7 +66,6 @@ void init_elementwise_functions(py::module_ m) init_divmod(m); init_erf_funcs(m); init_fabs(m); - init_fix(m); init_float_power(m); init_fmax(m); init_fmin(m); diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fix.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fix.cpp deleted file mode 100644 index 6b21245489fc..000000000000 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/fix.cpp +++ /dev/null @@ -1,131 +0,0 @@ -//***************************************************************************** -// Copyright (c) 2024, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// - Neither the name of the copyright holder nor the names of its contributors -// may be used to endorse or promote products derived from this software -// without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -// THE POSSIBILITY OF SUCH DAMAGE. -//***************************************************************************** - -#include -#include - -#include - -#include "dpctl4pybind11.hpp" - -#include "fix.hpp" -#include "kernels/elementwise_functions/fix.hpp" -#include "populate.hpp" - -// include a local copy of elementwise common header from dpctl tensor: -// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp -// TODO: replace by including dpctl header once available -#include "../../elementwise_functions/elementwise_functions.hpp" - -// dpctl tensor headers -#include "kernels/elementwise_functions/common.hpp" -#include "utils/type_dispatch.hpp" - -namespace dpnp::extensions::ufunc -{ -namespace py = pybind11; -namespace py_int = dpnp::extensions::py_internal; - -namespace impl -{ -namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; -namespace td_ns = dpctl::tensor::type_dispatch; - -/** - * @brief A factory to define pairs of supported types for which - * sycl::fix function is available. - * - * @tparam T Type of input vector `a` and of result vector `y`. 
- */ -template -struct OutputType -{ - using value_type = - typename std::disjunction, - td_ns::TypeMapResultEntry, - td_ns::TypeMapResultEntry, - td_ns::DefaultResultEntry>::result_type; -}; - -using dpnp::kernels::fix::FixFunctor; - -template -using ContigFunctor = ew_cmn_ns::UnaryContigFunctor, - vec_sz, - n_vecs, - enable_sg_loadstore>; - -template -using StridedFunctor = ew_cmn_ns:: - UnaryStridedFunctor>; - -using ew_cmn_ns::unary_contig_impl_fn_ptr_t; -using ew_cmn_ns::unary_strided_impl_fn_ptr_t; - -static unary_contig_impl_fn_ptr_t fix_contig_dispatch_vector[td_ns::num_types]; -static int fix_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - fix_strided_dispatch_vector[td_ns::num_types]; - -MACRO_POPULATE_DISPATCH_VECTORS(fix); -} // namespace impl - -void init_fix(py::module_ m) -{ - using arrayT = dpctl::tensor::usm_ndarray; - using event_vecT = std::vector; - { - impl::populate_fix_dispatch_vectors(); - using impl::fix_contig_dispatch_vector; - using impl::fix_output_typeid_vector; - using impl::fix_strided_dispatch_vector; - - auto fix_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_int::py_unary_ufunc( - src, dst, exec_q, depends, fix_output_typeid_vector, - fix_contig_dispatch_vector, fix_strided_dispatch_vector); - }; - m.def("_fix", fix_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto fix_result_type_pyapi = [&](const py::dtype &dtype) { - return py_int::py_unary_ufunc_result_type(dtype, - fix_output_typeid_vector); - }; - m.def("_fix_result_type", fix_result_type_pyapi); - } -} -} // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fix.hpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fix.hpp deleted file mode 100644 index f9fed62cf818..000000000000 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/fix.hpp +++ /dev/null @@ 
-1,38 +0,0 @@ -//***************************************************************************** -// Copyright (c) 2024, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// - Neither the name of the copyright holder nor the names of its contributors -// may be used to endorse or promote products derived from this software -// without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -// THE POSSIBILITY OF SUCH DAMAGE. 
-//***************************************************************************** - -#pragma once - -#include - -namespace py = pybind11; - -namespace dpnp::extensions::ufunc -{ -void init_fix(py::module_ m); -} // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/kernels/elementwise_functions/fix.hpp b/dpnp/backend/kernels/elementwise_functions/fix.hpp deleted file mode 100644 index f53bfc17e566..000000000000 --- a/dpnp/backend/kernels/elementwise_functions/fix.hpp +++ /dev/null @@ -1,52 +0,0 @@ -//***************************************************************************** -// Copyright (c) 2024, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// - Neither the name of the copyright holder nor the names of its contributors -// may be used to endorse or promote products derived from this software -// without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -// THE POSSIBILITY OF SUCH DAMAGE. -//***************************************************************************** - -#pragma once - -#include - -namespace dpnp::kernels::fix -{ -template -struct FixFunctor -{ - // is function constant for given argT - using is_constant = typename std::false_type; - // constant value, if constant - // constexpr resT constant_value = resT{}; - // is function defined for sycl::vec - using supports_vec = typename std::false_type; - // do both argT and resT support subgroup store/load operation - using supports_sg_loadstore = typename std::true_type; - - resT operator()(const argT &x) const - { - return (x >= 0.0) ? 
sycl::floor(x) : sycl::ceil(x); - } -}; -} // namespace dpnp::kernels::fix diff --git a/dpnp/dpnp_algo/dpnp_arraycreation.py b/dpnp/dpnp_algo/dpnp_arraycreation.py index a76736d27bb5..d94a031801f3 100644 --- a/dpnp/dpnp_algo/dpnp_arraycreation.py +++ b/dpnp/dpnp_algo/dpnp_arraycreation.py @@ -46,11 +46,46 @@ def _as_usm_ndarray(a, usm_type, sycl_queue): + """Converts input object to `dpctl.tensor.usm_ndarray`""" + if isinstance(a, dpnp_array): - return a.get_array() + a = a.get_array() return dpt.asarray(a, usm_type=usm_type, sycl_queue=sycl_queue) +def _check_has_zero_val(a): + """Check if any element in input object is equal to zero""" + + if dpnp.isscalar(a): + if a == 0: + return True + elif hasattr(a, "any"): + if (a == 0).any(): + return True + elif (numpy.array(a) == 0).any(): + return True + return False + + +def _get_usm_allocations(objs, device=None, usm_type=None, sycl_queue=None): + """ + Get common USM allocations based on a list of input objects and an explicit + device, a SYCL queue, or a USM type if specified. 
+ + """ + + alloc_usm_type, alloc_sycl_queue = get_usm_allocations(objs) + + if sycl_queue is None and device is None: + sycl_queue = alloc_sycl_queue + + if usm_type is None: + usm_type = alloc_usm_type or "device" + return usm_type, dpnp.get_normalized_queue_device( + sycl_queue=sycl_queue, device=device + ) + + def dpnp_geomspace( start, stop, @@ -62,76 +97,57 @@ def dpnp_geomspace( endpoint=True, axis=0, ): - usm_type_alloc, sycl_queue_alloc = get_usm_allocations([start, stop]) - - if sycl_queue is None and device is None: - sycl_queue = sycl_queue_alloc - sycl_queue_normalized = dpnp.get_normalized_queue_device( - sycl_queue=sycl_queue, device=device + usm_type, sycl_queue = _get_usm_allocations( + [start, stop], device=device, usm_type=usm_type, sycl_queue=sycl_queue ) - if usm_type is None: - _usm_type = "device" if usm_type_alloc is None else usm_type_alloc - else: - _usm_type = usm_type + if _check_has_zero_val(start) or _check_has_zero_val(stop): + raise ValueError("Geometric sequence cannot include zero") - start = _as_usm_ndarray(start, _usm_type, sycl_queue_normalized) - stop = _as_usm_ndarray(stop, _usm_type, sycl_queue_normalized) + start = dpnp.array(start, usm_type=usm_type, sycl_queue=sycl_queue) + stop = dpnp.array(stop, usm_type=usm_type, sycl_queue=sycl_queue) dt = numpy.result_type(start, stop, float(num)) - dt = map_dtype_to_device(dt, sycl_queue_normalized.sycl_device) + dt = map_dtype_to_device(dt, sycl_queue.sycl_device) if dtype is None: dtype = dt - if dpnp.any(start == 0) or dpnp.any(stop == 0): - raise ValueError("Geometric sequence cannot include zero") + # promote both arguments to the same dtype + start = start.astype(dt, copy=False) + stop = stop.astype(dt, copy=False) - out_sign = dpt.ones( - dpt.broadcast_arrays(start, stop)[0].shape, - dtype=dt, - usm_type=_usm_type, - sycl_queue=sycl_queue_normalized, - ) - # Avoid negligible real or imaginary parts in output by rotating to - # positive real, calculating, then undoing rotation 
- if dpnp.issubdtype(dt, dpnp.complexfloating): - all_imag = (start.real == 0.0) & (stop.real == 0.0) - if dpnp.any(all_imag): - start[all_imag] = start[all_imag].imag - stop[all_imag] = stop[all_imag].imag - out_sign[all_imag] = 1j - - both_negative = (dpt.sign(start) == -1) & (dpt.sign(stop) == -1) - if dpnp.any(both_negative): - dpt.negative(start[both_negative], out=start[both_negative]) - dpt.negative(stop[both_negative], out=stop[both_negative]) - dpt.negative(out_sign[both_negative], out=out_sign[both_negative]) - - log_start = dpt.log10(start) - log_stop = dpt.log10(stop) + # Allow negative real values and ensure a consistent result for complex + # (including avoiding negligible real or imaginary parts in output) by + # rotating start to positive real, calculating, then undoing rotation. + out_sign = dpnp.sign(start) + start = start / out_sign + stop = stop / out_sign + + log_start = dpnp.log10(start) + log_stop = dpnp.log10(stop) res = dpnp_logspace( log_start, log_stop, num=num, endpoint=endpoint, base=10.0, - dtype=dtype, - usm_type=_usm_type, - sycl_queue=sycl_queue_normalized, - ).get_array() + dtype=dt, + usm_type=usm_type, + sycl_queue=sycl_queue, + ) + # Make sure the endpoints match the start and stop arguments. This is + # necessary because np.exp(np.log(x)) is not necessarily equal to x. 
if num > 0: res[0] = start if num > 1 and endpoint: res[-1] = stop - res = out_sign * res + res *= out_sign if axis != 0: - res = dpt.moveaxis(res, 0, axis) - - res = dpt.astype(res, dtype, copy=False) - return dpnp_array._create_from_usm_ndarray(res) + res = dpnp.moveaxis(res, 0, axis) + return res.astype(dtype, copy=False) def dpnp_linspace( @@ -172,14 +188,9 @@ def dpnp_linspace( num = operator.index(num) if num < 0: - raise ValueError("Number of points must be non-negative") + raise ValueError(f"Number of samples={num} must be non-negative.") step_num = (num - 1) if endpoint else num - step_nan = False - if step_num == 0: - step_nan = True - step = dpnp.nan - if dpnp.isscalar(start) and dpnp.isscalar(stop): # Call linspace() function for scalars. usm_res = dpt.linspace( @@ -191,8 +202,13 @@ def dpnp_linspace( sycl_queue=sycl_queue_normalized, endpoint=endpoint, ) - if retstep is True and step_nan is False: - step = (stop - start) / step_num + + # calculate the used step to return + if retstep is True: + if step_num > 0: + step = (stop - start) / step_num + else: + step = dpnp.nan else: usm_start = dpt.asarray( start, @@ -204,6 +220,8 @@ def dpnp_linspace( stop, dtype=dt, usm_type=_usm_type, sycl_queue=sycl_queue_normalized ) + delta = usm_stop - usm_start + usm_res = dpt.arange( 0, stop=num, @@ -212,20 +230,30 @@ def dpnp_linspace( usm_type=_usm_type, sycl_queue=sycl_queue_normalized, ) + usm_res = dpt.reshape(usm_res, (-1,) + (1,) * delta.ndim, copy=False) + + if step_num > 0: + step = delta / step_num + + # Needed a special handling for denormal numbers (when step == 0), + # see numpy#5437 for more details. + # Note, dpt.where() is used to avoid a synchronization branch. 
+ usm_res = dpt.where( + step == 0, (usm_res / step_num) * delta, usm_res * step + ) + else: + step = dpnp.nan + usm_res = usm_res * delta - if step_nan is False: - step = (usm_stop - usm_start) / step_num - usm_res = dpt.reshape(usm_res, (-1,) + (1,) * step.ndim, copy=False) - usm_res = usm_res * step - usm_res += usm_start + usm_res += usm_start if endpoint and num > 1: - usm_res[-1] = dpt.full(step.shape, usm_stop) + usm_res[-1, ...] = usm_stop if axis != 0: usm_res = dpt.moveaxis(usm_res, 0, axis) - if numpy.issubdtype(dtype, dpnp.integer): + if dpnp.issubdtype(dtype, dpnp.integer): dpt.floor(usm_res, out=usm_res) res = dpt.astype(usm_res, dtype, copy=False) @@ -252,45 +280,36 @@ def dpnp_logspace( dtype=None, axis=0, ): - if not dpnp.isscalar(base): - usm_type_alloc, sycl_queue_alloc = get_usm_allocations( - [start, stop, base] - ) - - if sycl_queue is None and device is None: - sycl_queue = sycl_queue_alloc - sycl_queue = dpnp.get_normalized_queue_device( - sycl_queue=sycl_queue, device=device - ) - - if usm_type is None: - usm_type = "device" if usm_type_alloc is None else usm_type_alloc - else: - usm_type = usm_type + usm_type, sycl_queue = _get_usm_allocations( + [start, stop, base], + device=device, + usm_type=usm_type, + sycl_queue=sycl_queue, + ) - start = _as_usm_ndarray(start, usm_type, sycl_queue) - stop = _as_usm_ndarray(stop, usm_type, sycl_queue) - base = _as_usm_ndarray(base, usm_type, sycl_queue) + if not dpnp.isscalar(base): + base = dpnp.array(base, usm_type=usm_type, sycl_queue=sycl_queue) + start = dpnp.array(start, usm_type=usm_type, sycl_queue=sycl_queue) + stop = dpnp.array(stop, usm_type=usm_type, sycl_queue=sycl_queue) - [start, stop, base] = dpt.broadcast_arrays(start, stop, base) - base = dpt.expand_dims(base, axis=axis) + start, stop, base = dpnp.broadcast_arrays(start, stop, base) + base = dpnp.expand_dims(base, axis=axis) - # assume res as not a tuple, because retstep is False + # assume `res` as not a tuple, because retstep is 
False res = dpnp_linspace( start, stop, num=num, - device=device, usm_type=usm_type, sycl_queue=sycl_queue, endpoint=endpoint, axis=axis, - ).get_array() + ) - dpt.pow(base, res, out=res) + dpnp.pow(base, res, out=res) if dtype is not None: - res = dpt.astype(res, dtype, copy=False) - return dpnp_array._create_from_usm_ndarray(res) + res = res.astype(dtype, copy=False) + return res class dpnp_nd_grid: diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py index 60a55acd1f45..57bf50422fa0 100644 --- a/dpnp/dpnp_algo/dpnp_elementwise_common.py +++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py @@ -60,7 +60,7 @@ "DPNPBinaryFunc", "DPNPBinaryFuncOutKw", "DPNPBinaryTwoOutputsFunc", - "DPNPFix", + "DPNPDeprecatedUnaryFunc", "DPNPImag", "DPNPReal", "DPNPRound", @@ -231,6 +231,32 @@ def _unpack_out_kw(self, out): return out +class DPNPDeprecatedUnaryFunc(DPNPUnaryFunc): + """ + Class that implements a deprecated unary element-wise function. + + Parameters + ---------- + deprecated_msg : {str, None}, optional + Warning message to emit. If None, no warning is issued. + + Default: ``None``. + + """ + + def __init__(self, *args, deprecated_msg=None, **kwargs): + super().__init__(*args, **kwargs) + self._deprecated_msg = deprecated_msg + + @wraps(DPNPUnaryFunc.__call__) + def __call__(self, *args, **kwargs): + if self._deprecated_msg: + warnings.warn( + self._deprecated_msg, DeprecationWarning, stacklevel=2 + ) + return super().__call__(*args, **kwargs) + + class DPNPUnaryTwoOutputsFunc(UnaryElementwiseFunc): """ Class that implements unary element-wise functions with two output arrays. 
@@ -1188,55 +1214,6 @@ def __call__(self, x, /, deg=False, *, out=None, order="K"): return res -class DPNPFix(DPNPUnaryFunc): - """Class that implements dpnp.fix unary element-wise functions.""" - - def __init__( - self, - name, - result_type_resolver_fn, - unary_dp_impl_fn, - docs, - ): - super().__init__( - name, - result_type_resolver_fn, - unary_dp_impl_fn, - docs, - ) - - def __call__(self, x, /, out=None, *, order="K"): - if not dpnp.is_supported_array_type(x): - pass # pass to raise error in main implementation - elif dpnp.issubdtype(x.dtype, dpnp.inexact): - pass # for inexact types, pass to calculate in the backend - elif not ( - out is None - or isinstance(out, tuple) - or dpnp.is_supported_array_type(out) - ): - pass # pass to raise error in main implementation - elif not ( - out is None or isinstance(out, tuple) or out.dtype == x.dtype - ): - # passing will raise an error but with incorrect needed dtype - raise ValueError( - f"Output array of type {x.dtype} is needed, got {out.dtype}" - ) - else: - # for exact types, return the input - out = self._unpack_out_kw(out) - if out is None: - return dpnp.copy(x, order=order) - - if isinstance(out, dpt.usm_ndarray): - out = dpnp_array._create_from_usm_ndarray(out) - out[...] 
= x - return out - - return super().__call__(x, out=out, order=order) - - class DPNPI0(DPNPUnaryFunc): """Class that implements dpnp.i0 unary element-wise functions.""" diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index c42c9f8a2cb5..6a2b2fd1977f 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -98,6 +98,7 @@ def __init__( # or as USM memory allocation if isinstance(buffer, dpnp_array): buffer = buffer.get_array() + offset += buffer._element_offset if dtype is None and hasattr(buffer, "dtype"): dtype = buffer.dtype diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py index f019989a2ca8..4975db17c717 100644 --- a/dpnp/dpnp_container.py +++ b/dpnp/dpnp_container.py @@ -35,7 +35,6 @@ """ - import dpctl.tensor as dpt import dpctl.utils as dpu diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index 4cdc74c75ca0..fba1a215756a 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -38,6 +38,7 @@ - The functions parameters check """ + # pylint: disable=protected-access import os diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index 12c65d116a70..8d4ebdd1a6c2 100644 --- a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -39,7 +39,6 @@ """ - # pylint: disable=duplicate-code import operator @@ -2704,6 +2703,8 @@ def linspace( of tuples, tuples of lists, and ndarrays. If `endpoint` is set to ``False`` the sequence consists of all but the last of ``num + 1`` evenly spaced samples, so that `stop` is excluded. + num : int + Number of samples. Must have a nonnegative value. dtype : {None, str, dtype object}, optional The desired dtype for the array. 
If not given, a default dtype will be used that can represent the values (by considering Promotion Type Rule diff --git a/dpnp/dpnp_iface_indexing.py b/dpnp/dpnp_iface_indexing.py index 6375e5320e30..6e7ab778299b 100644 --- a/dpnp/dpnp_iface_indexing.py +++ b/dpnp/dpnp_iface_indexing.py @@ -721,23 +721,21 @@ def diagonal(a, offset=0, axis1=0, axis2=1): a_straides = a.strides n, m = a_shape[-2:] st_n, st_m = a_straides[-2:] - # pylint: disable=W0212 - a_element_offset = a.get_array()._element_offset # Compute shape, strides and offset of the resulting diagonal array # based on the input offset if offset == 0: out_shape = a_shape[:-2] + (min(n, m),) out_strides = a_straides[:-2] + (st_n + st_m,) - out_offset = a_element_offset + out_offset = 0 elif 0 < offset < m: out_shape = a_shape[:-2] + (min(n, m - offset),) out_strides = a_straides[:-2] + (st_n + st_m,) - out_offset = a_element_offset + st_m * offset + out_offset = st_m * offset else: out_shape = a_shape[:-2] + (0,) out_strides = a_straides[:-2] + (1,) - out_offset = a_element_offset + out_offset = 0 return dpnp_array( out_shape, buffer=a, strides=out_strides, offset=out_offset diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py index a0068a3597e5..acb123473482 100644 --- a/dpnp/dpnp_iface_linearalgebra.py +++ b/dpnp/dpnp_iface_linearalgebra.py @@ -1121,7 +1121,7 @@ def outer(a, b, out=None): return result -def tensordot(a, b, axes=2): +def tensordot(a, b, /, *, axes=2): r""" Compute tensor dot product along specified axes. @@ -1148,7 +1148,10 @@ def tensordot(a, b, axes=2): axes must match. * (2,) array_like: A list of axes to be summed over, first sequence applying to `a`, second to `b`. Both elements array_like must be of - the same length. + the same length. Each axis may appear at most once; repeated axes are + not allowed. + + Default: ``2``. 
Returns ------- @@ -1178,6 +1181,13 @@ def tensordot(a, b, axes=2): two sequences of the same length, with the first axis to sum over given first in both sequences, the second axis second, and so forth. + For example, if ``a.shape == (2, 3, 4)`` and ``b.shape == (3, 4, 5)``, then + ``axes=([1, 2], [0, 1])`` sums over the ``(3, 4)`` dimensions of both + arrays and produces an output of shape ``(2, 5)``. + + Each summation axis corresponds to a distinct contraction index; repeating + an axis (for example ``axes=([1, 1], [0, 0])``) is invalid. + The shape of the result consists of the non-contracted axes of the first tensor, followed by the non-contracted axes of the second. diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py index 3cec24a44159..9df5278bd16b 100644 --- a/dpnp/dpnp_iface_manipulation.py +++ b/dpnp/dpnp_iface_manipulation.py @@ -39,7 +39,6 @@ """ - import math import operator import warnings @@ -3418,7 +3417,7 @@ def rot90(m, k=1, axes=(0, 1)): return dpnp.flip(dpnp.flip(m, axes[0]), axes[1]) axes_list = list(range(0, m_ndim)) - (axes_list[axes[0]], axes_list[axes[1]]) = ( + axes_list[axes[0]], axes_list[axes[1]] = ( axes_list[axes[1]], axes_list[axes[0]], ) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 3dc5ccb82884..3e6a4b0ed121 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -66,7 +66,7 @@ DPNPBinaryFunc, DPNPBinaryFuncOutKw, DPNPBinaryTwoOutputsFunc, - DPNPFix, + DPNPDeprecatedUnaryFunc, DPNPImag, DPNPReal, DPNPRound, @@ -1854,6 +1854,12 @@ def ediff1d(ary, to_end=None, to_begin=None): :obj:`dpnp.floor` : Return the floor of the input, element-wise. :obj:`dpnp.ceil` : Return the ceiling of the input, element-wise. +Warning +------- +This function is deprecated. It is recommended to use +:func:`dpnp.trunc` instead, as it provides the same functionality of +truncating decimal values to their integer parts. 
+ Examples -------- >>> import dpnp as np @@ -1867,11 +1873,15 @@ def ediff1d(ary, to_end=None, to_begin=None): """ -fix = DPNPFix( +# reuse trunc backend implementation for fix +fix = DPNPDeprecatedUnaryFunc( "fix", - ufi._fix_result_type, - ufi._fix, + ti._trunc_result_type, + ti._trunc, _FIX_DOCSTRING, + mkl_fn_to_call="_mkl_trunc_to_call", + mkl_impl_fn="_trunc", + deprecated_msg="dpnp.fix is deprecated in favor of dpnp.trunc", ) diff --git a/dpnp/linalg/__init__.py b/dpnp/linalg/__init__.py index e04928393a8c..3fa27db33473 100644 --- a/dpnp/linalg/__init__.py +++ b/dpnp/linalg/__init__.py @@ -35,7 +35,6 @@ """ - from .dpnp_iface_linalg import ( LinAlgError, cholesky, diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 95bb849afa3c..6959565ecf17 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -1975,9 +1975,10 @@ def tensordot(a, b, /, *, axes=2): axes must match. * (2,) array_like: A list of axes to be summed over, first sequence applying to `a`, second to `b`. Both elements array_like must be of - the same length. + the same length. Each axis may appear at most once; repeated axes are + not allowed. - Default: ``2``. + Default: ``2``. Returns ------- @@ -2007,6 +2008,13 @@ def tensordot(a, b, /, *, axes=2): two sequences of the same length, with the first axis to sum over given first in both sequences, the second axis second, and so forth. + For example, if ``a.shape == (2, 3, 4)`` and ``b.shape == (3, 4, 5)``, then + ``axes=([1, 2], [0, 1])`` sums over the ``(3, 4)`` dimensions of both + arrays and produces an output of shape ``(2, 5)``. + + Each summation axis corresponds to a distinct contraction index; repeating + an axis (for example ``axes=([1, 1], [0, 0])``) is invalid. + The shape of the result consists of the non-contracted axes of the first tensor, followed by the non-contracted axes of the second. 
diff --git a/dpnp/random/dpnp_random_state.py b/dpnp/random/dpnp_random_state.py index 560e2670932b..e49fe739aedd 100644 --- a/dpnp/random/dpnp_random_state.py +++ b/dpnp/random/dpnp_random_state.py @@ -36,7 +36,6 @@ """ - import dpctl.utils as dpu import numpy diff --git a/dpnp/scipy/linalg/__init__.py b/dpnp/scipy/linalg/__init__.py index a97f2cfb829c..3afc08a6fdb9 100644 --- a/dpnp/scipy/linalg/__init__.py +++ b/dpnp/scipy/linalg/__init__.py @@ -35,7 +35,6 @@ """ - from ._decomp_lu import lu_factor, lu_solve __all__ = [ diff --git a/dpnp/scipy/linalg/_decomp_lu.py b/dpnp/scipy/linalg/_decomp_lu.py index d2a58fba14d6..292d7fffe4b4 100644 --- a/dpnp/scipy/linalg/_decomp_lu.py +++ b/dpnp/scipy/linalg/_decomp_lu.py @@ -39,7 +39,6 @@ """ - import dpnp from dpnp.linalg.dpnp_utils_linalg import ( assert_stacked_2d, @@ -181,7 +180,7 @@ def lu_solve(lu_and_piv, b, trans=0, overwrite_b=False, check_finite=True): """ - (lu, piv) = lu_and_piv + lu, piv = lu_and_piv dpnp.check_supported_arrays_type(lu, piv, b) assert_stacked_2d(lu) assert_stacked_square(lu) diff --git a/dpnp/scipy/linalg/_utils.py b/dpnp/scipy/linalg/_utils.py index be736e076d86..282c645d1095 100644 --- a/dpnp/scipy/linalg/_utils.py +++ b/dpnp/scipy/linalg/_utils.py @@ -37,7 +37,6 @@ """ - # pylint: disable=no-name-in-module # pylint: disable=protected-access diff --git a/dpnp/tests/helper.py b/dpnp/tests/helper.py index e5e251231654..65917ee6c340 100644 --- a/dpnp/tests/helper.py +++ b/dpnp/tests/helper.py @@ -1,4 +1,5 @@ import importlib.util +from enum import Enum from sys import platform import dpctl @@ -11,6 +12,11 @@ from . 
import config +class LTS_VERSION(Enum): + V1_3 = "1.3" + V1_6 = "1.6" + + def _assert_dtype(a_dt, b_dt, check_only_type_kind=False): if check_only_type_kind: assert a_dt.kind == b_dt.kind, f"{a_dt.kind} != {b_dt.kind}" @@ -69,6 +75,9 @@ def assert_dtype_allclose( x.dtype, dpnp.inexact ) + if not hasattr(numpy_arr, "dtype"): + numpy_arr = numpy.array(numpy_arr) + if is_inexact(dpnp_arr) or is_inexact(numpy_arr): tol_dpnp = ( dpnp.finfo(dpnp_arr).resolution @@ -472,13 +481,13 @@ def is_lnl(device=None): return _get_dev_mask(device) == 0x6400 -def is_lts_driver(device=None): +def is_lts_driver(version=LTS_VERSION.V1_3, device=None): """ Return True if a test is running on a GPU device with LTS driver version, False otherwise. """ dev = dpctl.select_default_device() if device is None else device - return dev.has_aspect_gpu and "1.3" in dev.driver_version + return dev.has_aspect_gpu and version.value in dev.driver_version def is_ptl(device=None): diff --git a/dpnp/tests/test_arraycreation.py b/dpnp/tests/test_arraycreation.py index 28d51e8e6d31..eb20f9b3ffe5 100644 --- a/dpnp/tests/test_arraycreation.py +++ b/dpnp/tests/test_arraycreation.py @@ -19,9 +19,8 @@ assert_dtype_allclose, get_all_dtypes, get_array, - is_lts_driver, - is_tgllp_iris_xe, - is_win_platform, + get_float_dtypes, + has_support_aspect64, ) from .third_party.cupy import testing @@ -83,6 +82,242 @@ def test_validate_positional_args(self, xp): ) +class TestGeomspace: + @pytest.mark.parametrize("sign", [-1, 1]) + @pytest.mark.parametrize("dtype", get_all_dtypes()) + @pytest.mark.parametrize("num", [2, 4, 8, 3, 9, 27]) + @pytest.mark.parametrize("endpoint", [True, False]) + def test_basic(self, sign, dtype, num, endpoint): + start = 2 * sign + stop = 127 * sign + + func = lambda xp: xp.geomspace( + start, stop, num, endpoint=endpoint, dtype=dtype + ) + + np_res = func(numpy) + dpnp_res = func(dpnp) + + assert_allclose(dpnp_res, np_res, rtol=1e-06) + + @pytest.mark.parametrize("start", [1j, 1 + 1j]) + 
@pytest.mark.parametrize("stop", [10j, 10 + 10j]) + def test_complex(self, start, stop): + func = lambda xp: xp.geomspace(start, stop, num=10) + np_res = func(numpy) + dpnp_res = func(dpnp) + assert_allclose(dpnp_res, np_res, rtol=1e-06) + + @pytest.mark.parametrize("axis", [0, 1]) + def test_axis(self, axis): + func = lambda xp: xp.geomspace([2, 3], [20, 15], num=10, axis=axis) + np_res = func(numpy) + dpnp_res = func(dpnp) + assert_allclose(dpnp_res, np_res, rtol=1e-06) + + def test_num_zero(self): + func = lambda xp: xp.geomspace(1, 10, num=0, endpoint=False) + np_res = func(numpy) + dpnp_res = func(dpnp) + assert_allclose(dpnp_res, np_res) + + @pytest.mark.parametrize( + "start, stop, num", + [ + (0, 5, 3), + (2, 0, 3), + (0, 0, 3), + (dpnp.array([0]), 7, 10), + (-2, numpy.array([[0]]), 7), + ([2, 4, 0], 3, 5), + (10, [[1, 0], [2, 3]], 3), + ], + ) + def test_zero_error(self, start, stop, num): + with pytest.raises(ValueError): + dpnp.geomspace(start, stop, num) + + +class TestLinspace: + @pytest.mark.parametrize("start", [0, -5, 10, -2.5, 9.7]) + @pytest.mark.parametrize("stop", [0, 10, -2, 20.5, 120]) + @pytest.mark.parametrize("num", [0, 1, 5, numpy.array(10)]) + @pytest.mark.parametrize( + "dt", get_all_dtypes(no_bool=True, no_float16=False) + ) + @pytest.mark.parametrize("retstep", [True, False]) + def test_basic(self, start, stop, num, dt, retstep): + if ( + not has_support_aspect64() + and numpy.issubdtype(dt, numpy.integer) + and start == -5 + and stop == 10 + and num == 10 + ): + pytest.skip("due to dpctl-1056") + + if numpy.issubdtype(dt, numpy.unsignedinteger): + start = abs(start) + stop = abs(stop) + + res = dpnp.linspace(start, stop, num, dtype=dt, retstep=retstep) + exp = numpy.linspace(start, stop, num, dtype=dt, retstep=retstep) + if retstep: + res, res_step = res + exp, exp_step = exp + assert_dtype_allclose(res_step, exp_step) + + if numpy.issubdtype(dt, numpy.integer): + assert_allclose(res, exp, rtol=1) + else: + assert_dtype_allclose(res, 
exp) + + @pytest.mark.parametrize( + "start, stop", + [ + (dpnp.array(1), dpnp.array([-4])), + (dpnp.array([2.6]), dpnp.array([[2.6], [-4]])), + (numpy.array([[-6.7, 3]]), numpy.array(2)), + ([1, -4], [[-4.6]]), + ((3, 5), (3,)), + ], + ) + @pytest.mark.parametrize("num", [0, 1, 5]) + @pytest.mark.parametrize( + "dt", get_all_dtypes(no_bool=True, no_float16=False) + ) + @pytest.mark.parametrize("retstep", [True, False]) + def test_start_stop_arrays(self, start, stop, num, dt, retstep): + res = dpnp.linspace(start, stop, num, dtype=dt, retstep=retstep) + exp = numpy.linspace( + get_array(numpy, start), + get_array(numpy, stop), + num, + dtype=dt, + retstep=retstep, + ) + if retstep: + res, res_step = res + exp, exp_step = exp + assert_dtype_allclose(res_step, exp_step) + assert_dtype_allclose(res, exp) + + @pytest.mark.parametrize( + "start, stop", + [(1 + 2j, 3 + 4j), (1j, 10), ([0, 1], 3 + 2j)], + ) + def test_start_stop_complex(self, start, stop): + result = dpnp.linspace(start, stop, num=5) + expected = numpy.linspace(start, stop, num=5) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dt", get_float_dtypes()) + def test_denormal_numbers(self, dt): + stop = numpy.nextafter(dt(0), dt(1)) * 5 # denormal number + + result = dpnp.linspace(0, stop, num=10, endpoint=False, dtype=dt) + expected = numpy.linspace(0, stop, num=10, endpoint=False, dtype=dt) + assert_dtype_allclose(result, expected) + + @pytest.mark.skipif(not has_support_aspect64(), reason="due to dpctl-1056") + def test_equivalent_to_arange(self): + result = dpnp.linspace(0, 35, num=36, dtype=int) + expected = numpy.linspace(0, 35, num=36, dtype=int) + assert_equal(result, expected) + + def test_round_negative(self): + result = dpnp.linspace(-1, 3, num=8, dtype=int) + expected = numpy.linspace(-1, 3, num=8, dtype=int) + assert_array_equal(result, expected) + + def test_step_zero(self): + start = numpy.array([0.0, 1.0]) + stop = numpy.array([2.0, 1.0]) + + result = 
dpnp.linspace(start, stop, num=3) + expected = numpy.linspace(start, stop, num=3) + assert_array_equal(result, expected) + + @pytest.mark.parametrize("endpoint", [True, False]) + def test_num_zero(self, endpoint): + start, stop = 0, [0, 1, 2, 3, 4] + result = dpnp.linspace(start, stop, num=0, endpoint=endpoint) + expected = numpy.linspace(start, stop, num=0, endpoint=endpoint) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("axis", [0, 1]) + def test_axis(self, axis): + func = lambda xp: xp.linspace([2, 3], [20, 15], num=10, axis=axis) + assert_allclose(func(dpnp), func(numpy)) + + @pytest.mark.parametrize("xp", [dpnp, numpy]) + def test_negative_num(self, xp): + with pytest.raises(ValueError, match="must be non-negative"): + _ = xp.linspace(0, 10, num=-1) + + @pytest.mark.parametrize("xp", [dpnp, numpy]) + def test_float_num(self, xp): + with pytest.raises( + TypeError, match="cannot be interpreted as an integer" + ): + _ = xp.linspace(0, 1, num=2.5) + + +class TestLogspace: + @pytest.mark.parametrize("dtype", get_all_dtypes()) + @pytest.mark.parametrize("num", [2, 4, 8, 3, 9, 27]) + @pytest.mark.parametrize("endpoint", [True, False]) + def test_basic(self, dtype, num, endpoint): + start = 2 + stop = 5 + base = 2 + + func = lambda xp: xp.logspace( + start, stop, num, endpoint=endpoint, dtype=dtype, base=base + ) + + np_res = func(numpy) + dpnp_res = func(dpnp) + assert_allclose(dpnp_res, np_res, rtol=1e-06) + + @testing.with_requires("numpy>=1.25.0") + @pytest.mark.parametrize("axis", [0, 1]) + def test_axis(self, axis): + func = lambda xp: xp.logspace( + [2, 3], [20, 15], num=2, base=[[1, 3], [5, 7]], axis=axis + ) + assert_dtype_allclose(func(dpnp), func(numpy)) + + def test_list_input(self): + expected = numpy.logspace([0], [2], base=[5]) + result = dpnp.logspace([0], [2], base=[5]) + assert_dtype_allclose(result, expected) + + +class TestSpaceLike: + @pytest.mark.parametrize("func", ["geomspace", "linspace", "logspace"]) + 
@pytest.mark.parametrize( + "start_dtype", [numpy.float64, numpy.float32, numpy.int64, numpy.int32] + ) + @pytest.mark.parametrize( + "stop_dtype", [numpy.float64, numpy.float32, numpy.int64, numpy.int32] + ) + def test_numpy_dtype(self, func, start_dtype, stop_dtype): + start = numpy.array([1, 2, 3], dtype=start_dtype) + stop = numpy.array([11, 7, -2], dtype=stop_dtype) + getattr(dpnp, func)(start, stop, 10) + + @pytest.mark.parametrize("xp", [dpnp, numpy]) + @pytest.mark.parametrize("func", ["geomspace", "logspace"]) + @pytest.mark.parametrize( + "start, stop, num", + [(2, 5, -3), ([2, 3], 5, -3)], + ) + def test_space_num_error(self, xp, func, start, stop, num): + with pytest.raises(ValueError): + getattr(xp, func)(start, stop, num) + + class TestTrace: @pytest.mark.parametrize("a_sh", [(3, 4), (2, 2, 2)]) @pytest.mark.parametrize( @@ -141,6 +376,16 @@ def test_linalg_trace(self, dtype, offset): expected = numpy.linalg.trace(a, offset=offset, dtype=dtype) assert_equal(result, expected) + @pytest.mark.parametrize("offset", [-1, 0, 1]) + def test_ndarray_offset(self, offset): + ia = dpnp.arange(8, dtype=dpnp.uint8).reshape((2, 2, 2)) + ia = dpnp.ndarray((2, 2), buffer=ia, offset=1) + a = ia.asnumpy() + + result = dpnp.linalg.trace(ia, offset=offset) + expected = numpy.linalg.trace(a, offset=offset) + assert_equal(result, expected) + @pytest.mark.parametrize( "func, args", @@ -734,101 +979,6 @@ def test_dpctl_tensor_input(func, args): assert_array_equal(X, Y) -@pytest.mark.parametrize("start", [0, -5, 10, -2.5, 9.7]) -@pytest.mark.parametrize("stop", [0, 10, -2, 20.5, 120]) -@pytest.mark.parametrize( - "num", - [1, 5, numpy.array(10), dpnp.array(17), dpt.asarray(100)], - ids=["1", "5", "numpy.array(10)", "dpnp.array(17)", "dpt.asarray(100)"], -) -@pytest.mark.parametrize( - "dtype", - get_all_dtypes(no_bool=True, no_float16=False), -) -@pytest.mark.parametrize("retstep", [True, False]) -def test_linspace(start, stop, num, dtype, retstep): - if 
numpy.issubdtype(dtype, numpy.unsignedinteger): - start = abs(start) - stop = abs(stop) - - res_np = numpy.linspace(start, stop, num, dtype=dtype, retstep=retstep) - res_dp = dpnp.linspace(start, stop, num, dtype=dtype, retstep=retstep) - - if retstep: - [res_np, step_np] = res_np - [res_dp, step_dp] = res_dp - assert_allclose(step_np, step_dp) - - if numpy.issubdtype(dtype, dpnp.integer): - assert_allclose(res_np, res_dp, rtol=1) - else: - assert_dtype_allclose(res_dp, res_np) - - -@pytest.mark.parametrize("func", ["geomspace", "linspace", "logspace"]) -@pytest.mark.parametrize( - "start_dtype", [numpy.float64, numpy.float32, numpy.int64, numpy.int32] -) -@pytest.mark.parametrize( - "stop_dtype", [numpy.float64, numpy.float32, numpy.int64, numpy.int32] -) -def test_space_numpy_dtype(func, start_dtype, stop_dtype): - start = numpy.array([1, 2, 3], dtype=start_dtype) - stop = numpy.array([11, 7, -2], dtype=stop_dtype) - getattr(dpnp, func)(start, stop, 10) - - -@pytest.mark.parametrize( - "start", - [ - dpnp.array(1), - dpnp.array([2.6]), - numpy.array([[-6.7, 3]]), - [1, -4], - (3, 5), - ], -) -@pytest.mark.parametrize( - "stop", - [ - dpnp.array([-4]), - dpnp.array([[2.6], [-4]]), - numpy.array(2), - [[-4.6]], - (3,), - ], -) -def test_linspace_arrays(start, stop): - func = lambda xp: xp.linspace(get_array(xp, start), get_array(xp, stop), 10) - assert func(numpy).shape == func(dpnp).shape - - -def test_linspace_complex(): - func = lambda xp: xp.linspace(0, 3 + 2j, num=1000) - assert_allclose(func(dpnp), func(numpy)) - - -@pytest.mark.parametrize("axis", [0, 1]) -def test_linspace_axis(axis): - func = lambda xp: xp.linspace([2, 3], [20, 15], num=10, axis=axis) - assert_allclose(func(dpnp), func(numpy)) - - -def test_linspace_step_nan(): - func = lambda xp: xp.linspace(1, 2, num=0, endpoint=False) - assert_allclose(func(dpnp), func(numpy)) - - -@pytest.mark.parametrize("start", [1, [1, 1]]) -@pytest.mark.parametrize("stop", [10, [10 + 10]]) -def 
test_linspace_retstep(start, stop): - func = lambda xp: xp.linspace(start, stop, num=10, retstep=True) - np_res = func(numpy) - dpnp_res = func(dpnp) - assert_allclose(dpnp_res[0], np_res[0]) - assert_allclose(dpnp_res[1], np_res[1]) - - @pytest.mark.parametrize( "arrays", [[], [[1]], [[1, 2, 3], [4, 5, 6]], [[1, 2], [3, 4], [5, 6]]], @@ -853,106 +1003,6 @@ def test_set_shape(shape): assert_array_equal(na, da) -def test_geomspace_zero_error(): - with pytest.raises(ValueError): - dpnp.geomspace(0, 5, 3) - dpnp.geomspace(2, 0, 3) - dpnp.geomspace(0, 0, 3) - - -def test_space_num_error(): - with pytest.raises(ValueError): - dpnp.linspace(2, 5, -3) - dpnp.geomspace(2, 5, -3) - dpnp.logspace(2, 5, -3) - dpnp.linspace([2, 3], 5, -3) - dpnp.geomspace([2, 3], 5, -3) - dpnp.logspace([2, 3], 5, -3) - - -@pytest.mark.parametrize("sign", [-1, 1]) -@pytest.mark.parametrize("dtype", get_all_dtypes()) -@pytest.mark.parametrize("num", [2, 4, 8, 3, 9, 27]) -@pytest.mark.parametrize("endpoint", [True, False]) -def test_geomspace(sign, dtype, num, endpoint): - start = 2 * sign - stop = 127 * sign - - func = lambda xp: xp.geomspace( - start, stop, num, endpoint=endpoint, dtype=dtype - ) - - np_res = func(numpy) - dpnp_res = func(dpnp) - - assert_allclose(dpnp_res, np_res, rtol=1e-06) - - -@pytest.mark.parametrize("start", [1j, 1 + 1j]) -@pytest.mark.parametrize("stop", [10j, 10 + 10j]) -def test_geomspace_complex(start, stop): - func = lambda xp: xp.geomspace(start, stop, num=10) - np_res = func(numpy) - dpnp_res = func(dpnp) - assert_allclose(dpnp_res, np_res, rtol=1e-06) - - -@pytest.mark.parametrize("axis", [0, 1]) -def test_geomspace_axis(axis): - func = lambda xp: xp.geomspace([2, 3], [20, 15], num=10, axis=axis) - np_res = func(numpy) - dpnp_res = func(dpnp) - assert_allclose(dpnp_res, np_res, rtol=1e-06) - - -def test_geomspace_num0(): - func = lambda xp: xp.geomspace(1, 10, num=0, endpoint=False) - np_res = func(numpy) - dpnp_res = func(dpnp) - assert_allclose(dpnp_res, 
np_res) - - -@pytest.mark.parametrize("dtype", get_all_dtypes()) -@pytest.mark.parametrize("num", [2, 4, 8, 3, 9, 27]) -@pytest.mark.parametrize("endpoint", [True, False]) -def test_logspace(dtype, num, endpoint): - if not is_win_platform() and is_tgllp_iris_xe() and is_lts_driver(): - if ( - dpnp.issubdtype(dtype, dpnp.integer) - and num in [8, 27] - and endpoint is True - ): - pytest.skip("SAT-7978") - - start = 2 - stop = 5 - base = 2 - - func = lambda xp: xp.logspace( - start, stop, num, endpoint=endpoint, dtype=dtype, base=base - ) - - np_res = func(numpy) - dpnp_res = func(dpnp) - - assert_allclose(dpnp_res, np_res, rtol=1e-06) - - -@testing.with_requires("numpy>=1.25.0") -@pytest.mark.parametrize("axis", [0, 1]) -def test_logspace_axis(axis): - func = lambda xp: xp.logspace( - [2, 3], [20, 15], num=2, base=[[1, 3], [5, 7]], axis=axis - ) - assert_dtype_allclose(func(dpnp), func(numpy)) - - -def test_logspace_list_input(): - expected = numpy.logspace([0], [2], base=[5]) - result = dpnp.logspace([0], [2], base=[5]) - assert_dtype_allclose(result, expected) - - @pytest.mark.parametrize( "data", [(), 1, (2, 3), [4], numpy.array(5), numpy.array([6, 7])] ) diff --git a/dpnp/tests/test_arraypad.py b/dpnp/tests/test_arraypad.py index 9a88dd8bab96..aee93703895b 100644 --- a/dpnp/tests/test_arraypad.py +++ b/dpnp/tests/test_arraypad.py @@ -73,11 +73,8 @@ def test_non_contiguous_array(self, mode): else: assert_array_equal(result, expected) - # TODO: include "linear_ramp" when dpnp issue gh-2084 is resolved @pytest.mark.parametrize("pad_width", [0, (0, 0), ((0, 0), (0, 0))]) - @pytest.mark.parametrize( - "mode", [m for m in _modes if m not in {"linear_ramp"}] - ) + @pytest.mark.parametrize("mode", _modes) def test_zero_pad_width(self, pad_width, mode): arr = dpnp.arange(30).reshape(6, 5) assert_array_equal(arr, dpnp.pad(arr, pad_width, mode=mode)) diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py index 77c65991e9cd..d443b71adff8 100644 --- 
a/dpnp/tests/test_mathematical.py +++ b/dpnp/tests/test_mathematical.py @@ -20,6 +20,7 @@ from dpnp.dpnp_utils import map_dtype_to_device from .helper import ( + LTS_VERSION, assert_dtype_allclose, generate_random_numpy_array, get_abs_array, @@ -33,6 +34,7 @@ has_support_aspect16, has_support_aspect64, is_intel_numpy, + is_lts_driver, numpy_version, ) from .third_party.cupy import testing @@ -217,6 +219,9 @@ def _get_exp_array(self, a, axis, dtype): @pytest.mark.parametrize("axis", [None, 2, -1]) @pytest.mark.parametrize("include_initial", [True, False]) def test_basic(self, dtype, axis, include_initial): + if axis is None and not is_lts_driver(version=LTS_VERSION.V1_6): + pytest.skip("due to SAT-8336") + a = dpnp.ones((3, 4, 5, 6, 7), dtype=dtype) res = dpnp.cumlogsumexp(a, axis=axis, include_initial=include_initial) @@ -234,6 +239,9 @@ def test_basic(self, dtype, axis, include_initial): @pytest.mark.parametrize("axis", [None, 2, -1]) @pytest.mark.parametrize("include_initial", [True, False]) def test_include_initial(self, dtype, axis, include_initial): + if axis is None and not is_lts_driver(version=LTS_VERSION.V1_6): + pytest.skip("due to SAT-8336") + a = dpnp.ones((3, 4, 5, 6, 7), dtype=dtype) if dpnp.issubdtype(a, dpnp.float32): @@ -2021,7 +2029,18 @@ def test_out_dtype(self, func): @pytest.mark.parametrize("xp", [numpy, dpnp]) @pytest.mark.parametrize( - "func", ["abs", "fix", "round", "add", "frexp", "divmod"] + "func", + [ + "abs", + pytest.param( + "fix", + marks=pytest.mark.filterwarnings("ignore::DeprecationWarning"), + ), + "round", + "add", + "frexp", + "divmod", + ], ) def test_out_wrong_tuple_len(self, xp, func): if func == "round" and xp is numpy: @@ -2536,7 +2555,18 @@ def test_projection(self, dtype): assert dpnp.allclose(result, expected) -@pytest.mark.parametrize("func", ["ceil", "floor", "trunc", "fix"]) +@pytest.mark.parametrize( + "func", + [ + "ceil", + "floor", + "trunc", + pytest.param( + "fix", + 
marks=pytest.mark.filterwarnings("ignore::DeprecationWarning"), + ), + ], +) class TestRoundingFuncs: @testing.with_requires("numpy>=2.1.0") @pytest.mark.parametrize( diff --git a/dpnp/tests/test_memory.py b/dpnp/tests/test_memory.py index ce9c7e60f030..1bc0da8c1535 100644 --- a/dpnp/tests/test_memory.py +++ b/dpnp/tests/test_memory.py @@ -31,3 +31,14 @@ def test_ndarray_from_data(self): a = dpnp.empty(5) b = dpnp.ndarray(a.shape, buffer=a.data) assert b.data.ptr == a.data.ptr + + def test_view_non_zero_offset(self): + n, m = 2, 8 + plane = n * m + + a = dpnp.empty(4 * plane) + sl = a[plane:] # non-zero offset view + + pl = dpnp.ndarray((n, m), dtype=a.dtype, buffer=sl) + assert pl.data.ptr == sl.data.ptr + assert a.data.ptr != sl.data.ptr diff --git a/dpnp/tests/test_product.py b/dpnp/tests/test_product.py index 763049e8791e..afe767a5e5d9 100644 --- a/dpnp/tests/test_product.py +++ b/dpnp/tests/test_product.py @@ -1842,6 +1842,13 @@ def test_error(self): with pytest.raises(ValueError): dpnp.tensordot(dpnp.arange(4), dpnp.array(5), axes=-1) + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_repeated_axes(self, xp): + a = xp.ones((2, 3, 3)) + b = xp.ones((3, 3, 4)) + with pytest.raises(ValueError): + xp.tensordot(a, b, axes=([1, 1], [0, 0])) + class TestVdot: @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py index 0bd4d6b53337..d1853579036a 100644 --- a/dpnp/tests/test_sycl_queue.py +++ b/dpnp/tests/test_sycl_queue.py @@ -133,10 +133,8 @@ def test_array_creation_from_array(func, args, device_x, device_y): assert_sycl_queue_equal(y.sycl_queue, x.sycl_queue) # cross device - # TODO: include geomspace when issue dpnp#2352 is resolved - if func != "geomspace": - y = getattr(dpnp, func)(*args, device=device_y) - assert_sycl_queue_equal(y.sycl_queue, x.to_device(device_y).sycl_queue) + y = getattr(dpnp, func)(*args, device=device_y) + assert_sycl_queue_equal(y.sycl_queue, 
x.to_device(device_y).sycl_queue) @pytest.mark.parametrize("device_x", valid_dev, ids=dev_ids) @@ -148,10 +146,9 @@ def test_array_creation_logspace_base(device_x, device_y): y = dpnp.logspace(0, 8, 4, base=x[1:3]) assert_sycl_queue_equal(y.sycl_queue, x.sycl_queue) - # TODO: include geomspace when issue dpnp#2353 is resolved # cross device - # y = dpnp.logspace(0, 8, 4, base=x[1:3], device=device_y) - # assert_sycl_queue_equal(y.sycl_queue, x.to_device(device_y).sycl_queue) + y = dpnp.logspace(0, 8, 4, base=x[1:3], device=device_y) + assert_sycl_queue_equal(y.sycl_queue, x.to_device(device_y).sycl_queue) @pytest.mark.parametrize("device", valid_dev + [None], ids=dev_ids + [None]) @@ -264,7 +261,11 @@ def test_meshgrid(device): pytest.param("exp2", [0.0, 1.0, 2.0]), pytest.param("expm1", [1.0e-10, 1.0, 2.0, 4.0, 7.0]), pytest.param("fabs", [-1.2, 1.2]), - pytest.param("fix", [2.1, 2.9, -2.1, -2.9]), + pytest.param( + "fix", + [2.1, 2.9, -2.1, -2.9], + marks=pytest.mark.filterwarnings("ignore::DeprecationWarning"), + ), pytest.param("flatnonzero", [-2, -1, 0, 1, 2]), pytest.param("floor", [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]), pytest.param("gradient", [1.0, 2.0, 4.0, 7.0, 11.0, 16.0]), @@ -1081,7 +1082,6 @@ def test_array_creation_from_dpctl(copy, device): assert isinstance(result, dpnp_array) -@pytest.mark.skip("due to dpctl-2213") @pytest.mark.parametrize("device", valid_dev, ids=dev_ids) @pytest.mark.parametrize("arr_dtype", get_all_dtypes(no_float16=True)) @pytest.mark.parametrize("shape", [tuple(), (2,), (3, 0, 1), (2, 2, 2)]) diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py index fa4fc62e34c9..4fc0f2b958fa 100644 --- a/dpnp/tests/test_usm_type.py +++ b/dpnp/tests/test_usm_type.py @@ -572,7 +572,11 @@ def test_meshgrid(usm_type_x, usm_type_y): pytest.param("exp2", [0.0, 1.0, 2.0]), pytest.param("expm1", [1.0e-10, 1.0, 2.0, 4.0, 7.0]), pytest.param("fabs", [-1.2, 1.2]), - pytest.param("fix", [2.1, 2.9, -2.1, -2.9]), + pytest.param( + 
"fix", + [2.1, 2.9, -2.1, -2.9], + marks=pytest.mark.filterwarnings("ignore::DeprecationWarning"), + ), pytest.param("flatnonzero", [-2, -1, 0, 1, 2]), pytest.param("floor", [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]), pytest.param("gradient", [1, 2, 4, 7, 11, 16]), diff --git a/dpnp/tests/third_party/cupy/binary_tests/test_elementwise.py b/dpnp/tests/third_party/cupy/binary_tests/test_elementwise.py index 431f1f27d8c7..95ec1d4374c8 100644 --- a/dpnp/tests/third_party/cupy/binary_tests/test_elementwise.py +++ b/dpnp/tests/third_party/cupy/binary_tests/test_elementwise.py @@ -1,11 +1,9 @@ from __future__ import annotations -import unittest - from dpnp.tests.third_party.cupy import testing -class TestElementwise(unittest.TestCase): +class TestElementwise: @testing.for_int_dtypes() @testing.numpy_cupy_array_equal() diff --git a/dpnp/tests/third_party/cupy/binary_tests/test_packing.py b/dpnp/tests/third_party/cupy/binary_tests/test_packing.py index a72a8a558b08..518e74d98868 100644 --- a/dpnp/tests/third_party/cupy/binary_tests/test_packing.py +++ b/dpnp/tests/third_party/cupy/binary_tests/test_packing.py @@ -1,4 +1,4 @@ -import unittest +from __future__ import annotations import numpy import pytest @@ -11,7 +11,7 @@ ) -class TestPacking(unittest.TestCase): +class TestPacking: @testing.for_int_dtypes() @testing.numpy_cupy_array_equal() diff --git a/dpnp/tests/third_party/cupy/core_tests/test_array_function.py b/dpnp/tests/third_party/cupy/core_tests/test_array_function.py index 7878a5a0aaff..f0c45900bcda 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_array_function.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_array_function.py @@ -1,4 +1,4 @@ -import unittest +from __future__ import annotations import numpy import pytest @@ -11,7 +11,7 @@ ) -class TestArrayFunction(unittest.TestCase): +class TestArrayFunction: @testing.with_requires("numpy>=1.17.0") def test_array_function(self): diff --git a/dpnp/tests/third_party/cupy/core_tests/test_core.py 
b/dpnp/tests/third_party/cupy/core_tests/test_core.py index c959b4f24954..d9f8196db562 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_core.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_core.py @@ -1,20 +1,23 @@ from __future__ import annotations import sys -import unittest import numpy import pytest import dpnp as cupy + +# from cupy._core import core from dpnp.tests.third_party.cupy import testing from dpnp.tests.third_party.cupy.testing._protocol_helpers import ( DummyObjectWithCudaArrayInterface, DummyObjectWithCuPyGetNDArray, ) +# from cupy_tests.core_tests import test_raw + -class TestSize(unittest.TestCase): +class TestSize: # def tearDown(self): # # Free huge memory for slow test @@ -58,7 +61,7 @@ def test_size_huge(self, xp): @pytest.mark.skip("no cupy._core submodule") -class TestOrder(unittest.TestCase): +class TestOrder: @testing.for_orders(_orders.keys()) def test_ndarray(self, order): diff --git a/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py b/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py index 2df8cbfc5f7c..41df0a82e0a0 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py @@ -82,7 +82,6 @@ def _get_stream(self, stream_name): return dpctl.SyclQueue() return dpctl.SyclQueue() - @pytest.mark.skip("due to dpctl-2213") @testing.for_all_dtypes(no_bool=False) def test_conversion(self, dtype): orig_array = _gen_array(dtype) @@ -102,7 +101,6 @@ def test_from_dlpack_and_conv_errors(self): # for host copies. 
cupy.from_dlpack(orig_array, copy=True) - @pytest.mark.skip("due to dpctl-2213") @pytest.mark.parametrize( "kwargs, versioned", [ @@ -130,7 +128,6 @@ def test_conversion_max_version(self, kwargs, versioned): testing.assert_array_equal(orig_array, out_array) testing.assert_array_equal(orig_array.data.ptr, out_array.data.ptr) - @pytest.mark.skip("due to dpctl-2213") def test_conversion_device(self): orig_array = _gen_array("float32") @@ -202,7 +199,6 @@ def test_conversion_device_to_cpu(self): ) assert numpy.may_share_memory(arr_nocopy, arr1) - @pytest.mark.skip("due to dpctl-2213") def test_stream(self): allowed_streams = ["null", True] # if not cuda.runtime.is_hip: diff --git a/dpnp/tests/third_party/cupy/core_tests/test_flags.py b/dpnp/tests/third_party/cupy/core_tests/test_flags.py index b2cc4fca59fb..3f416304a789 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_flags.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_flags.py @@ -29,9 +29,7 @@ def test_key_error(self): def test_repr(self): assert """ C_CONTIGUOUS : 1 F_CONTIGUOUS : 2 - OWNDATA : 3""" == repr( - self.flags - ) + OWNDATA : 3""" == repr(self.flags) @testing.parameterize( diff --git a/dpnp/tests/third_party/cupy/core_tests/test_function.py b/dpnp/tests/third_party/cupy/core_tests/test_function.py index 2943fc2bcf02..5480cdf6e126 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_function.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_function.py @@ -154,9 +154,7 @@ def test_custom_user_struct(self): offsets[3] = (unsigned long long)&ptr->d; offsets[4] = (unsigned long long)&ptr->e; }} -""".format( - struct_definition=struct_definition - ) +""".format(struct_definition=struct_definition) itemsize = cupy.ndarray(shape=(1,), dtype=numpy.uint64) sizes = cupy.ndarray(shape=(5,), dtype=numpy.uint64) @@ -213,9 +211,7 @@ def make_packed(basetype, N, itemsize): sum += s.e[0] + s.e[1] + s.e[2]; x[i] = a[i] + sum; }} -""".format( - struct_definition=struct_definition - ) 
+""".format(struct_definition=struct_definition) a_cpu = numpy.arange(24, dtype=numpy.float64) a = cupy.array(a_cpu) diff --git a/dpnp/tests/third_party/cupy/core_tests/test_include.py b/dpnp/tests/third_party/cupy/core_tests/test_include.py index 9c6372cf2bcd..a45d2b40cbf4 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_include.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_include.py @@ -18,25 +18,19 @@ #include """ -_code_nvcc = ( - _code_base - + """ +_code_nvcc = _code_base + """ #include int main() { return 0; } """ -) -_code_nvrtc = ( - _code_base - + """ +_code_nvrtc = _code_base + """ __device__ void kernel() { } """ -) @pytest.mark.skipif(cupy.cuda.runtime.is_hip, reason="for CUDA") diff --git a/dpnp/tests/third_party/cupy/core_tests/test_internal.py b/dpnp/tests/third_party/cupy/core_tests/test_internal.py index 205661e80d75..2f4239a2e220 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_internal.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_internal.py @@ -1,18 +1,16 @@ -import math -import unittest +from __future__ import annotations import numpy import pytest # from cupy._core import internal -from dpnp.tests.third_party.cupy import testing pytest.skip( "CuPy internal functions are not supported", allow_module_level=True ) -class TestProd(unittest.TestCase): +class TestProd: def test_empty(self): assert internal.prod([]) == 1 @@ -24,7 +22,7 @@ def test_two(self): assert internal.prod([2, 3]) == 6 -class TestProdSequence(unittest.TestCase): +class TestProdSequence: def test_empty(self): assert internal.prod_sequence(()) == 1 @@ -74,7 +72,7 @@ def test_float(self): assert internal.get_size(1.0) == (1.0,) -class TestVectorEqual(unittest.TestCase): +class TestVectorEqual: def test_empty(self): assert internal.vector_equal([], []) is True @@ -89,7 +87,7 @@ def test_different_size(self): assert internal.vector_equal([1, 2, 3], [1, 2]) is False -class TestGetCContiguity(unittest.TestCase): +class TestGetCContiguity: def 
test_zero_in_shape(self): assert internal.get_c_contiguity((1, 0, 1), (1, 1, 1), 3) @@ -122,134 +120,101 @@ def test_no_contiguous3(self): assert not internal.get_c_contiguity((3, 1, 3), (6, 6, 4), 2) -class TestInferUnknownDimension(unittest.TestCase): +class TestInferUnknownDimension: def test_known_all(self): assert internal.infer_unknown_dimension((1, 2, 3), 6) == [1, 2, 3] def test_multiple_unknown(self): - with self.assertRaises(ValueError): + with pytest.raises(ValueError): internal.infer_unknown_dimension((-1, 1, -1), 10) def test_infer(self): assert internal.infer_unknown_dimension((-1, 2, 3), 12) == [2, 2, 3] -@testing.parameterize( - {"slice": (2, 8, 1), "expect": (2, 8, 1)}, - {"slice": (2, None, 1), "expect": (2, 10, 1)}, - {"slice": (2, 1, 1), "expect": (2, 2, 1)}, - {"slice": (2, -1, 1), "expect": (2, 9, 1)}, - {"slice": (None, 8, 1), "expect": (0, 8, 1)}, - {"slice": (-3, 8, 1), "expect": (7, 8, 1)}, - {"slice": (11, 8, 1), "expect": (10, 10, 1)}, - {"slice": (11, 11, 1), "expect": (10, 10, 1)}, - {"slice": (-11, 8, 1), "expect": (0, 8, 1)}, - {"slice": (-11, -11, 1), "expect": (0, 0, 1)}, - {"slice": (8, 2, -1), "expect": (8, 2, -1)}, - {"slice": (8, None, -1), "expect": (8, -1, -1)}, - {"slice": (8, 9, -1), "expect": (8, 8, -1)}, - {"slice": (8, -3, -1), "expect": (8, 7, -1)}, - {"slice": (None, 8, -1), "expect": (9, 8, -1)}, - {"slice": (-3, 6, -1), "expect": (7, 6, -1)}, - {"slice": (10, 10, -1), "expect": (9, 9, -1)}, - {"slice": (10, 8, -1), "expect": (9, 8, -1)}, - {"slice": (9, 10, -1), "expect": (9, 9, -1)}, - {"slice": (9, 9, -1), "expect": (9, 9, -1)}, - {"slice": (9, 8, -1), "expect": (9, 8, -1)}, - {"slice": (8, 8, -1), "expect": (8, 8, -1)}, - {"slice": (-9, -8, -1), "expect": (1, 1, -1)}, - {"slice": (-9, -9, -1), "expect": (1, 1, -1)}, - {"slice": (-9, -10, -1), "expect": (1, 0, -1)}, - {"slice": (-9, -11, -1), "expect": (1, -1, -1)}, - {"slice": (-9, -12, -1), "expect": (1, -1, -1)}, - {"slice": (-10, -9, -1), "expect": (0, 0, -1)}, 
- {"slice": (-10, -10, -1), "expect": (0, 0, -1)}, - {"slice": (-10, -11, -1), "expect": (0, -1, -1)}, - {"slice": (-10, -12, -1), "expect": (0, -1, -1)}, - {"slice": (-11, 8, -1), "expect": (-1, -1, -1)}, - {"slice": (-11, -9, -1), "expect": (-1, -1, -1)}, - {"slice": (-11, -10, -1), "expect": (-1, -1, -1)}, - {"slice": (-11, -11, -1), "expect": (-1, -1, -1)}, - {"slice": (-11, -12, -1), "expect": (-1, -1, -1)}, +@pytest.mark.parametrize( + ("slice_", "expect"), + [ + ((2, 8, 1), (2, 8, 1)), + ((2, None, 1), (2, 10, 1)), + ((2, 1, 1), (2, 2, 1)), + ((2, -1, 1), (2, 9, 1)), + ((None, 8, 1), (0, 8, 1)), + ((-3, 8, 1), (7, 8, 1)), + ((11, 8, 1), (10, 10, 1)), + ((11, 11, 1), (10, 10, 1)), + ((-11, 8, 1), (0, 8, 1)), + ((-11, -11, 1), (0, 0, 1)), + ((8, 2, -1), (8, 2, -1)), + ((8, None, -1), (8, -1, -1)), + ((8, 9, -1), (8, 8, -1)), + ((8, -3, -1), (8, 7, -1)), + ((None, 8, -1), (9, 8, -1)), + ((-3, 6, -1), (7, 6, -1)), + ((10, 10, -1), (9, 9, -1)), + ((10, 8, -1), (9, 8, -1)), + ((9, 10, -1), (9, 9, -1)), + ((9, 9, -1), (9, 9, -1)), + ((9, 8, -1), (9, 8, -1)), + ((8, 8, -1), (8, 8, -1)), + ((-9, -8, -1), (1, 1, -1)), + ((-9, -9, -1), (1, 1, -1)), + ((-9, -10, -1), (1, 0, -1)), + ((-9, -11, -1), (1, -1, -1)), + ((-9, -12, -1), (1, -1, -1)), + ((-10, -9, -1), (0, 0, -1)), + ((-10, -10, -1), (0, 0, -1)), + ((-10, -11, -1), (0, -1, -1)), + ((-10, -12, -1), (0, -1, -1)), + ((-11, 8, -1), (-1, -1, -1)), + ((-11, -9, -1), (-1, -1, -1)), + ((-11, -10, -1), (-1, -1, -1)), + ((-11, -11, -1), (-1, -1, -1)), + ((-11, -12, -1), (-1, -1, -1)), + ], ) -class TestCompleteSlice(unittest.TestCase): +def test_complete_slice(slice_, expect): + assert internal.complete_slice(slice(*slice_), 10) == slice(*expect) - def test_complete_slice(self): - assert internal.complete_slice(slice(*self.slice), 10) == slice( - *self.expect - ) - -class TestCompleteSliceError(unittest.TestCase): +class TestCompleteSliceError: def test_invalid_step_value(self): - with self.assertRaises(ValueError): + 
with pytest.raises(ValueError): internal.complete_slice(slice(1, 1, 0), 1) def test_invalid_step_type(self): - with self.assertRaises(TypeError): + with pytest.raises(TypeError): internal.complete_slice(slice(1, 1, (1, 2)), 1) def test_invalid_start_type(self): - with self.assertRaises(TypeError): + with pytest.raises(TypeError): internal.complete_slice(slice((1, 2), 1, 1), 1) - with self.assertRaises(TypeError): + with pytest.raises(TypeError): internal.complete_slice(slice((1, 2), 1, -1), 1) def test_invalid_stop_type(self): - with self.assertRaises(TypeError): + with pytest.raises(TypeError): internal.complete_slice(slice((1, 2), 1, 1), 1) - with self.assertRaises(TypeError): + with pytest.raises(TypeError): internal.complete_slice(slice((1, 2), 1, -1), 1) -@testing.parameterize( - {"x": 0, "expect": 0}, - {"x": 1, "expect": 1}, - {"x": 2, "expect": 2}, - {"x": 3, "expect": 4}, - {"x": 2**10, "expect": 2**10}, - {"x": 2**10 - 1, "expect": 2**10}, - {"x": 2**10 + 1, "expect": 2**11}, - {"x": 2**40, "expect": 2**40}, - {"x": 2**40 - 1, "expect": 2**40}, - {"x": 2**40 + 1, "expect": 2**41}, +@pytest.mark.parametrize( + ("x", "expect"), + [ + (0, 0), + (1, 1), + (2, 2), + (3, 4), + (2**10, 2**10), + (2**10 - 1, 2**10), + (2**10 + 1, 2**11), + (2**40, 2**40), + (2**40 - 1, 2**40), + (2**40 + 1, 2**41), + ], ) -class TestClp2(unittest.TestCase): - - def test_clp2(self): - assert internal.clp2(self.x) == self.expect - - -@testing.parameterize( - *testing.product( - { - "value": [ - 0.0, - 1.0, - -1.0, - 0.25, - -0.25, - 11.0, - -11.0, - 2**-15, - -(2**-15), # Denormalized Number - float("inf"), - float("-inf"), - ], - } - ) -) -class TestConvertFloat16(unittest.TestCase): - - def test_conversion(self): - half = internal.to_float16(self.value) - assert internal.from_float16(half) == self.value - - -class TestConvertFloat16Nan(unittest.TestCase): - - def test_conversion(self): - half = internal.to_float16(float("nan")) - assert math.isnan(internal.from_float16(half)) 
+def test_clp2(x, expect): + assert internal.clp2(x) == expect diff --git a/dpnp/tests/third_party/cupy/creation_tests/test_ranges.py b/dpnp/tests/third_party/cupy/creation_tests/test_ranges.py index 3790eae96462..69873473e0d7 100644 --- a/dpnp/tests/third_party/cupy/creation_tests/test_ranges.py +++ b/dpnp/tests/third_party/cupy/creation_tests/test_ranges.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import functools import math import unittest @@ -226,8 +228,8 @@ def test_linspace_mixed_start_stop2(self, xp, dtype_range, dtype_out): # TODO (ev-br): np 2.0: had to bump the default rtol on Windows # and numpy 1.26+weak promotion from 0 to 5e-6 if xp.dtype(dtype_range).kind == "u": - # to avoid overflow, limit `val` to be smaller - # than xp.iinfo(dtype).max + # to avoid overflow, limit `val` to be smaller than + # xp.iinfo(dtype).max (TODO: check if dpctl-2230 resolves that) if dtype_range in [xp.uint8, xp.uint16] or dtype_out in [ xp.int8, xp.uint8, diff --git a/dpnp/tests/third_party/cupy/fft_tests/test_cache.py b/dpnp/tests/third_party/cupy/fft_tests/test_cache.py index e32f8e8305a0..29e0a7724edf 100644 --- a/dpnp/tests/third_party/cupy/fft_tests/test_cache.py +++ b/dpnp/tests/third_party/cupy/fft_tests/test_cache.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import contextlib import io import queue @@ -14,7 +16,7 @@ # from cupy.cuda import runtime # from cupy.fft import config -# from .test_fft import (multi_gpu_config, _skip_multi_gpu_bug) +# from .test_fft import multi_gpu_config pytest.skip("FFT cache functions are not supported", allow_module_level=True) @@ -30,26 +32,29 @@ def intercept_stdout(func): class TestPlanCache(unittest.TestCase): - def setUp(self): - self.caches = [] - self.old_sizes = [] + @contextlib.contextmanager + @staticmethod + def prepare_and_restore_caches(): + old_sizes = [] for i in range(n_devices): with device.Device(i): cache = config.get_plan_cache() - self.old_sizes.append(cache.get_size()) + 
old_sizes.append(cache.get_size()) cache.clear() cache.set_memsize(-1) cache.set_size(2) - self.caches.append(cache) - def tearDown(self): - for i in range(n_devices): - with device.Device(i): - cache = config.get_plan_cache() - cache.clear() - cache.set_size(self.old_sizes[i]) - cache.set_memsize(-1) + try: + yield + finally: + for i in range(n_devices): + with device.Device(i): + cache = config.get_plan_cache() + cache.clear() + cache.set_size(old_sizes[i]) + cache.set_memsize(-1) + @prepare_and_restore_caches() def test_LRU_cache1(self): # test if insertion and clean-up works cache = config.get_plan_cache() @@ -62,6 +67,7 @@ def test_LRU_cache1(self): cache.clear() assert cache.get_curr_size() == 0 <= cache.get_size() + @prepare_and_restore_caches() def test_LRU_cache2(self): # test if plan is reused cache = config.get_plan_cache() @@ -83,6 +89,7 @@ def test_LRU_cache2(self): # we should get the same plan assert plan0 is plan1 + @prepare_and_restore_caches() def test_LRU_cache3(self): # test if cache size is limited cache = config.get_plan_cache() @@ -108,6 +115,7 @@ def test_LRU_cache3(self): for _, node in cache: assert plan is not node.plan + @prepare_and_restore_caches() def test_LRU_cache4(self): # test if fetching the plan will reorder it to the top cache = config.get_plan_cache() @@ -149,6 +157,8 @@ def test_LRU_cache4(self): cache[next(iterator)[0]] @testing.multi_gpu(2) + @prepare_and_restore_caches() + @pytest.mark.thread_unsafe(reason="intercepts stdout") def test_LRU_cache5(self): # test if the LRU cache is thread-local @@ -210,10 +220,13 @@ def thread_init_caches(gpus, queue): assert stdout.count("uninitialized") == n_devices - 2 @testing.multi_gpu(2) - def test_LRU_cache6(self): + @prepare_and_restore_caches() + def test_LRU_cache6(self, gpus=None): # test if each device has a separate cache - cache0 = self.caches[0] - cache1 = self.caches[1] + with device.Device(0): + cache0 = config.get_plan_cache() + with device.Device(1): + cache1 = 
config.get_plan_cache() # ensure a fresh state assert cache0.get_curr_size() == 0 <= cache0.get_size() @@ -247,10 +260,13 @@ def test_LRU_cache6(self): @pytest.mark.skipif( runtime.is_hip, reason="hipFFT doesn't support multi-GPU" ) - def test_LRU_cache7(self): + @prepare_and_restore_caches() + def test_LRU_cache7(self, gpus=None): # test accessing a multi-GPU plan - cache0 = self.caches[0] - cache1 = self.caches[1] + with device.Device(0): + cache0 = config.get_plan_cache() + with device.Device(1): + cache1 = config.get_plan_cache() # ensure a fresh state assert cache0.get_curr_size() == 0 <= cache0.get_size() @@ -319,6 +335,7 @@ def test_LRU_cache7(self): assert cache0.get_curr_size() == 1 <= cache0.get_size() assert cache1.get_curr_size() == 2 <= cache1.get_size() + @prepare_and_restore_caches() def test_LRU_cache8(self): # test if Plan1d and PlanNd can coexist in the same cache cache = config.get_plan_cache() @@ -340,6 +357,7 @@ def test_LRU_cache8(self): assert isinstance(next(iterator)[1].plan, cufft.PlanNd) assert isinstance(next(iterator)[1].plan, cufft.Plan1d) + @prepare_and_restore_caches() def test_LRU_cache9(self): # test if memsizes in the cache adds up cache = config.get_plan_cache() @@ -358,6 +376,8 @@ def test_LRU_cache9(self): assert memsize == cache.get_curr_memsize() + @prepare_and_restore_caches() + @pytest.mark.thread_unsafe(reason="intercepts stdout") def test_LRU_cache10(self): # test if deletion works and if show_info() is consistent with data cache = config.get_plan_cache() @@ -406,11 +426,13 @@ def test_LRU_cache10(self): @pytest.mark.skipif( runtime.is_hip, reason="hipFFT doesn't support multi-GPU" ) + @prepare_and_restore_caches() def test_LRU_cache11(self): # test if collectively deleting a multi-GPU plan works - _skip_multi_gpu_bug((128,), self.gpus) - cache0 = self.caches[0] - cache1 = self.caches[1] + with device.Device(0): + cache0 = config.get_plan_cache() + with device.Device(1): + cache1 = config.get_plan_cache() # ensure a fresh 
state assert cache0.get_curr_size() == 0 <= cache0.get_size() @@ -441,11 +463,14 @@ def test_LRU_cache11(self): @pytest.mark.skipif( runtime.is_hip, reason="hipFFT doesn't support multi-GPU" ) + @prepare_and_restore_caches() def test_LRU_cache12(self): # test if an error is raise when one of the caches is unable # to fit it a multi-GPU plan - cache0 = self.caches[0] - cache1 = self.caches[1] + with device.Device(0): + cache0 = config.get_plan_cache() + with device.Device(1): + cache1 = config.get_plan_cache() # ensure a fresh state assert cache0.get_curr_size() == 0 <= cache0.get_size() @@ -467,6 +492,7 @@ def test_LRU_cache12(self): runtime.runtimeGetVersion() >= 11080, "CUDA 11.8 has different plan size", ) + @prepare_and_restore_caches() def test_LRU_cache13(self): # test if plan insertion respect the memory size limit cache = config.get_plan_cache() diff --git a/dpnp/tests/third_party/cupy/fft_tests/test_callback.py b/dpnp/tests/third_party/cupy/fft_tests/test_callback.py index 12bfd4aa73df..e2682d0aaa6e 100644 --- a/dpnp/tests/third_party/cupy/fft_tests/test_callback.py +++ b/dpnp/tests/third_party/cupy/fft_tests/test_callback.py @@ -1,18 +1,62 @@ +from __future__ import annotations + import contextlib +import os import string import sys import tempfile from unittest import mock +try: + import Cython +except ImportError: + Cython = None +else: + if Cython.__version__ < "0.29.0": + Cython = None import numpy as np import pytest import dpnp as cupy from dpnp.tests.third_party.cupy import testing +# from cupy.cuda import cufft +# from cupy.cuda.device import get_compute_capability + pytest.skip("FFT callbacks are not supported", allow_module_level=True) +def cuda_version(): + return cupy.cuda.runtime.runtimeGetVersion() + + +cb_ver_for_test = ("legacy", "jit") + + +def check_should_skip_legacy_test(): + if not sys.platform.startswith("linux"): + pytest.skip("legacy callbacks are only supported on Linux") + if Cython is None: + pytest.skip("no working Cython") + 
if "LD_PRELOAD" in os.environ: + pytest.skip( + "legacy callback does not work if libcufft.so " "is preloaded" + ) + if cufft.getVersion() >= 12000 and get_compute_capability() == "75": + pytest.skip( + "cuFFT legacy callbacks in CUDA 13.0+ do not support " "cc 7.5" + ) + if cufft.getVersion() == 11303 and get_compute_capability() == "120": + pytest.skip( + "cuFFT legacy callbacks in CUDA 12.8.0 do not support " "cc 12.0" + ) + + +def check_should_skip_jit_test(): + if cufft.getVersion() < 11303: + pytest.skip("JIT callbacks require cuFFT from CUDA 12.8+") + + @contextlib.contextmanager def use_temporary_cache_dir(): target = "cupy.fft._callback.get_cache_dir" @@ -21,45 +65,50 @@ def use_temporary_cache_dir(): yield path +suppress_legacy_warning = pytest.mark.filterwarnings( + "ignore:.*legacy callback.*:DeprecationWarning" +) + + _load_callback = r""" -__device__ ${data_type} CB_ConvertInput( - void* dataIn, size_t offset, void* callerInfo, void* sharedPtr) +__device__ ${data_type} ${cb_name}( + void* dataIn, ${offset_type} offset, void* callerInfo, void* sharedPtr) { ${data_type} x = ((${data_type}*)dataIn)[offset]; ${element} *= 2.5; return x; } -__device__ ${load_type} d_loadCallbackPtr = CB_ConvertInput; +__device__ ${load_type} d_loadCallbackPtr = ${cb_name}; """ _load_callback_with_aux = r""" -__device__ ${data_type} CB_ConvertInput( - void* dataIn, size_t offset, void* callerInfo, void* sharedPtr) +__device__ ${data_type} ${cb_name}( + void* dataIn, ${offset_type} offset, void* callerInfo, void* sharedPtr) { ${data_type} x = ((${data_type}*)dataIn)[offset]; ${element} *= *((${aux_type}*)callerInfo); return x; } -__device__ ${load_type} d_loadCallbackPtr = CB_ConvertInput; +__device__ ${load_type} d_loadCallbackPtr = ${cb_name}; """ _load_callback_with_aux2 = r""" -__device__ ${data_type} CB_ConvertInput( - void* dataIn, size_t offset, void* callerInfo, void* sharedPtr) +__device__ ${data_type} ${cb_name}( + void* dataIn, ${offset_type} offset, void* 
callerInfo, void* sharedPtr) { ${data_type} x = ((${data_type}*)dataIn)[offset]; ${element} *= ((${aux_type}*)callerInfo)[offset]; return x; } -__device__ ${load_type} d_loadCallbackPtr = CB_ConvertInput; +__device__ ${load_type} d_loadCallbackPtr = ${cb_name}; """ _store_callback = r""" -__device__ void CB_ConvertOutput( - void *dataOut, size_t offset, ${data_type} element, +__device__ void ${cb_name}( + void *dataOut, ${offset_type} offset, ${data_type} element, void *callerInfo, void *sharedPointer) { ${data_type} x = element; @@ -67,12 +116,12 @@ def use_temporary_cache_dir(): ((${data_type}*)dataOut)[offset] = x; } -__device__ ${store_type} d_storeCallbackPtr = CB_ConvertOutput; +__device__ ${store_type} d_storeCallbackPtr = ${cb_name}; """ _store_callback_with_aux = r""" -__device__ void CB_ConvertOutput( - void *dataOut, size_t offset, ${data_type} element, +__device__ void ${cb_name}( + void *dataOut, ${offset_type} offset, ${data_type} element, void *callerInfo, void *sharedPointer) { ${data_type} x = element; @@ -80,26 +129,209 @@ def use_temporary_cache_dir(): ((${data_type}*)dataOut)[offset] = x; } -__device__ ${store_type} d_storeCallbackPtr = CB_ConvertOutput; +__device__ ${store_type} d_storeCallbackPtr = ${cb_name}; """ -def _set_load_cb(code, element, data_type, callback_type, aux_type=None): - return string.Template(code).substitute( +def _set_load_cb( + code, + element, + data_type, + callback_type, + callback_name, + aux_type=None, + cb_ver="", +): + if cb_ver == "jit": + callback_type = callback_type.replace( + "cufftCallback", "cufftJITCallback" + ) + callback = string.Template(code).substitute( data_type=data_type, aux_type=aux_type, load_type=callback_type, + cb_name=callback_name, element=element, + offset_type=("size_t" if cb_ver == "legacy" else "unsigned long long"), ) - - -def _set_store_cb(code, element, data_type, callback_type, aux_type=None): - return string.Template(code).substitute( + if cb_ver == "jit": + callback = "#include 
\n\n" + callback + return callback + + +def _set_store_cb( + code, + element, + data_type, + callback_type, + callback_name, + aux_type=None, + cb_ver="", +): + if cb_ver == "jit": + callback_type = callback_type.replace( + "cufftCallback", "cufftJITCallback" + ) + callback = string.Template(code).substitute( data_type=data_type, aux_type=aux_type, store_type=callback_type, + cb_name=callback_name, element=element, + offset_type=("size_t" if cb_ver == "legacy" else "unsigned long long"), ) + if cb_ver == "jit": + callback = "#include \n\n" + callback + return callback + + +# Note: this class is place here instead of at the end of this file, because +# pytest does not reset warnings internally, and other tests would suppress +# the warnings such that at the end we have no warnings to capture, but we want +# to ensure warnings are raised. +@pytest.mark.skipif( + cupy.cuda.runtime.is_hip, reason="hipFFT does not support callbacks" +) +class TestInputValidationWith1dCallbacks: + + shape = (10,) + norm = "ortho" + dtype = np.complex64 + + @classmethod + def setup_class(cls): + # All tests in this class use a temporary cache dir (also if threaded) + with use_temporary_cache_dir(): + yield + + def test_fft_load_legacy(self): + check_should_skip_legacy_test() + + fft = cupy.fft.fft + code = _load_callback + types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", + ) + cb_load = _set_load_cb(code, *types, cb_ver="legacy") + + a = testing.shaped_random(self.shape, cupy, self.dtype) + with pytest.deprecated_call( + match="legacy callback is considered deprecated" + ): + with cupy.fft.config.set_cufft_callbacks( + cb_load=cb_load, cb_ver="legacy" + ): + fft(a, norm=self.norm) + + def test_fft_load_jit_no_name(self): + check_should_skip_jit_test() + + fft = cupy.fft.fft + code = _load_callback + types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", + ) + cb_load = _set_load_cb(code, *types, 
cb_ver="jit") + + a = testing.shaped_random(self.shape, cupy, self.dtype) + # We omit passing cb_load_name. The test infra setup would check + # if we can infer it correctly. + with cupy.fft.config.set_cufft_callbacks(cb_load=cb_load, cb_ver="jit"): + fft(a, norm=self.norm) + + def test_fft_store_legacy(self): + check_should_skip_legacy_test() + + fft = cupy.fft.fft + code = _store_callback + types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", + ) + cb_store = _set_store_cb(code, *types, cb_ver="legacy") + + a = testing.shaped_random(self.shape, cupy, self.dtype) + with pytest.deprecated_call( + match="legacy callback is considered deprecated" + ): + with cupy.fft.config.set_cufft_callbacks( + cb_store=cb_store, cb_ver="legacy" + ): + fft(a, norm=self.norm) + + def test_fft_store_jit_no_name(self): + check_should_skip_jit_test() + + fft = cupy.fft.fft + code = _store_callback + types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", + ) + cb_store = _set_store_cb(code, *types, cb_ver="jit") + + a = testing.shaped_random(self.shape, cupy, self.dtype) + # We omit passing cb_store_name. The test infra setup would check + # if we can infer it correctly. 
+ with cupy.fft.config.set_cufft_callbacks( + cb_store=cb_store, cb_ver="jit" + ): + fft(a, norm=self.norm) + + def test_fft_load_store_legacy_aux(self): + check_should_skip_legacy_test() + + fft = cupy.fft.fft + dtype = self.dtype + load_code = _load_callback_with_aux + store_code = _store_callback_with_aux + load_aux = cupy.asarray(2.5, dtype=cupy.dtype(dtype).char.lower()) + store_aux = cupy.asarray(3.8, dtype=cupy.dtype(dtype).char.lower()) + + load_types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", + "float", + ) + store_types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", + "float", + ) + cb_load = _set_load_cb(load_code, *load_types, cb_ver="legacy") + cb_store = _set_store_cb(store_code, *store_types, cb_ver="legacy") + + a = testing.shaped_random(self.shape, cupy, self.dtype) + with ( + pytest.deprecated_call( + match="cb_load_aux_arr or cb_store_aux_arr is deprecated" + ), + pytest.deprecated_call( + match="legacy callback is considered deprecated" + ), + ): + with cupy.fft.config.set_cufft_callbacks( + cb_load=cb_load, + cb_store=cb_store, + cb_load_aux_arr=load_aux, + cb_store_aux_arr=store_aux, + cb_ver="legacy", + ): + fft(a, norm=self.norm) @testing.parameterize( @@ -108,31 +340,60 @@ def _set_store_cb(code, element, data_type, callback_type, aux_type=None): "n": [None, 5, 10, 15], "shape": [(10, 7), (10,), (10, 10)], "norm": [None, "ortho"], + "cb_ver": cb_ver_for_test, } ) ) -@testing.with_requires("cython>=0.29.0") -@pytest.mark.skipif( - not sys.platform.startswith("linux"), - reason="callbacks are only supported on Linux", -) @pytest.mark.skipif( cupy.cuda.runtime.is_hip, reason="hipFFT does not support callbacks" ) class Test1dCallbacks: + @classmethod + def setup_class(cls): + # All tests in this class use a temporary cache dir (also if threaded) + with use_temporary_cache_dir(): + yield + def _test_load_helper(self, xp, dtype, fft_func): + if self.cb_ver == 
"legacy": + check_should_skip_legacy_test() + else: + check_should_skip_jit_test() + + # for simplicity we use the JIT callback names for both legacy/jit fft = getattr(xp.fft, fft_func) code = _load_callback if dtype == np.complex64: - types = ("x.x", "cufftComplex", "cufftCallbackLoadC") + types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", + ) elif dtype == np.complex128: - types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ") + types = ( + "x.x", + "cufftDoubleComplex", + "cufftCallbackLoadZ", + "cufftJITCallbackLoadDoubleComplex", + ) elif dtype == np.float32: - types = ("x", "cufftReal", "cufftCallbackLoadR") - else: - types = ("x", "cufftDoubleReal", "cufftCallbackLoadD") - cb_load = _set_load_cb(code, *types) + types = ( + "x", + "cufftReal", + "cufftCallbackLoadR", + "cufftJITCallbackLoadReal", + ) + else: # float64 + types = ( + "x", + "cufftDoubleReal", + "cufftCallbackLoadD", + "cufftJITCallbackLoadDoubleReal", + ) + cb_load = _set_load_cb(code, *types, cb_ver=self.cb_ver) + cb_load_name = types[-1] if self.cb_ver == "jit" else None a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -144,50 +405,93 @@ def _test_load_helper(self, xp, dtype, fft_func): else: out = out.astype(np.float32) else: - with use_temporary_cache_dir(): - with xp.fft.config.set_cufft_callbacks(cb_load=cb_load): - out = fft(a, n=self.n, norm=self.norm) + with xp.fft.config.set_cufft_callbacks( + cb_load=cb_load, cb_load_name=cb_load_name, cb_ver=self.cb_ver + ): + out = fft(a, n=self.n, norm=self.norm) return out + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_fft_load(self, xp, dtype): return self._test_load_helper(xp, dtype, "fft") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_ifft_load(self, xp, dtype): return self._test_load_helper(xp, 
dtype, "ifft") + @suppress_legacy_warning @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_rfft_load(self, xp, dtype): return self._test_load_helper(xp, dtype, "rfft") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_irfft_load(self, xp, dtype): return self._test_load_helper(xp, dtype, "irfft") def _test_store_helper(self, xp, dtype, fft_func): + if self.cb_ver == "legacy": + check_should_skip_legacy_test() + else: + check_should_skip_jit_test() + fft = getattr(xp.fft, fft_func) code = _store_callback + + # for simplicity we use the JIT callback names for both legacy/jit if dtype == np.complex64: if fft_func != "irfft": - types = ("x.y", "cufftComplex", "cufftCallbackStoreC") - else: - types = ("x", "cufftReal", "cufftCallbackStoreR") + types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", + ) + else: # float32 for irfft + types = ( + "x", + "cufftReal", + "cufftCallbackStoreR", + "cufftJITCallbackStoreReal", + ) elif dtype == np.complex128: if fft_func != "irfft": - types = ("x.y", "cufftDoubleComplex", "cufftCallbackStoreZ") - else: - types = ("x", "cufftDoubleReal", "cufftCallbackStoreD") + types = ( + "x.y", + "cufftDoubleComplex", + "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", + ) + else: # float64 for irfft + types = ( + "x", + "cufftDoubleReal", + "cufftCallbackStoreD", + "cufftJITCallbackStoreDoubleReal", + ) elif dtype == np.float32: - types = ("x.y", "cufftComplex", "cufftCallbackStoreC") + types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", + ) elif dtype == np.float64: - types = ("x.y", "cufftDoubleComplex", "cufftCallbackStoreZ") - cb_store = _set_store_cb(code, *types) + types = ( + "x.y", + "cufftDoubleComplex", + "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", + ) + 
cb_store = _set_store_cb(code, *types, cb_ver=self.cb_ver) + cb_store_name = types[-1] if self.cb_ver == "jit" else None a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -201,67 +505,134 @@ def _test_store_helper(self, xp, dtype, fft_func): if dtype in (np.float32, np.complex64): out = out.astype(np.float32) else: - with use_temporary_cache_dir(): - with xp.fft.config.set_cufft_callbacks(cb_store=cb_store): - out = fft(a, n=self.n, norm=self.norm) + with xp.fft.config.set_cufft_callbacks( + cb_store=cb_store, + cb_store_name=cb_store_name, + cb_ver=self.cb_ver, + ): + out = fft(a, n=self.n, norm=self.norm) return out + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_fft_store(self, xp, dtype): return self._test_store_helper(xp, dtype, "fft") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_ifft_store(self, xp, dtype): return self._test_store_helper(xp, dtype, "ifft") + @suppress_legacy_warning @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_rfft_store(self, xp, dtype): return self._test_store_helper(xp, dtype, "rfft") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_irfft_store(self, xp, dtype): return self._test_store_helper(xp, dtype, "irfft") def _test_load_store_helper(self, xp, dtype, fft_func): + if self.cb_ver == "legacy": + check_should_skip_legacy_test() + else: + check_should_skip_jit_test() + + # for simplicity we use the JIT callback names for both legacy/jit fft = getattr(xp.fft, fft_func) load_code = _load_callback store_code = _store_callback if fft_func in ("fft", "ifft"): if dtype == np.complex64: - load_types = ("x.x", "cufftComplex", "cufftCallbackLoadC") - store_types = ("x.y", 
"cufftComplex", "cufftCallbackStoreC") - else: - load_types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ") + load_types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", + ) + store_types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", + ) + else: # complex128 + load_types = ( + "x.x", + "cufftDoubleComplex", + "cufftCallbackLoadZ", + "cufftJITCallbackLoadDoubleComplex", + ) store_types = ( "x.y", "cufftDoubleComplex", "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", ) elif fft_func == "rfft": if dtype == np.float32: - load_types = ("x", "cufftReal", "cufftCallbackLoadR") - store_types = ("x.y", "cufftComplex", "cufftCallbackStoreC") - else: - load_types = ("x", "cufftDoubleReal", "cufftCallbackLoadD") + load_types = ( + "x", + "cufftReal", + "cufftCallbackLoadR", + "cufftJITCallbackLoadReal", + ) + store_types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", + ) + else: # float64 + load_types = ( + "x", + "cufftDoubleReal", + "cufftCallbackLoadD", + "cufftJITCallbackLoadDoubleReal", + ) store_types = ( "x.y", "cufftDoubleComplex", "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", ) else: # irfft if dtype == np.complex64: - load_types = ("x.x", "cufftComplex", "cufftCallbackLoadC") - store_types = ("x", "cufftReal", "cufftCallbackStoreR") - else: - load_types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ") - store_types = ("x", "cufftDoubleReal", "cufftCallbackStoreD") - cb_load = _set_load_cb(load_code, *load_types) - cb_store = _set_store_cb(store_code, *store_types) + load_types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", + ) + store_types = ( + "x", + "cufftReal", + "cufftCallbackStoreR", + "cufftJITCallbackStoreReal", + ) + else: # complex128 + load_types = ( + "x.x", + "cufftDoubleComplex", + "cufftCallbackLoadZ", + "cufftJITCallbackLoadDoubleComplex", + ) + 
store_types = ( + "x", + "cufftDoubleReal", + "cufftCallbackStoreD", + "cufftJITCallbackStoreDoubleReal", + ) + cb_load = _set_load_cb(load_code, *load_types, cb_ver=self.cb_ver) + cb_load_name = load_types[-1] if self.cb_ver == "jit" else None + cb_store = _set_store_cb(store_code, *store_types, cb_ver=self.cb_ver) + cb_store_name = store_types[-1] if self.cb_ver == "jit" else None a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -276,47 +647,71 @@ def _test_load_store_helper(self, xp, dtype, fft_func): if dtype in (np.float32, np.complex64): out = out.astype(np.float32) else: - with use_temporary_cache_dir(): - with xp.fft.config.set_cufft_callbacks( - cb_load=cb_load, cb_store=cb_store - ): - out = fft(a, n=self.n, norm=self.norm) + with xp.fft.config.set_cufft_callbacks( + cb_load=cb_load, + cb_load_name=cb_load_name, + cb_store=cb_store, + cb_store_name=cb_store_name, + cb_ver=self.cb_ver, + ): + out = fft(a, n=self.n, norm=self.norm) return out + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_fft_load_store(self, xp, dtype): return self._test_load_store_helper(xp, dtype, "fft") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_ifft_load_store(self, xp, dtype): return self._test_load_store_helper(xp, dtype, "ifft") + @suppress_legacy_warning @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_rfft_load_store(self, xp, dtype): return self._test_load_store_helper(xp, dtype, "rfft") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_irfft_load_store(self, xp, dtype): return self._test_load_store_helper(xp, dtype, "irfft") + @suppress_legacy_warning @testing.for_complex_dtypes() 
@testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_fft_load_aux(self, xp, dtype): + if self.cb_ver == "legacy": + check_should_skip_legacy_test() + else: + check_should_skip_jit_test() + fft = xp.fft.fft c = _load_callback_with_aux2 + # for simplicity we use the JIT callback names for both legacy/jit if dtype == np.complex64: - cb_load = _set_load_cb( - c, "x.x", "cufftComplex", "cufftCallbackLoadC", "float" + types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", + "float", ) - else: - cb_load = _set_load_cb( - c, "x.x", "cufftDoubleComplex", "cufftCallbackLoadZ", "double" + else: # complex128 + types = ( + "x.x", + "cufftDoubleComplex", + "cufftCallbackLoadZ", + "cufftJITCallbackLoadDoubleComplex", + "double", ) + cb_load = _set_load_cb(c, *types, cb_ver=self.cb_ver) + cb_load_name = types[3] if self.cb_ver == "jit" else None a = testing.shaped_random(self.shape, xp, dtype) out_last = self.n if self.n is not None else self.shape[-1] @@ -333,15 +728,22 @@ def test_fft_load_aux(self, xp, dtype): if dtype in (np.float32, np.complex64): out = out.astype(np.complex64) else: - with use_temporary_cache_dir(): - with xp.fft.config.set_cufft_callbacks( - cb_load=cb_load, cb_load_aux_arr=b - ): - out = fft(a, n=self.n, norm=self.norm) + with xp.fft.config.set_cufft_callbacks( + cb_load=cb_load, + cb_load_name=cb_load_name, + cb_load_data=b.data, + cb_ver=self.cb_ver, + ): + out = fft(a, n=self.n, norm=self.norm) return out def _test_load_store_aux_helper(self, xp, dtype, fft_func): + if self.cb_ver == "legacy": + check_should_skip_legacy_test() + else: + check_should_skip_jit_test() + fft = getattr(xp.fft, fft_func) load_code = _load_callback_with_aux store_code = _store_callback_with_aux @@ -349,53 +751,67 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func): load_aux = xp.asarray(2.5, dtype=xp.dtype(dtype).char.lower()) store_aux = xp.asarray(3.8, dtype=xp.dtype(dtype).char.lower()) + # for 
simplicity we use the JIT callback names for both legacy/jit if fft_func in ("fft", "ifft"): if dtype == np.complex64: load_types = ( "x.x", "cufftComplex", "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", "float", ) store_types = ( "x.y", "cufftComplex", "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", "float", ) - else: + else: # complex128 load_types = ( "x.x", "cufftDoubleComplex", "cufftCallbackLoadZ", + "cufftJITCallbackLoadDoubleComplex", "double", ) store_types = ( "x.y", "cufftDoubleComplex", "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", "double", ) elif fft_func == "rfft": if dtype == np.float32: - load_types = ("x", "cufftReal", "cufftCallbackLoadR", "float") + load_types = ( + "x", + "cufftReal", + "cufftCallbackLoadR", + "cufftJITCallbackLoadReal", + "float", + ) store_types = ( "x.y", "cufftComplex", "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", "float", ) - else: + else: # float64 load_types = ( "x", "cufftDoubleReal", "cufftCallbackLoadD", + "cufftJITCallbackLoadDoubleReal", "double", ) store_types = ( "x.y", "cufftDoubleComplex", "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", "double", ) else: # irfft @@ -404,24 +820,35 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func): "x.x", "cufftComplex", "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", "float", ) - store_types = ("x", "cufftReal", "cufftCallbackStoreR", "float") - else: + store_types = ( + "x", + "cufftReal", + "cufftCallbackStoreR", + "cufftJITCallbackStoreReal", + "float", + ) + else: # complex128 load_types = ( "x.x", "cufftDoubleComplex", "cufftCallbackLoadZ", + "cufftJITCallbackLoadDoubleComplex", "double", ) store_types = ( "x", "cufftDoubleReal", "cufftCallbackStoreD", + "cufftJITCallbackStoreDoubleReal", "double", ) - cb_load = _set_load_cb(load_code, *load_types) - cb_store = _set_store_cb(store_code, *store_types) + cb_load = _set_load_cb(load_code, *load_types, cb_ver=self.cb_ver) + cb_load_name = 
load_types[3] if self.cb_ver == "jit" else None + cb_store = _set_store_cb(store_code, *store_types, cb_ver=self.cb_ver) + cb_store_name = store_types[3] if self.cb_ver == "jit" else None a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -436,32 +863,38 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func): if dtype in (np.float32, np.complex64): out = out.astype(np.float32) else: - with use_temporary_cache_dir(): - with xp.fft.config.set_cufft_callbacks( - cb_load=cb_load, - cb_store=cb_store, - cb_load_aux_arr=load_aux, - cb_store_aux_arr=store_aux, - ): - out = fft(a, n=self.n, norm=self.norm) + with xp.fft.config.set_cufft_callbacks( + cb_load=cb_load, + cb_load_name=cb_load_name, + cb_store=cb_store, + cb_store_name=cb_store_name, + cb_load_data=load_aux.data, + cb_store_data=store_aux.data, + cb_ver=self.cb_ver, + ): + out = fft(a, n=self.n, norm=self.norm) return out + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_fft_load_store_aux(self, xp, dtype): return self._test_load_store_aux_helper(xp, dtype, "fft") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_ifft_load_store_aux(self, xp, dtype): return self._test_load_store_aux_helper(xp, dtype, "ifft") + @suppress_legacy_warning @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_rfft_load_store_aux(self, xp, dtype): return self._test_load_store_aux_helper(xp, dtype, "rfft") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False) def test_irfft_load_store_aux(self, xp, dtype): @@ -469,38 +902,92 @@ def test_irfft_load_store_aux(self, xp, dtype): @testing.parameterize( - {"shape": (3, 4), "s": None, "axes": None, "norm": None}, - {"shape": (3, 4), "s": (1, 
5), "axes": (-2, -1), "norm": None}, - {"shape": (3, 4), "s": None, "axes": (-2, -1), "norm": None}, - {"shape": (3, 4), "s": None, "axes": None, "norm": "ortho"}, - {"shape": (2, 3, 4), "s": None, "axes": None, "norm": None}, - {"shape": (2, 3, 4), "s": (1, 4, 10), "axes": (-3, -2, -1), "norm": None}, - {"shape": (2, 3, 4), "s": None, "axes": (-3, -2, -1), "norm": None}, - {"shape": (2, 3, 4), "s": None, "axes": None, "norm": "ortho"}, - {"shape": (2, 3, 4), "s": (2, 3), "axes": (0, 1, 2), "norm": "ortho"}, -) -@testing.with_requires("cython>=0.29.0") -@pytest.mark.skipif( - not sys.platform.startswith("linux"), - reason="callbacks are only supported on Linux", + *( + testing.product_dict( + [ + {"shape": (3, 4), "s": None, "axes": None, "norm": None}, + {"shape": (3, 4), "s": (1, 5), "axes": (-2, -1), "norm": None}, + {"shape": (3, 4), "s": None, "axes": (-2, -1), "norm": None}, + {"shape": (3, 4), "s": None, "axes": None, "norm": "ortho"}, + {"shape": (2, 3, 4), "s": None, "axes": None, "norm": None}, + { + "shape": (2, 3, 4), + "s": (1, 4, 10), + "axes": (-3, -2, -1), + "norm": None, + }, + { + "shape": (2, 3, 4), + "s": None, + "axes": (-3, -2, -1), + "norm": None, + }, + {"shape": (2, 3, 4), "s": None, "axes": None, "norm": "ortho"}, + { + "shape": (2, 3, 4), + "s": (2, 3), + "axes": (0, 1, 2), + "norm": "ortho", + }, + ], + testing.product( + { + "cb_ver": cb_ver_for_test, + }, + ), + ) + ) ) @pytest.mark.skipif( cupy.cuda.runtime.is_hip, reason="hipFFT does not support callbacks" ) class TestNdCallbacks: + @classmethod + def setup_class(cls): + # All tests in this class use a temporary cache dir (also if threaded) + with use_temporary_cache_dir(): + yield + def _test_load_helper(self, xp, dtype, fft_func): + if self.cb_ver == "legacy": + check_should_skip_legacy_test() + else: + check_should_skip_jit_test() + + # for simplicity we use the JIT callback names for both legacy/jit fft = getattr(xp.fft, fft_func) load_code = _load_callback if dtype == 
np.complex64: - types = ("x.x", "cufftComplex", "cufftCallbackLoadC") + types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", + ) elif dtype == np.complex128: - types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ") + types = ( + "x.x", + "cufftDoubleComplex", + "cufftCallbackLoadZ", + "cufftJITCallbackLoadDoubleComplex", + ) elif dtype == np.float32: - types = ("x", "cufftReal", "cufftCallbackLoadR") - else: - types = ("x", "cufftDoubleReal", "cufftCallbackLoadD") - cb_load = _set_load_cb(load_code, *types) + types = ( + "x", + "cufftReal", + "cufftCallbackLoadR", + "cufftJITCallbackLoadReal", + ) + else: # float64 + types = ( + "x", + "cufftDoubleReal", + "cufftCallbackLoadD", + "cufftJITCallbackLoadDoubleReal", + ) + cb_load = _set_load_cb(load_code, *types, cb_ver=self.cb_ver) + cb_load_name = types[3] if self.cb_ver == "jit" else None a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -512,12 +999,14 @@ def _test_load_helper(self, xp, dtype, fft_func): else: out = out.astype(np.float32) else: - with use_temporary_cache_dir(): - with xp.fft.config.set_cufft_callbacks(cb_load=cb_load): - out = fft(a, s=self.s, axes=self.axes, norm=self.norm) + with xp.fft.config.set_cufft_callbacks( + cb_load=cb_load, cb_load_name=cb_load_name, cb_ver=self.cb_ver + ): + out = fft(a, s=self.s, axes=self.axes, norm=self.norm) return out + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -525,6 +1014,7 @@ def _test_load_helper(self, xp, dtype, fft_func): def test_fftn_load(self, xp, dtype): return self._test_load_helper(xp, dtype, "fftn") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -532,6 +1022,7 @@ def test_fftn_load(self, xp, dtype): def test_ifftn_load(self, xp, dtype): return 
self._test_load_helper(xp, dtype, "ifftn") + @suppress_legacy_warning @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -539,6 +1030,7 @@ def test_ifftn_load(self, xp, dtype): def test_rfftn_load(self, xp, dtype): return self._test_load_helper(xp, dtype, "rfftn") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -547,23 +1039,61 @@ def test_irfftn_load(self, xp, dtype): return self._test_load_helper(xp, dtype, "irfftn") def _test_store_helper(self, xp, dtype, fft_func): + if self.cb_ver == "legacy": + check_should_skip_legacy_test() + else: + check_should_skip_jit_test() + fft = getattr(xp.fft, fft_func) store_code = _store_callback + + # for simplicity we use the JIT callback names for both legacy/jit if dtype == np.complex64: if fft_func != "irfftn": - types = ("x.y", "cufftComplex", "cufftCallbackStoreC") - else: - types = ("x", "cufftReal", "cufftCallbackStoreR") + types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", + ) + else: # float32 for irfftn + types = ( + "x", + "cufftReal", + "cufftCallbackStoreR", + "cufftJITCallbackStoreReal", + ) elif dtype == np.complex128: if fft_func != "irfftn": - types = ("x.y", "cufftDoubleComplex", "cufftCallbackStoreZ") - else: - types = ("x", "cufftDoubleReal", "cufftCallbackStoreD") + types = ( + "x.y", + "cufftDoubleComplex", + "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", + ) + else: # float64 for irfftn + types = ( + "x", + "cufftDoubleReal", + "cufftCallbackStoreD", + "cufftJITCallbackStoreDoubleReal", + ) elif dtype == np.float32: - types = ("x.y", "cufftComplex", "cufftCallbackStoreC") + types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", + ) elif dtype == np.float64: - types = ("x.y", "cufftDoubleComplex", 
"cufftCallbackStoreZ") - cb_store = _set_store_cb(store_code, *types) + types = ( + "x.y", + "cufftDoubleComplex", + "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", + ) + cb_store = _set_store_cb(store_code, *types, cb_ver=self.cb_ver) + cb_store_name = types[3] if self.cb_ver == "jit" else None a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -577,12 +1107,16 @@ def _test_store_helper(self, xp, dtype, fft_func): if dtype in (np.float32, np.complex64): out = out.astype(np.float32) else: - with use_temporary_cache_dir(): - with xp.fft.config.set_cufft_callbacks(cb_store=cb_store): - out = fft(a, s=self.s, axes=self.axes, norm=self.norm) + with xp.fft.config.set_cufft_callbacks( + cb_store=cb_store, + cb_store_name=cb_store_name, + cb_ver=self.cb_ver, + ): + out = fft(a, s=self.s, axes=self.axes, norm=self.norm) return out + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -590,6 +1124,7 @@ def _test_store_helper(self, xp, dtype, fft_func): def test_fftn_store(self, xp, dtype): return self._test_store_helper(xp, dtype, "fftn") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -597,6 +1132,7 @@ def test_fftn_store(self, xp, dtype): def test_ifftn_store(self, xp, dtype): return self._test_store_helper(xp, dtype, "ifftn") + @suppress_legacy_warning @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -604,6 +1140,7 @@ def test_ifftn_store(self, xp, dtype): def test_rfftn_store(self, xp, dtype): return self._test_store_helper(xp, dtype, "rfftn") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -612,40 +1149,101 @@ def 
test_irfftn_store(self, xp, dtype): return self._test_store_helper(xp, dtype, "irfftn") def _test_load_store_helper(self, xp, dtype, fft_func): + if self.cb_ver == "legacy": + check_should_skip_legacy_test() + else: + check_should_skip_jit_test() + fft = getattr(xp.fft, fft_func) load_code = _load_callback store_code = _store_callback + + # for simplicity we use the JIT callback names for both legacy/jit if fft_func in ("fftn", "ifftn"): if dtype == np.complex64: - load_types = ("x.x", "cufftComplex", "cufftCallbackLoadC") - store_types = ("x.y", "cufftComplex", "cufftCallbackStoreC") - else: - load_types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ") + load_types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", + ) + store_types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", + ) + else: # complex128 + load_types = ( + "x.x", + "cufftDoubleComplex", + "cufftCallbackLoadZ", + "cufftJITCallbackLoadDoubleComplex", + ) store_types = ( "x.y", "cufftDoubleComplex", "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", ) elif fft_func == "rfftn": if dtype == np.float32: - load_types = ("x", "cufftReal", "cufftCallbackLoadR") - store_types = ("x.y", "cufftComplex", "cufftCallbackStoreC") - else: - load_types = ("x", "cufftDoubleReal", "cufftCallbackLoadD") + load_types = ( + "x", + "cufftReal", + "cufftCallbackLoadR", + "cufftJITCallbackLoadReal", + ) + store_types = ( + "x.y", + "cufftComplex", + "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", + ) + else: # float64 + load_types = ( + "x", + "cufftDoubleReal", + "cufftCallbackLoadD", + "cufftJITCallbackLoadDoubleReal", + ) store_types = ( "x.y", "cufftDoubleComplex", "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", ) else: # irfft if dtype == np.complex64: - load_types = ("x.x", "cufftComplex", "cufftCallbackLoadC") - store_types = ("x", "cufftReal", "cufftCallbackStoreR") - else: - load_types = ("x.x", 
"cufftDoubleComplex", "cufftCallbackLoadZ") - store_types = ("x", "cufftDoubleReal", "cufftCallbackStoreD") - cb_load = _set_load_cb(load_code, *load_types) - cb_store = _set_store_cb(store_code, *store_types) + load_types = ( + "x.x", + "cufftComplex", + "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", + ) + store_types = ( + "x", + "cufftReal", + "cufftCallbackStoreR", + "cufftJITCallbackStoreReal", + ) + else: # complex128 + load_types = ( + "x.x", + "cufftDoubleComplex", + "cufftCallbackLoadZ", + "cufftJITCallbackLoadDoubleComplex", + ) + store_types = ( + "x", + "cufftDoubleReal", + "cufftCallbackStoreD", + "cufftJITCallbackStoreDoubleReal", + ) + cb_load = _set_load_cb(load_code, *load_types, cb_ver=self.cb_ver) + cb_load_name = load_types[3] if self.cb_ver == "jit" else None + cb_store = _set_store_cb(store_code, *store_types, cb_ver=self.cb_ver) + cb_store_name = store_types[3] if self.cb_ver == "jit" else None a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -660,14 +1258,18 @@ def _test_load_store_helper(self, xp, dtype, fft_func): if dtype in (np.float32, np.complex64): out = out.astype(np.float32) else: - with use_temporary_cache_dir(): - with xp.fft.config.set_cufft_callbacks( - cb_load=cb_load, cb_store=cb_store - ): - out = fft(a, s=self.s, axes=self.axes, norm=self.norm) + with xp.fft.config.set_cufft_callbacks( + cb_load=cb_load, + cb_load_name=cb_load_name, + cb_store=cb_store, + cb_store_name=cb_store_name, + cb_ver=self.cb_ver, + ): + out = fft(a, s=self.s, axes=self.axes, norm=self.norm) return out + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -675,6 +1277,7 @@ def _test_load_store_helper(self, xp, dtype, fft_func): def test_fftn_load_store(self, xp, dtype): return self._test_load_store_helper(xp, dtype, "fftn") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, 
atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -682,6 +1285,7 @@ def test_fftn_load_store(self, xp, dtype): def test_ifftn_load_store(self, xp, dtype): return self._test_load_store_helper(xp, dtype, "ifftn") + @suppress_legacy_warning @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -689,6 +1293,7 @@ def test_ifftn_load_store(self, xp, dtype): def test_rfftn_load_store(self, xp, dtype): return self._test_load_store_helper(xp, dtype, "rfftn") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -697,6 +1302,11 @@ def test_irfftn_load_store(self, xp, dtype): return self._test_load_store_helper(xp, dtype, "irfftn") def _test_load_store_aux_helper(self, xp, dtype, fft_func): + if self.cb_ver == "legacy": + check_should_skip_legacy_test() + else: + check_should_skip_jit_test() + fft = getattr(xp.fft, fft_func) load_code = _load_callback_with_aux store_code = _store_callback_with_aux @@ -704,53 +1314,67 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func): load_aux = xp.asarray(2.5, dtype=xp.dtype(dtype).char.lower()) store_aux = xp.asarray(3.8, dtype=xp.dtype(dtype).char.lower()) + # for simplicity we use the JIT callback names for both legacy/jit if fft_func in ("fftn", "ifftn"): if dtype == np.complex64: load_types = ( "x.x", "cufftComplex", "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", "float", ) store_types = ( "x.y", "cufftComplex", "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", "float", ) - else: + else: # complex128 load_types = ( "x.x", "cufftDoubleComplex", "cufftCallbackLoadZ", + "cufftJITCallbackLoadDoubleComplex", "double", ) store_types = ( "x.y", "cufftDoubleComplex", "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", "double", ) elif fft_func == "rfftn": if dtype == np.float32: - load_types = ("x", 
"cufftReal", "cufftCallbackLoadR", "float") + load_types = ( + "x", + "cufftReal", + "cufftCallbackLoadR", + "cufftJITCallbackLoadReal", + "float", + ) store_types = ( "x.y", "cufftComplex", "cufftCallbackStoreC", + "cufftJITCallbackStoreComplex", "float", ) - else: + else: # float64 load_types = ( "x", "cufftDoubleReal", "cufftCallbackLoadD", + "cufftJITCallbackLoadDoubleReal", "double", ) store_types = ( "x.y", "cufftDoubleComplex", "cufftCallbackStoreZ", + "cufftJITCallbackStoreDoubleComplex", "double", ) else: # irfftn @@ -759,24 +1383,35 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func): "x.x", "cufftComplex", "cufftCallbackLoadC", + "cufftJITCallbackLoadComplex", "float", ) - store_types = ("x", "cufftReal", "cufftCallbackStoreR", "float") - else: + store_types = ( + "x", + "cufftReal", + "cufftCallbackStoreR", + "cufftJITCallbackStoreReal", + "float", + ) + else: # complex128 load_types = ( "x.x", "cufftDoubleComplex", "cufftCallbackLoadZ", + "cufftJITCallbackLoadDoubleComplex", "double", ) store_types = ( "x", "cufftDoubleReal", "cufftCallbackStoreD", + "cufftJITCallbackStoreDoubleReal", "double", ) - cb_load = _set_load_cb(load_code, *load_types) - cb_store = _set_store_cb(store_code, *store_types) + cb_load = _set_load_cb(load_code, *load_types, cb_ver=self.cb_ver) + cb_load_name = load_types[3] if self.cb_ver == "jit" else None + cb_store = _set_store_cb(store_code, *store_types, cb_ver=self.cb_ver) + cb_store_name = store_types[3] if self.cb_ver == "jit" else None a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -791,17 +1426,20 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func): if dtype in (np.float32, np.complex64): out = out.astype(np.float32) else: - with use_temporary_cache_dir(): - with xp.fft.config.set_cufft_callbacks( - cb_load=cb_load, - cb_store=cb_store, - cb_load_aux_arr=load_aux, - cb_store_aux_arr=store_aux, - ): - out = fft(a, s=self.s, axes=self.axes, norm=self.norm) + with 
xp.fft.config.set_cufft_callbacks( + cb_load=cb_load, + cb_load_name=cb_load_name, + cb_store=cb_store, + cb_store_name=cb_store_name, + cb_load_data=load_aux.data, + cb_store_data=store_aux.data, + cb_ver=self.cb_ver, + ): + out = fft(a, s=self.s, axes=self.axes, norm=self.norm) return out + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -809,6 +1447,7 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func): def test_fftn_load_store_aux(self, xp, dtype): return self._test_load_store_aux_helper(xp, dtype, "fftn") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -816,6 +1455,7 @@ def test_fftn_load_store_aux(self, xp, dtype): def test_ifftn_load_store_aux(self, xp, dtype): return self._test_load_store_aux_helper(xp, dtype, "ifftn") + @suppress_legacy_warning @testing.for_float_dtypes(no_float16=True) @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False @@ -823,6 +1463,7 @@ def test_ifftn_load_store_aux(self, xp, dtype): def test_rfftn_load_store_aux(self, xp, dtype): return self._test_load_store_aux_helper(xp, dtype, "rfftn") + @suppress_legacy_warning @testing.for_complex_dtypes() @testing.numpy_cupy_allclose( rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False diff --git a/dpnp/tests/third_party/cupy/fft_tests/test_fft.py b/dpnp/tests/third_party/cupy/fft_tests/test_fft.py index 534b474363f1..369409ba001c 100644 --- a/dpnp/tests/third_party/cupy/fft_tests/test_fft.py +++ b/dpnp/tests/third_party/cupy/fft_tests/test_fft.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import functools import warnings @@ -6,6 +8,14 @@ import dpnp as cupy from dpnp.tests.helper import has_support_aspect64 + +# from cupy.fft import config +# from cupy.fft._fft import ( +# 
_default_fft_func, +# _fft, +# _fftn, +# _size_last_transform_axis, +# ) from dpnp.tests.third_party.cupy import testing from dpnp.tests.third_party.cupy.testing._loops import _wraps_partial @@ -36,12 +46,16 @@ def decorator(impl): @_wraps_partial(impl, name) def test_func(self, *args, **kw): # get original global planning state - # planning_state = config.enable_nd_planning + # with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + # planning_state = config.enable_nd_planning try: for nd_planning in states: try: # enable or disable nd planning - # config.enable_nd_planning = nd_planning + # with pytest.warns( + # DeprecationWarning, match="enable_nd_planning" + # ): + # config.enable_nd_planning = nd_planning kw[name] = nd_planning impl(self, *args, **kw) @@ -50,7 +64,10 @@ def test_func(self, *args, **kw): raise finally: # restore original global planning state - # config.enable_nd_planning = planning_state + # with pytest.warns( + # DeprecationWarning, match="enable_nd_planning" + # ): + # config.enable_nd_planning = planning_state pass return test_func @@ -71,8 +88,8 @@ def multi_gpu_config(gpu_configs=None): def decorator(impl): @functools.wraps(impl) def test_func(self, *args, **kw): - use_multi_gpus = config.use_multi_gpus - _devices = config._devices + use_multi_gpus = config._use_multi_gpus.get() + _devices = config._devices.get() try: for gpus in gpu_configs: @@ -81,23 +98,21 @@ def test_func(self, *args, **kw): assert nGPUs >= 2, "Must use at least two gpus" config.use_multi_gpus = True config.set_cufft_gpus(gpus) - self.gpus = gpus impl(self, *args, **kw) except Exception: print("GPU config is:", gpus) raise finally: - config.use_multi_gpus = use_multi_gpus - config._devices = _devices - del self.gpus + config._use_multi_gpus.set(use_multi_gpus) + config._devices.set(_devices) return test_func return decorator -# @testing.with_requires("numpy>=2.0") +@testing.with_requires("numpy>=2.0") @pytest.mark.usefixtures("skip_forward_backward") 
@testing.parameterize( *testing.product( @@ -138,7 +153,7 @@ def test_ifft(self, xp, dtype): return xp.fft.ifft(a, n=self.n, norm=self.norm) -# @testing.with_requires("numpy>=2.0") +@testing.with_requires("numpy>=2.0") @testing.parameterize( *testing.product( { @@ -179,19 +194,6 @@ def test_ifft(self, xp, dtype): return xp.fft.ifft(a, axis=self.axis) -# See #3757 and NVIDIA internal ticket 3093094 -def _skip_multi_gpu_bug(shape, gpus): - # avoid CUDA 11.0 (will be fixed by CUDA 11.2) bug triggered by - # - batch = 1 - # - gpus = [1, 0] - if ( - 11000 <= cupy.cuda.runtime.runtimeGetVersion() < 11020 - and len(shape) == 1 - and gpus == [1, 0] - ): - pytest.skip("avoid CUDA 11 bug") - - # Almost identical to the TestFft class, except that # 1. multi-GPU cuFFT is used # 2. the tested parameter combinations are adjusted to meet the requirements @@ -208,6 +210,9 @@ def _skip_multi_gpu_bug(shape, gpus): ) @pytest.mark.skip("multi GPU is not supported") @testing.multi_gpu(2) +# @pytest.mark.skipif( +# cupy.cuda.runtime.is_hip, reason="hipFFT does not support multi-GPU FFT" +# ) class TestMultiGpuFft: @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]]) @@ -216,8 +221,6 @@ class TestMultiGpuFft: rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False ) def test_fft(self, xp, dtype): - _skip_multi_gpu_bug(self.shape, self.gpus) - a = testing.shaped_random(self.shape, xp, dtype) return xp.fft.fft(a, n=self.n, norm=self.norm) @@ -230,8 +233,6 @@ def test_fft(self, xp, dtype): @testing.with_requires("numpy!=1.17.0") @testing.with_requires("numpy!=1.17.1") def test_ifft(self, xp, dtype): - _skip_multi_gpu_bug(self.shape, self.gpus) - a = testing.shaped_random(self.shape, xp, dtype) return xp.fft.ifft(a, n=self.n, norm=self.norm) @@ -251,6 +252,9 @@ def test_ifft(self, xp, dtype): ) @pytest.mark.skip("multi GPU is not supported") @testing.multi_gpu(2) +# @pytest.mark.skipif( +# cupy.cuda.runtime.is_hip, reason="hipFFT does not support multi-GPU FFT" +# ) class 
TestMultiGpuFftOrder: @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]]) @testing.for_complex_dtypes() @@ -258,8 +262,6 @@ class TestMultiGpuFftOrder: rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False ) def test_fft(self, xp, dtype): - _skip_multi_gpu_bug(self.shape, self.gpus) - a = testing.shaped_random(self.shape, xp, dtype) if self.data_order == "F": a = xp.asfortranarray(a) @@ -271,8 +273,6 @@ def test_fft(self, xp, dtype): rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False ) def test_ifft(self, xp, dtype): - _skip_multi_gpu_bug(self.shape, self.gpus) - a = testing.shaped_random(self.shape, xp, dtype) if self.data_order == "F": a = xp.asfortranarray(a) @@ -348,9 +348,13 @@ def test_default_fft_func(self, enable_nd): @pytest.mark.skip("memory management is not supported") @testing.with_requires("numpy>=2.0") +# @pytest.mark.skipif( +# 10010 <= cupy.cuda.runtime.runtimeGetVersion() <= 11010, +# reason="avoid a cuFFT bug (cupy/cupy#3777)", +# ) @testing.slow class TestFftAllocate: - + # @pytest.mark.thread_unsafe(reason="does large allocations") def test_fft_allocate(self): # Check CuFFTError is not raised when the GPU memory is enough. 
# See https://github.com/cupy/cupy/issues/1063 @@ -368,7 +372,7 @@ def test_fft_allocate(self): cupy.fft.config.clear_plan_cache() -# @testing.with_requires("numpy>=2.0") +@testing.with_requires("numpy>=2.0") @pytest.mark.usefixtures("skip_forward_backward") @testing.parameterize( *( @@ -413,7 +417,8 @@ class TestFft2: type_check=has_support_aspect64(), ) def test_fft2(self, xp, dtype, order, enable_nd): - # assert config.enable_nd_planning == enable_nd + # with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + # assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) @@ -440,7 +445,8 @@ def test_fft2(self, xp, dtype, order, enable_nd): type_check=has_support_aspect64(), ) def test_ifft2(self, xp, dtype, order, enable_nd): - # assert config.enable_nd_planning == enable_nd + # with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + # assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) @@ -457,7 +463,7 @@ def test_ifft2(self, xp, dtype, order, enable_nd): return out -# @testing.with_requires("numpy>=2.0") +@testing.with_requires("numpy>=2.0") @pytest.mark.usefixtures("skip_forward_backward") @testing.parameterize( *( @@ -503,7 +509,8 @@ class TestFftn: type_check=has_support_aspect64(), ) def test_fftn(self, xp, dtype, order, enable_nd): - # assert config.enable_nd_planning == enable_nd + # with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + # assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) @@ -530,7 +537,8 @@ def test_fftn(self, xp, dtype, order, enable_nd): type_check=has_support_aspect64(), ) def test_ifftn(self, xp, dtype, order, enable_nd): - # assert config.enable_nd_planning == enable_nd + # with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + # 
assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) @@ -595,7 +603,8 @@ def skip_buggy(self): rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False ) def test_fftn(self, xp, dtype, enable_nd): - assert config.enable_nd_planning == enable_nd + with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -613,7 +622,8 @@ def test_fftn(self, xp, dtype, enable_nd): rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False ) def test_ifftn(self, xp, dtype, enable_nd): - assert config.enable_nd_planning == enable_nd + with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -635,7 +645,8 @@ def test_fftn_error_on_wrong_plan(self, dtype, enable_nd): from cupy.fft import fftn from cupyx.scipy.fftpack import get_fft_plan - assert config.enable_nd_planning == enable_nd + with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + assert config.enable_nd_planning == enable_nd # can't get a plan, so skip if self.axes is not None: @@ -747,6 +758,9 @@ def test_fft_error_on_wrong_plan(self, dtype): ) @pytest.mark.skip("get_fft_plan() is not supported") @testing.multi_gpu(2) +# @pytest.mark.skipif( +# cupy.cuda.runtime.is_hip, reason="hipFFT does not support multi-GPU FFT" +# ) class TestMultiGpuPlanCtxManagerFft: @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]]) @@ -755,8 +769,6 @@ class TestMultiGpuPlanCtxManagerFft: rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False ) def test_fft(self, xp, dtype): - _skip_multi_gpu_bug(self.shape, self.gpus) - a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -776,8 +788,6 @@ def test_fft(self, xp, dtype): rtol=1e-3, atol=1e-7, accept_error=ValueError, 
contiguous_check=False ) def test_ifft(self, xp, dtype): - _skip_multi_gpu_bug(self.shape, self.gpus) - a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -1029,7 +1039,8 @@ class TestRfft2: type_check=has_support_aspect64(), ) def test_rfft2(self, xp, dtype, order, enable_nd): - # assert config.enable_nd_planning == enable_nd + # with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + # assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) @@ -1046,14 +1057,21 @@ def test_rfft2(self, xp, dtype, order, enable_nd): type_check=has_support_aspect64(), ) def test_irfft2(self, xp, dtype, order, enable_nd): - # assert config.enable_nd_planning == enable_nd + # with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + # assert config.enable_nd_planning == enable_nd + # if ( + # 10020 >= cupy.cuda.runtime.runtimeGetVersion() >= 10010 + # and int(cupy.cuda.device.get_compute_capability()) < 70 + # and _size_last_transform_axis(self.shape, self.s, self.axes) == 2 + # ): + # pytest.skip("work-around for cuFFT issue") a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) return xp.fft.irfft2(a, s=self.s, axes=self.axes, norm=self.norm) -# @testing.with_requires("numpy>=2.0") +@testing.with_requires("numpy>=2.0") @testing.parameterize( {"shape": (3, 4), "s": None, "axes": (), "norm": None}, {"shape": (2, 3, 4), "s": None, "axes": (), "norm": None}, @@ -1115,7 +1133,8 @@ class TestRfftn: type_check=has_support_aspect64(), ) def test_rfftn(self, xp, dtype, order, enable_nd): - # assert config.enable_nd_planning == enable_nd + # with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + # assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) @@ -1132,7 +1151,14 @@ def test_rfftn(self, xp, dtype, order, enable_nd): 
type_check=has_support_aspect64(), ) def test_irfftn(self, xp, dtype, order, enable_nd): - # assert config.enable_nd_planning == enable_nd + # with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + # assert config.enable_nd_planning == enable_nd + # if ( + # 10020 >= cupy.cuda.runtime.runtimeGetVersion() >= 10010 + # and int(cupy.cuda.device.get_compute_capability()) < 70 + # and _size_last_transform_axis(self.shape, self.s, self.axes) == 2 + # ): + # pytest.skip("work-around for cuFFT issue") a = testing.shaped_random(self.shape, xp, dtype) if order == "F": a = xp.asfortranarray(a) @@ -1182,7 +1208,8 @@ def skip_buggy(self): rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False ) def test_rfftn(self, xp, dtype, enable_nd): - assert config.enable_nd_planning == enable_nd + with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if xp is np: @@ -1194,13 +1221,17 @@ def test_rfftn(self, xp, dtype, enable_nd): with plan: return xp.fft.rfftn(a, s=self.s, axes=self.axes, norm=self.norm) + # @pytest.mark.skipif( + # cupy.cuda.runtime.is_hip, reason="hipFFT's PlanNd for C2R is buggy" + # ) @nd_planning_states() @testing.for_all_dtypes() @testing.numpy_cupy_allclose( rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False ) def test_irfftn(self, xp, dtype, enable_nd): - assert config.enable_nd_planning == enable_nd + with pytest.warns(DeprecationWarning, match="enable_nd_planning"): + assert config.enable_nd_planning == enable_nd a = testing.shaped_random(self.shape, xp, dtype) if xp is np: return xp.fft.irfftn(a, s=self.s, axes=self.axes, norm=self.norm) @@ -1282,7 +1313,7 @@ def test_ifftn_orders(self, dtype, enable_nd): pass -# @testing.with_requires("numpy>=2.0") +@testing.with_requires("numpy>=2.0") @testing.parameterize( {"shape": (3, 4), "s": None, "axes": (), "norm": None}, {"shape": (2, 3, 4), "s": None, "axes": 
(), "norm": None}, @@ -1343,7 +1374,7 @@ def test_ihfft(self, xp, dtype): return xp.fft.ihfft(a, n=self.n, norm=self.norm) -# @testing.with_requires("numpy>=2.0") +@testing.with_requires("numpy>=2.0") @testing.parameterize( {"n": 1, "d": 1}, {"n": 10, "d": 0.5}, @@ -1372,7 +1403,7 @@ def test_rfftfreq(self, xp, dtype): return xp.fft.rfftfreq(self.n, self.d) -# @testing.with_requires("numpy>=2.0") +@testing.with_requires("numpy>=2.0") @testing.parameterize( {"shape": (5,), "axes": None}, {"shape": (5,), "axes": 0}, diff --git a/dpnp/tests/third_party/cupy/functional_tests/test_piecewise.py b/dpnp/tests/third_party/cupy/functional_tests/test_piecewise.py index 5ce72bd806b8..c16a9e91f9dd 100644 --- a/dpnp/tests/third_party/cupy/functional_tests/test_piecewise.py +++ b/dpnp/tests/third_party/cupy/functional_tests/test_piecewise.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import unittest import numpy diff --git a/dpnp/tests/third_party/cupy/math_tests/test_rounding.py b/dpnp/tests/third_party/cupy/math_tests/test_rounding.py index a2ad717f2500..10e79715dd10 100644 --- a/dpnp/tests/third_party/cupy/math_tests/test_rounding.py +++ b/dpnp/tests/third_party/cupy/math_tests/test_rounding.py @@ -66,6 +66,7 @@ def test_trunc(self): self.check_unary("trunc") self.check_unary_complex_unsupported("trunc") + @pytest.mark.filterwarnings("ignore::DeprecationWarning") @testing.with_requires("numpy>=2.1") def test_fix(self): self.check_unary("fix") diff --git a/dpnp/tests/third_party/cupy/random_tests/test_generator.py b/dpnp/tests/third_party/cupy/random_tests/test_generator.py index 9d1a3d233473..abb58df07af9 100644 --- a/dpnp/tests/third_party/cupy/random_tests/test_generator.py +++ b/dpnp/tests/third_party/cupy/random_tests/test_generator.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import functools import os import threading @@ -850,6 +852,7 @@ def test_goodness_of_fit(self): assert _hypothesis.chi_square_test(counts, expected) @_condition.repeat(3, 10) + 
# @pytest.mark.xfail(runtime.is_hip, reason="ROCm/HIP may have a bug") def test_goodness_of_fit_2(self): vals = self.generate(3, (5, 20), True, [0.3, 0.3, 0.4]).get() counts = numpy.histogram(vals, bins=numpy.arange(4))[0] @@ -929,6 +932,163 @@ def test_bound(self): assert numpy.unique(val).size == val.size +@testing.parameterize( + # Edge cases with small domain sizes + {"a": 0, "size": 0}, + {"a": 1, "size": 1}, + {"a": 2, "size": 1}, + {"a": 256, "size": 100}, # Minimum cipher bits threshold + {"a": 257, "size": 100}, + # large scale uniqueness + {"a": 100, "size": 50}, + {"a": 1000, "size": 500}, + {"a": 10000, "size": 5000}, + {"a": 100000, "size": 50000}, + # full input permutation + {"a": 10, "size": 10}, + {"a": 100, "size": 100}, + {"a": 1000, "size": 1000}, + # Power of 2 + {"a": 2**8, "size": 100}, + {"a": 2**10, "size": 500}, + {"a": 2**16, "size": 1000}, + {"a": 2**20, "size": 5000}, + {"a": 2**24, "size": 10000}, + # Just below power of 2 + {"a": 2**8 - 1, "size": 100}, + {"a": 2**16 - 1, "size": 1000}, + {"a": 2**20 - 1, "size": 5000}, + # Just above power of 2 + {"a": 2**8 + 1, "size": 100}, + {"a": 2**16 + 1, "size": 1000}, + {"a": 2**20 + 1, "size": 5000}, + # Test multi-dimensional shapes.
+ {"a": 6, "size": (2, 3)}, + {"a": 32, "size": (4, 5)}, + {"a": 120, "size": (5, 4, 5)}, +) +@testing.fix_random() +class TestChoiceReplaceFalseLargeScale(RandomGeneratorTestCase): + """Test large-scale uniqueness for Feistel bijection implementation.""" + + target_method = "choice" + + def test_uniqueness_and_bounds(self): + """Test that samples have no duplicates and correct bounds.""" + val = self.generate(a=self.a, size=self.size, replace=False).get() + size = self.size if isinstance(self.size, tuple) else (self.size,) + + # Check shape + assert val.shape == size + + # Check bounds + assert (0 <= val).all() + assert (val < self.a).all() + + # Check uniqueness + val_flat = numpy.asarray(val).flatten() + assert ( + numpy.unique(val_flat).size == val_flat.size + ), "Found duplicate values in replace=False sample" + + +@testing.fix_random() +class TestChoiceReplaceFalseStatistical(RandomGeneratorTestCase): + """Statistical tests for uniformity of Feistel bijection.""" + + target_method = "choice" + + @_condition.repeat(3) + def test_small_domain_uniformity(self): + """Chi-square test for uniform sampling in small domain.""" + # Sample from domain of size 10, taking 5 elements + # Repeat many times and check each index appears uniformly + n = 10 + sample_size = 5 + n_trials = 1000 + + counts = cupy.zeros(n, dtype=int) + vals = self.generate_many( + n, size=sample_size, replace=False, _count=n_trials + ) + for val in vals: + counts[val] += 1 + counts = counts.get() + + # Each index should appear ~500 times (5/10 * 1000) + expected = numpy.ones(n, dtype=int) * (sample_size * n_trials // n) + assert _hypothesis.chi_square_test(counts, expected) + + @_condition.repeat(3, 10) + def test_permutation_variability(self): + """Test that repeated full permutations are different.""" + n = 20 + n_trials = 10 + + vals = self.generate_many(n, size=n, replace=False, _count=n_trials) + perms = cupy.vstack(vals) + + # Should have multiple unique permutations + unique_perms = 
cupy.unique(perms, axis=0) + assert ( + len(unique_perms) == n_trials + ), "Permutations should vary across multiple calls" + + +@testing.slow +@testing.fix_random() +class TestChoiceReplaceFalseVeryLargeDomain(unittest.TestCase): + """Test memory efficiency with very large domains.""" + + def setUp(self): + self.rs = _generator.RandomState(seed=testing.generate_seed()) + + def test_large_domain_memory_efficiency(self): + """Test that very large domains don't allocate full arrays.""" + # This should NOT allocate a 2^30 element array + # If it did, it would require ~8GB of memory + a = 2**30 + size = 1000 + + val = self.rs.choice(a=a, size=size, replace=False).get() + + # Check bounds + assert (0 <= val).all() + assert (val < a).all() + + # Check uniqueness + assert numpy.unique(val).size == size + + def test_near_32bit_limit(self): + """Test at the 32-bit boundary.""" + # Current implementation supports up to 2^32 + a = 2**31 + size = 500 + + val = self.rs.choice(a=a, size=size, replace=False).get() + + # Check bounds + assert (0 <= val).all() + assert (val < a).all() + + # Check uniqueness + assert numpy.unique(val).size == size + + +@testing.fix_random() +class TestChoiceReplaceFalseDtypeConsistency(RandomGeneratorTestCase): + """Test output dtype consistency.""" + + target_method = "choice" + + def test_integer_input_dtype(self): + """Integer input should produce int64/long dtype.""" + val = self.generate(a=100, size=50, replace=False) + + # Should be 'l' (long) dtype, which is int64 on most platforms + assert val.dtype == numpy.dtype("l") or val.dtype == numpy.int64 + + @testing.fix_random() class TestGumbel(RandomGeneratorTestCase): diff --git a/dpnp/tests/third_party/cupy/test_init.py b/dpnp/tests/third_party/cupy/test_init.py index 0a841ba28b21..a4f1ad78f7f5 100644 --- a/dpnp/tests/third_party/cupy/test_init.py +++ b/dpnp/tests/third_party/cupy/test_init.py @@ -36,11 +36,9 @@ def _run_script(code): def _test_cupy_available(self): - returncode, stdoutdata, 
stderrdata = _run_script( - """ + returncode, stdoutdata, stderrdata = _run_script(""" import dpnp as cupy -print(cupy.is_available())""" - ) +print(cupy.is_available())""") assert returncode == 0, "stderr: {!r}".format(stderrdata) assert stdoutdata in (b"True\n", b"True\r\n", b"False\n", b"False\r\n") return stdoutdata == b"True\n" or stdoutdata == b"True\r\n" @@ -49,14 +47,12 @@ def _test_cupy_available(self): class TestImportError(unittest.TestCase): def test_import_error(self): - returncode, stdoutdata, stderrdata = _run_script( - """ + returncode, stdoutdata, stderrdata = _run_script(""" try: import dpnp as cupy except Exception as e: print(type(e).__name__) -""" - ) +""") assert returncode == 0, "stderr: {!r}".format(stderrdata) assert stdoutdata in (b"", b"RuntimeError\n") diff --git a/dpnp/tests/third_party/cupy/testing/_loops.py b/dpnp/tests/third_party/cupy/testing/_loops.py index 6a383780b9ca..63cd09147c4b 100644 --- a/dpnp/tests/third_party/cupy/testing/_loops.py +++ b/dpnp/tests/third_party/cupy/testing/_loops.py @@ -307,7 +307,7 @@ def decorator(impl): @_wraps_partial_xp(impl, name, sp_name, scipy_name) def test_func(*args, **kw): # Run cupy and numpy - (cupy_result, cupy_error, numpy_result, numpy_error) = ( + cupy_result, cupy_error, numpy_result, numpy_error = ( _call_func_numpy_cupy(impl, args, kw, name, sp_name, scipy_name) ) assert cupy_result is not None or cupy_error is not None @@ -344,9 +344,7 @@ def test_func(*args, **kw): raise AssertionError( """ndarrays of different dtypes are returned. 
cupy: {} -numpy: {}""".format( - cupy_r.dtype, numpy_r.dtype - ) +numpy: {}""".format(cupy_r.dtype, numpy_r.dtype) ) # Check contiguous @@ -902,7 +900,7 @@ def decorator(impl): @_wraps_partial_xp(impl, name, sp_name, scipy_name) def test_func(*args, **kw): # Run cupy and numpy - (cupy_result, cupy_error, numpy_result, numpy_error) = ( + cupy_result, cupy_error, numpy_result, numpy_error = ( _call_func_numpy_cupy(impl, args, kw, name, sp_name, scipy_name) ) @@ -958,7 +956,7 @@ def decorator(impl): @_wraps_partial_xp(impl, name, sp_name, scipy_name) def test_func(*args, **kw): # Run cupy and numpy - (cupy_result, cupy_error, numpy_result, numpy_error) = ( + cupy_result, cupy_error, numpy_result, numpy_error = ( _call_func_numpy_cupy(impl, args, kw, name, sp_name, scipy_name) ) diff --git a/dpnp/tests/third_party/cupy/testing/_pytest_impl.py b/dpnp/tests/third_party/cupy/testing/_pytest_impl.py index 97dfee5e174a..ecc4a6338eb5 100644 --- a/dpnp/tests/third_party/cupy/testing/_pytest_impl.py +++ b/dpnp/tests/third_party/cupy/testing/_pytest_impl.py @@ -19,14 +19,10 @@ def is_available(): def check_available(feature): if not is_available(): - raise RuntimeError( - """\ + raise RuntimeError("""\ cupy.testing: {} is not available. 
-Reason: {}: {}""".format( - feature, type(_error).__name__, _error - ) - ) +Reason: {}: {}""".format(feature, type(_error).__name__, _error)) if is_available(): diff --git a/environments/dpctl_pkg.txt b/environments/dpctl_pkg.txt index 29fdceb21e82..9d585f1ec230 100644 --- a/environments/dpctl_pkg.txt +++ b/environments/dpctl_pkg.txt @@ -1,2 +1,2 @@ --index-url https://pypi.anaconda.org/dppy/label/dev/simple -dpctl>=0.21.0dev0 +dpctl>=0.22.0dev0 diff --git a/environments/dpctl_pkg.yml b/environments/dpctl_pkg.yml index 6cea30d0e65a..16d295cdc09c 100644 --- a/environments/dpctl_pkg.yml +++ b/environments/dpctl_pkg.yml @@ -2,4 +2,4 @@ name: Install dpctl package channels: - dppy/label/dev dependencies: - - dpctl>=0.21.0dev0 + - dpctl>=0.22.0dev0 diff --git a/examples/example1.py b/examples/example1.py index fdcbfbac9249..4d84c43c686c 100644 --- a/examples/example1.py +++ b/examples/example1.py @@ -37,7 +37,6 @@ """ - import time import dpctl diff --git a/examples/example2.py b/examples/example2.py index 853f879a7f11..eb856a2bed80 100644 --- a/examples/example2.py +++ b/examples/example2.py @@ -37,7 +37,6 @@ """ - import time import numpy diff --git a/examples/example4.py b/examples/example4.py index 0149c22f7ced..ea70ef18d612 100644 --- a/examples/example4.py +++ b/examples/example4.py @@ -34,7 +34,6 @@ """ - import numpy """ diff --git a/pyproject.toml b/pyproject.toml index 6fbc23ac1b88..d659428877fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,7 +50,7 @@ dependencies = [ # "dpcpp-cpp-rt>=0.59.0", # "intel-cmplr-lib-rt>=0.59.0" # WARNING: use the latest dpctl dev version, otherwise stable w/f will fail - "dpctl>=0.21.0dev0", + "dpctl>=0.22.0dev0", "numpy>=1.26.0" ] description = "Data Parallel Extension for NumPy" @@ -163,26 +163,6 @@ allow-wildcard-with-all = true [tool.pylint.miscellaneous] notes = ["FIXME", "XXX"] -[tool.pytest.ini.options] -addopts = [ - "--junitxml=junit.xml", - "--ignore setup.py", - "--ignore run_test.py", - "--cov-report 
term-missing", - "--tb native", - "--strict", - "--durations=20", - "-q -ra" -] -minversion = "6.0" -norecursedirs = [ - ".*", - "*.egg*", - "build", - "dist", - "conda-recipe" -] - [tool.versioneer] VCS = "git" parentdir_prefix = "dpnp-" diff --git a/scripts/_build_helper.py b/scripts/_build_helper.py new file mode 100644 index 000000000000..f9e303454848 --- /dev/null +++ b/scripts/_build_helper.py @@ -0,0 +1,249 @@ +# ***************************************************************************** +# Copyright (c) 2026, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# - Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +import os +import shutil +import subprocess +import sys +import warnings + + +def get_dpctl_cmake_dir(): + """ + If dpctl is locally built using `script/build_locally.py`, it is needed + to pass the -DDpctl_ROOT=$(python -m dpctl --cmakedir) during the build. + If dpctl is conda installed, it is optional to pass this parameter. + + """ + + process = subprocess.Popen( + [sys.executable, "-m", "dpctl", "--cmakedir"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + output, error = process.communicate() + if process.returncode == 0: + return output.decode("utf-8").strip() + + raise RuntimeError( + "Failed to retrieve dpctl cmake directory: " + + error.decode("utf-8").strip() + ) + + +def resolve_compilers( + oneapi: bool, + c_compiler: str, + cxx_compiler: str, + compiler_root: str, +): + is_linux = "linux" in sys.platform + + if oneapi or ( + c_compiler is None and cxx_compiler is None and compiler_root is None + ): + return "icx", ("icpx" if is_linux else "icx") + + if ( + (c_compiler is None or not os.path.isabs(c_compiler)) + and (cxx_compiler is None or not os.path.isabs(cxx_compiler)) + and (not compiler_root or not os.path.exists(compiler_root)) + ): + raise RuntimeError( + "--compiler-root option must be set when using non-default DPC++ " + "layout unless absolute paths are provided for both compilers" + ) + + # default values + if c_compiler 
is None: + c_compiler = "icx" + if cxx_compiler is None: + cxx_compiler = "icpx" if is_linux else "icx" + + compiler_paths = [] + for name, opt_name in ( + (c_compiler, "--c-compiler"), + (cxx_compiler, "--cxx-compiler"), + ): + if os.path.isabs(name): + path = name + else: + path = os.path.join(compiler_root, name) + + if not os.path.exists(path): + raise RuntimeError( + f"{opt_name} value {name} not found and {path} not exist" + ) + + compiler_paths.append(path) + return tuple(compiler_paths) + + +def resolve_onemath( + onemath: bool, + onemath_dir: str, + target_cuda: str = None, + target_hip: str = None, + onemkl_interfaces: bool = False, + onemkl_interfaces_dir: str = None, +): + # always enable build with oneMath i/f when oneMath path is passed + if onemath_dir: + onemath = True + + # always enable build with oneMath i/f for CUDA or HIP target + if target_cuda or target_hip: + onemath = True + + # TODO: onemkl_interfaces and onemkl_interfaces_dir are deprecated in + # dpnp-0.19.0 and should be removed in dpnp-0.20.0. + if onemkl_interfaces: + warnings.warn( + "Using 'onemkl_interfaces' is deprecated. Please use 'onemath' instead.", + DeprecationWarning, + stacklevel=2, + ) + onemath = True + if onemkl_interfaces_dir is not None: + warnings.warn( + "Using 'onemkl_interfaces_dir' is deprecated. 
Please use 'onemath_dir' instead.", + DeprecationWarning, + stacklevel=2, + ) + onemath_dir = onemkl_interfaces_dir + return onemath, onemath_dir + + +def run(cmd: list[str], env: dict[str, str] = None, cwd: str = None): + print("+", " ".join(cmd)) + subprocess.check_call( + cmd, env=env or os.environ.copy(), cwd=cwd or os.getcwd() + ) + + +def capture_cmd_output(cmd: list[str], cwd: str = None): + print("+", " ".join(cmd)) + return ( + subprocess.check_output(cmd, cwd=cwd or os.getcwd()) + .decode("utf-8") + .strip("\n") + ) + + +def err(msg: str, script: str): + raise RuntimeError(f"[{script}] error: {msg}") + + +def log_cmake_args(cmake_args: list[str], script: str): + print(f"[{script}] Using CMake args:\n{' '.join(cmake_args)}") + + +def make_cmake_args( + c_compiler: str = None, + cxx_compiler: str = None, + dpctl_cmake_dir: str = None, + onemath: bool = False, + onemath_dir: str = None, + verbose: bool = False, + other_opts: str = None, +): + args = [ + f"-DCMAKE_C_COMPILER:PATH={c_compiler}" if c_compiler else "", + f"-DCMAKE_CXX_COMPILER:PATH={cxx_compiler}" if cxx_compiler else "", + f"-DDpctl_ROOT={dpctl_cmake_dir}" if dpctl_cmake_dir else "", + ] + + if onemath: + args.append("-DDPNP_USE_ONEMATH=ON") + if onemath_dir: + args.append(f"-DDPNP_ONEMATH_DIR={onemath_dir}") + + if verbose: + args.append("-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON") + if other_opts: + args.extend(other_opts.split()) + + return args + + +def build_extension( + setup_dir: str, + env: dict[str, str], + cmake_args: list[str], + cmake_executable: str = None, + generator: str = None, + build_type: str = None, +): + cmd = [sys.executable, "setup.py", "build_ext", "--inplace"] + if cmake_executable: + cmd.append(f"--cmake-executable={cmake_executable}") + if generator: + cmd.append(f"--generator={generator}") + if build_type: + cmd.append(f"--build-type={build_type}") + if cmake_args: + cmd.append("--") + cmd += cmake_args + run( + cmd, + env=env, + cwd=setup_dir, + ) + + +def 
install_editable(setup_dir: str, env: dict[str, str]): + run( + [ + sys.executable, + "-m", + "pip", + "install", + "-e", + ".", + "--no-build-isolation", + ], + env=env, + cwd=setup_dir, + ) + + +def clean_build_dir(setup_dir: str): + if ( + not isinstance(setup_dir, str) + or not setup_dir + or not os.path.isdir(setup_dir) + ): + raise RuntimeError(f"Invalid setup directory provided: '{setup_dir}'") + target = os.path.join(setup_dir, "_skbuild") + if os.path.exists(target): + print(f"Cleaning build directory: {target}") + try: + shutil.rmtree(target) + except Exception as e: + print(f"Failed to remove build directory: '{target}'") + raise e diff --git a/scripts/build_locally.py b/scripts/build_locally.py index 1197de9d9455..e17fc8e478fe 100644 --- a/scripts/build_locally.py +++ b/scripts/build_locally.py @@ -26,290 +26,211 @@ # THE POSSIBILITY OF SUCH DAMAGE. # ***************************************************************************** +import argparse import os -import subprocess import sys -import warnings -warnings.simplefilter("default", DeprecationWarning) - - -def run( - use_oneapi=True, - build_type="Release", - c_compiler=None, - cxx_compiler=None, - compiler_root=None, - cmake_executable=None, - verbose=False, - cmake_opts="", - target_cuda=None, - target_hip=None, - onemkl_interfaces=False, - onemkl_interfaces_dir=None, - onemath=False, - onemath_dir=None, -): - build_system = None - - if "linux" in sys.platform: - build_system = "Ninja" - elif sys.platform in ["win32", "cygwin"]: - build_system = "Ninja" - else: - raise AssertionError(sys.platform + " not supported") - - setup_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - cmake_args = [ - sys.executable, - "setup.py", - "develop", - ] - if cmake_executable: - cmake_args += [ - "--cmake-executable=" + cmake_executable, - ] - - # if dpctl is locally built using `script/build_locally.py`, it is needed - # to pass the -DDpctl_ROOT=$(python -m dpctl --cmakedir) - # if dpctl is conda 
installed, it is optional to pass this parameter - process = subprocess.Popen( - ["python", "-m", "dpctl", "--cmakedir"], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - output, error = process.communicate() - if process.returncode == 0: - cmake_dir = output.decode("utf-8").strip() - else: - raise RuntimeError( - "Failed to retrieve dpctl cmake directory: " - + error.decode("utf-8").strip() - ) - - cmake_args += [ - "--build-type=" + build_type, - "--generator=" + build_system, - "--", - "-DCMAKE_C_COMPILER:PATH=" + c_compiler, - "-DCMAKE_CXX_COMPILER:PATH=" + cxx_compiler, - "-DDpctl_ROOT=" + cmake_dir, - ] - if verbose: - cmake_args += [ - "-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON", - ] - if cmake_opts: - cmake_args += cmake_opts.split() - if use_oneapi: - if "DPL_ROOT" in os.environ: - os.environ["DPL_ROOT_HINT"] = os.environ["DPL_ROOT"] - - # TODO: onemkl_interfaces and onemkl_interfaces_dir are deprecated in - # dpnp-0.19.0 and should be removed in dpnp-0.20.0. - if onemkl_interfaces: - warnings.warn( - "Using 'onemkl_interfaces' is deprecated. Please use 'onemath' instead.", - DeprecationWarning, - stacklevel=1, - ) - onemath = True - if onemkl_interfaces_dir is not None: - warnings.warn( - "Using 'onemkl_interfaces_dir' is deprecated. Please use 'onemath_dir' instead.", - DeprecationWarning, - stacklevel=1, - ) - onemath_dir = onemkl_interfaces_dir - - if target_cuda is not None: - if not target_cuda.strip(): - raise ValueError( - "--target-cuda can not be an empty string. 
" - "Use --target-cuda= or --target-cuda" - ) - cmake_args += [ - f"-DDPNP_TARGET_CUDA={target_cuda}", - ] - # Always builds using oneMath for the cuda target - onemath = True - - if target_hip is not None: - if not target_hip.strip(): - raise ValueError( - "--target-hip requires an architecture (e.g., gfx90a)" - ) - cmake_args += [ - f"-DHIP_TARGETS={target_hip}", - ] - # Always builds using oneMath for the hip target - onemath = True - - if onemath: - cmake_args += [ - "-DDPNP_USE_ONEMATH=ON", - ] - - if onemath_dir: - cmake_args += [ - f"-DDPNP_ONEMATH_DIR={onemath_dir}", - ] - elif onemath_dir: - raise RuntimeError("--onemath-dir option is not supported") - - subprocess.check_call( - cmake_args, shell=False, cwd=setup_dir, env=os.environ +from _build_helper import ( + build_extension, + clean_build_dir, + err, + get_dpctl_cmake_dir, + install_editable, + log_cmake_args, + make_cmake_args, + resolve_compilers, + resolve_onemath, +) + + +def parse_args(): + p = argparse.ArgumentParser(description="Local dpnp build driver") + + # compiler and oneAPI relating options + p.add_argument( + "--c-compiler", + type=str, + default=None, + help="Path or name of C compiler", ) - - -if __name__ == "__main__": - import argparse - - parser = argparse.ArgumentParser( - description="Driver to build dpnp for in-place installation" + p.add_argument( + "--cxx-compiler", + type=str, + default=None, + help="Path or name of C++ compiler", ) - driver = parser.add_argument_group(title="Coverage driver arguments") - driver.add_argument("--c-compiler", help="Name of C compiler", default=None) - driver.add_argument( - "--cxx-compiler", help="Name of C++ compiler", default=None + p.add_argument( + "--compiler-root", + type=str, + default=None, + help="Path to compiler installation root", ) - driver.add_argument( + p.add_argument( "--oneapi", - help="Set if using one-API installation", dest="oneapi", action="store_true", + help="Use default oneAPI compiler layout", ) - driver.add_argument( - 
"--debug", - default="Release", - const="Debug", - action="store_const", - help="Set the compilation mode to debugging", + + # CMake relating options + p.add_argument( + "--generator", type=str, default="Ninja", help="CMake generator" ) - driver.add_argument( - "--compiler-root", + p.add_argument( + "--cmake-executable", type=str, - help="Path to compiler home directory", default=None, + help="Path to CMake executable used by build", ) - driver.add_argument( - "--cmake-executable", + p.add_argument( + "--cmake-opts", type=str, - help="Path to cmake executable", - default=None, + default="", + help="Additional options to pass directly to CMake", ) - driver.add_argument( + p.add_argument( + "--debug", + dest="build_type", + const="Debug", + action="store_const", + default="Release", + help="Set build type to Debug (defaults to Release)", + ) + p.add_argument( "--verbose", - help="Build using vebose makefile mode", dest="verbose", action="store_true", + help="Enable verbose makefile output", ) - driver.add_argument( - "--cmake-opts", - help="Channels through additional cmake options", - dest="cmake_opts", - default="", - type=str, - ) - driver.add_argument( + + # platform target relating options + p.add_argument( "--target-cuda", nargs="?", const="ON", - help="Enable CUDA target for build; " - "optionally specify architecture (e.g., --target-cuda=sm_80)", default=None, - type=str, + help="Enable CUDA build. Architecture is optional to specify (e.g., --target-cuda=sm_80).", ) - driver.add_argument( + p.add_argument( "--target-hip", required=False, - help="Enable HIP target for build. " - "Must specify HIP architecture (e.g., --target-hip=gfx90a)", type=str, + help="Enable HIP backend. 
Architecture required to be specified (e.g., --target-hip=gfx90a).", ) - driver.add_argument( + + # oneMath relating options + p.add_argument( "--onemkl_interfaces", help="(DEPRECATED) Build using oneMath", dest="onemkl_interfaces", action="store_true", ) - driver.add_argument( + p.add_argument( "--onemkl_interfaces_dir", help="(DEPRECATED) Local directory with source of oneMath", dest="onemkl_interfaces_dir", default=None, type=str, ) - driver.add_argument( + p.add_argument( "--onemath", help="Build using oneMath", dest="onemath", action="store_true", ) - driver.add_argument( + p.add_argument( "--onemath-dir", help="Local directory with source of oneMath", dest="onemath_dir", default=None, type=str, ) - args = parser.parse_args() - args_to_validate = [ - "c_compiler", - "cxx_compiler", - "compiler_root", - ] + # build relating options + p.add_argument( + "--clean", + action="store_true", + help="Remove build dir before rebuild", + ) + p.add_argument( + "--skip-editable", + action="store_true", + help="Skip pip editable install step", + ) - if args.oneapi or ( - args.c_compiler is None - and args.cxx_compiler is None - and args.compiler_root is None - ): - args.c_compiler = "icx" - args.cxx_compiler = "icpx" if "linux" in sys.platform else "icx" - args.compiler_root = None - else: - cr = args.compiler_root - if isinstance(cr, str) and os.path.exists(cr): - if args.c_compiler is None: - args.c_compiler = "icx" - if args.cxx_compiler is None: - args.cxx_compiler = "icpx" if "linux" in sys.platform else "icx" - else: - raise RuntimeError( - "Option 'compiler-root' must be provided when " - "using non-default DPC++ layout." 
- ) - args_to_validate = [ - "c_compiler", - "cxx_compiler", - ] - for p in args_to_validate: - arg = getattr(args, p) - assert isinstance(arg, str) - if not os.path.exists(arg): - arg2 = os.path.join(cr, arg) - if os.path.exists(arg2): - arg = arg2 - setattr(args, p, arg) - if not os.path.exists(arg): - opt_name = p.replace("_", "-") - raise RuntimeError(f"Option {opt_name} value {arg} must exist.") + return p.parse_args() - run( - use_oneapi=args.oneapi, - build_type=args.debug, - c_compiler=args.c_compiler, - cxx_compiler=args.cxx_compiler, - compiler_root=args.compiler_root, - cmake_executable=args.cmake_executable, + +def main(): + if sys.platform not in ["cygwin", "win32", "linux"]: + err(f"{sys.platform} not supported", "build_locally") + + args = parse_args() + setup_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + c_compiler, cxx_compiler = resolve_compilers( + args.oneapi, args.c_compiler, args.cxx_compiler, args.compiler_root + ) + + dpctl_cmake_dir = get_dpctl_cmake_dir() + print(f"[build_locally] Found DPCTL CMake dir: {dpctl_cmake_dir}") + + onemath, onemath_dir = resolve_onemath( + args.onemath, + args.onemath_dir, + args.target_cuda, + args.target_hip, + args.onemkl_interfaces, + args.onemkl_interfaces_dir, + ) + + # clean build dir if --clean set + if args.clean: + clean_build_dir(setup_dir) + + cmake_args = make_cmake_args( + c_compiler=c_compiler, + cxx_compiler=cxx_compiler, + dpctl_cmake_dir=dpctl_cmake_dir, + onemath=onemath, + onemath_dir=onemath_dir, verbose=args.verbose, - cmake_opts=args.cmake_opts, - target_cuda=args.target_cuda, - target_hip=args.target_hip, - onemkl_interfaces=args.onemkl_interfaces, - onemkl_interfaces_dir=args.onemkl_interfaces_dir, - onemath=args.onemath, - onemath_dir=args.onemath_dir, + other_opts=args.cmake_opts, ) + + # handle architecture conflicts + if args.target_hip is not None and not args.target_hip.strip(): + err("--target-hip requires an explicit architecture", "build_locally") + + # 
CUDA/HIP targets + if args.target_cuda: + cmake_args += [f"-DDPNP_TARGET_CUDA={args.target_cuda}"] + if args.target_hip: + cmake_args += [f"-DDPNP_TARGET_HIP={args.target_hip}"] + + log_cmake_args(cmake_args, "build_locally") + + print("[build_locally] Building extensions in-place...") + + env = os.environ.copy() + if args.oneapi and "DPL_ROOT" in env: + env["DPL_ROOT_HINT"] = env["DPL_ROOT"] + + build_extension( + setup_dir, + env, + cmake_args, + cmake_executable=args.cmake_executable, + generator=args.generator, + build_type=args.build_type, + ) + if not args.skip_editable: + install_editable(setup_dir, env) + else: + print("[build_locally] Skipping editable install (--skip-editable)") + + print("[build_locally] Build complete") + + +if __name__ == "__main__": + main() diff --git a/scripts/gen_coverage.py b/scripts/gen_coverage.py index a624bd570e2d..588345d91b2e 100644 --- a/scripts/gen_coverage.py +++ b/scripts/gen_coverage.py @@ -26,67 +26,185 @@ # THE POSSIBILITY OF SUCH DAMAGE. # ***************************************************************************** +import argparse import os import subprocess import sys +from _build_helper import ( + build_extension, + capture_cmd_output, + clean_build_dir, + err, + get_dpctl_cmake_dir, + install_editable, + log_cmake_args, + make_cmake_args, + resolve_compilers, + run, +) -def run( - use_oneapi=True, - c_compiler=None, - cxx_compiler=None, - compiler_root=None, - bin_llvm=None, - pytest_opts="", - verbose=False, -): - IS_LIN = False - - if "linux" in sys.platform: - IS_LIN = True - elif sys.platform in ["win32", "cygwin"]: - pass + +def parse_args(): + p = argparse.ArgumentParser(description="Build dpnp and generate coverage") + + # compiler and oneAPI relating options + p.add_argument( + "--c-compiler", default=None, help="Path or name of C compiler" + ) + p.add_argument( + "--cxx-compiler", default=None, help="Path or name of C++ compiler" + ) + p.add_argument( + "--compiler-root", + type=str, + default=None, + 
help="Path to compiler installation root", + ) + p.add_argument( + "--oneapi", + dest="oneapi", + action="store_true", + help="Use default oneAPI compiler layout", + ) + p.add_argument( + "--bin-llvm", + type=str, + default=None, + help="Path to folder where llvm-cov/llvm-profdata can be found", + ) + + # CMake relating options + p.add_argument( + "--generator", type=str, default="Ninja", help="CMake generator" + ) + p.add_argument( + "--cmake-executable", + type=str, + default=None, + help="Path to CMake executable used by build", + ) + + p.add_argument( + "--cmake-opts", + type=str, + default="", + help="Additional options to pass directly to CMake", + ) + p.add_argument( + "--verbose", + dest="verbose", + action="store_true", + help="Enable verbose makefile output", + ) + + # test relating options + p.add_argument( + "--skip-pytest", + dest="run_pytest", + action="store_false", + help="Skip running pytest and coverage generation", + ) + p.add_argument( + "--pytest-opts", + help="Channels through additional pytest options", + dest="pytest_opts", + default="", + type=str, + ) + + # build relating options + p.add_argument( + "--clean", + action="store_true", + help="Remove build dir before rebuild (default: False)", + ) + + return p.parse_args() + + +def find_bin_llvm(compiler): + if os.path.isabs(compiler): + bin_dir = os.path.dirname(compiler) else: - raise AssertionError(sys.platform + " not supported") + compiler_path = capture_cmd_output(["which", compiler]) + if not compiler_path: + raise RuntimeError(f"Compiler {compiler} not found in PATH") + bin_dir = os.path.dirname(compiler_path) - if not IS_LIN: + compiler_dir = os.path.join(bin_dir, "compiler") + if os.path.exists(compiler_dir): + bin_llvm = compiler_dir + else: + bin_dir = os.path.dirname(bin_dir) + bin_llvm = os.path.join(bin_dir, "bin-llvm") + + if not os.path.exists(bin_llvm): raise RuntimeError( - "This scripts only supports coverage collection on Linux" + f"Path to folder with 
llvm-cov/llvm-profdata={bin_llvm} " + "seems to not exist" ) + return bin_llvm + +def main(): + is_linux = "linux" in sys.platform + if not is_linux: + err(f"{sys.platform} not supported", "gen_coverage") + + args = parse_args() setup_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - cmake_args = [ - sys.executable, - "setup.py", - "develop", - "--generator=Ninja", - "--", - "-DCMAKE_C_COMPILER:PATH=" + c_compiler, - "-DCMAKE_CXX_COMPILER:PATH=" + cxx_compiler, - "-DDPNP_GENERATE_COVERAGE=ON", - ] - - env = {} - if bin_llvm: - env = { - "PATH": ":".join((os.environ.get("PATH", ""), bin_llvm)), - "LLVM_TOOLS_HOME": bin_llvm, - } + c_compiler, cxx_compiler = resolve_compilers( + args.oneapi, + args.c_compiler, + args.cxx_compiler, + args.compiler_root, + ) - # extend with global environment variables - env.update({k: v for k, v in os.environ.items() if k != "PATH"}) + dpctl_cmake_dir = get_dpctl_cmake_dir() + print(f"[gen_coverage] Found DPCTL CMake dir: {dpctl_cmake_dir}") - if verbose: - cmake_args += [ - "-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON", - ] + if args.clean: + clean_build_dir(setup_dir) + + cmake_args = make_cmake_args( + c_compiler=c_compiler, + cxx_compiler=cxx_compiler, + dpctl_cmake_dir=dpctl_cmake_dir, + verbose=args.verbose, + ) + cmake_args.append("-DDPNP_GENERATE_COVERAGE=ON") + + env = os.environ.copy() + + if args.bin_llvm: + bin_llvm = args.bin_llvm + else: + bin_llvm = find_bin_llvm(c_compiler) + print( + f"[gen_coverage] Path to folder with llvm-cov/llvm-profdata: {bin_llvm}" + ) + + if bin_llvm: + env["PATH"] = ":".join((env.get("PATH", ""), bin_llvm)) + env["LLVM_TOOLS_HOME"] = bin_llvm - subprocess.check_call(cmake_args, shell=False, cwd=setup_dir, env=env) + log_cmake_args(cmake_args, "gen_coverage") - env["LLVM_PROFILE_FILE"] = "dpnp_pytest.profraw" - subprocess.check_call( - [ + build_extension( + setup_dir, + env, + cmake_args, + cmake_executable=args.cmake_executable, + generator=args.generator, + build_type="Coverage", + ) 
+ install_editable(setup_dir, env) + + if args.run_pytest: + env["LLVM_PROFILE_FILE"] = "dpnp_pytest.profraw" + pytest_cmd = [ "pytest", "-q", "-ra", @@ -99,133 +217,66 @@ def run( "--cov-report=lcov:coverage-python.lcov", "--pyargs", "dpnp", - *pytest_opts.split(), - ], - cwd=setup_dir, - shell=False, - env=env, - ) - - def find_objects(): - objects = [] - dpnp_path = os.getcwd() - search_path = os.path.join(dpnp_path, "dpnp") - for root, _, files in os.walk(search_path): - for file in files: - if ( - file.endswith("_c.so") - or root.find("extensions") != -1 - and file.find("_impl.cpython") != -1 - ): - objects.extend(["-object", os.path.join(root, file)]) - return objects - - objects = find_objects() - instr_profile_fn = "dpnp_pytest.profdata" - # generate instrumentation profile data - subprocess.check_call( - [ - os.path.join(bin_llvm, "llvm-profdata"), - "merge", - "-sparse", - env["LLVM_PROFILE_FILE"], - "-o", - instr_profile_fn, + *args.pytest_opts.split(), ] - ) + run(pytest_cmd, env=env, cwd=setup_dir) - # export lcov - with open("coverage-cpp.lcov", "w") as fh: - subprocess.check_call( + def find_objects(): + objects = [] + dpnp_path = os.getcwd() + search_path = os.path.join(dpnp_path, "dpnp") + for root, _, files in os.walk(search_path): + for file in files: + if ( + file.endswith("_c.so") + or root.find("extensions") != -1 + and file.find("_impl.cpython") != -1 + ): + objects.extend(["-object", os.path.join(root, file)]) + return objects + + objects = find_objects() + instr_profile_fn = "dpnp_pytest.profdata" + + # generate instrumentation profile data + run( [ - os.path.join(bin_llvm, "llvm-cov"), - "export", - "-format=lcov", - "-ignore-filename-regex=/tmp/icpx*", - r"-ignore-filename-regex=.*/backend/kernels/elementwise_functions/.*\.hpp$", - "-instr-profile=" + instr_profile_fn, + os.path.join(bin_llvm, "llvm-profdata"), + "merge", + "-sparse", + env["LLVM_PROFILE_FILE"], + "-o", + instr_profile_fn, ] - + objects - + ["-sources", "dpnp"], - 
stdout=fh, ) + # export lcov + with open("coverage-cpp.lcov", "w") as fh: + subprocess.check_call( + [ + os.path.join(bin_llvm, "llvm-cov"), + "export", + "-format=lcov", + "-ignore-filename-regex=/tmp/icpx*", + r"-ignore-filename-regex=.*/backend/kernels/elementwise_functions/.*\.hpp$", + "-instr-profile=" + instr_profile_fn, + ] + + objects + + ["-sources", "dpnp"], + cwd=setup_dir, + env=env, + stdout=fh, + ) -if __name__ == "__main__": - import argparse - - parser = argparse.ArgumentParser( - description="Driver to build dpnp and generate coverage" - ) - driver = parser.add_argument_group(title="Coverage driver arguments") - driver.add_argument("--c-compiler", help="Name of C compiler", default=None) - driver.add_argument( - "--cxx-compiler", help="Name of C++ compiler", default=None - ) - driver.add_argument( - "--not-oneapi", - help="Is one-API installation", - dest="oneapi", - action="store_false", - ) - driver.add_argument( - "--compiler-root", type=str, help="Path to compiler home directory" - ) - driver.add_argument( - "--bin-llvm", help="Path to folder where llvm-cov can be found" - ) - driver.add_argument( - "--pytest-opts", - help="Channels through additional pytest options", - dest="pytest_opts", - default="", - type=str, - ) - driver.add_argument( - "--verbose", - help="Build using vebose makefile mode", - dest="verbose", - action="store_true", - ) - args = parser.parse_args() - - if args.oneapi: - args.c_compiler = "icx" - args.cxx_compiler = "icpx" - args.compiler_root = None - icx_path = subprocess.check_output(["which", "icx"]) - bin_dir = os.path.dirname(icx_path) - compiler_dir = os.path.join(bin_dir.decode("utf-8"), "compiler") - if os.path.exists(compiler_dir): - args.bin_llvm = os.path.join(bin_dir.decode("utf-8"), "compiler") - else: - bin_dir = os.path.dirname(bin_dir) - args.bin_llvm = os.path.join(bin_dir.decode("utf-8"), "bin-llvm") - assert os.path.exists(args.bin_llvm) + print("[gen_coverage] Coverage export is completed") else: - 
args_to_validate = [ - "c_compiler", - "cxx_compiler", - "compiler_root", - "bin_llvm", - ] - for p in args_to_validate: - arg = getattr(args, p, None) - if not isinstance(arg, str): - opt_name = p.replace("_", "-") - raise RuntimeError( - f"Option {opt_name} must be provided is " - "using non-default DPC++ layout" - ) - if not os.path.exists(arg): - raise RuntimeError(f"Path {arg} must exist") - - run( - use_oneapi=args.oneapi, - c_compiler=args.c_compiler, - cxx_compiler=args.cxx_compiler, - compiler_root=args.compiler_root, - bin_llvm=args.bin_llvm, - pytest_opts=args.pytest_opts, - verbose=args.verbose, - ) + print( + "[gen_coverage] Skipping pytest and coverage collection " + "(--skip-pytest)" + ) + + print("[gen_coverage] Done") + + +if __name__ == "__main__": + main()