diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml
index a80a6669ffdd..926d10859af4 100644
--- a/.github/workflows/build-sphinx.yml
+++ b/.github/workflows/build-sphinx.yml
@@ -47,7 +47,7 @@ jobs:
 
     steps:
       - name: Cancel Previous Runs
-        uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa # 0.12.1
+        uses: styfle/cancel-workflow-action@3155a141048f8f89c06b4cdae32e7853e97536bc # 0.13.0
         with:
           access_token: ${{ github.token }}
 
diff --git a/.github/workflows/check-onemath.yaml b/.github/workflows/check-onemath.yaml
index c6b12a4454f3..80bcccc1b5ab 100644
--- a/.github/workflows/check-onemath.yaml
+++ b/.github/workflows/check-onemath.yaml
@@ -34,7 +34,7 @@ jobs:
 
     steps:
       - name: Cancel Previous Runs
-        uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa # 0.12.1
+        uses: styfle/cancel-workflow-action@3155a141048f8f89c06b4cdae32e7853e97536bc # 0.13.0
         with:
           access_token: ${{ github.token }}
 
diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml
index d195b51ae80b..e56c36d58782 100644
--- a/.github/workflows/conda-package.yml
+++ b/.github/workflows/conda-package.yml
@@ -49,7 +49,7 @@ jobs:
 
     steps:
       - name: Cancel Previous Runs
-        uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa # 0.12.1
+        uses: styfle/cancel-workflow-action@3155a141048f8f89c06b4cdae32e7853e97536bc # 0.13.0
         with:
           access_token: ${{ github.token }}
 
diff --git a/.github/workflows/cron-run-tests.yaml b/.github/workflows/cron-run-tests.yaml
index ff3fe6780700..4db380c73775 100644
--- a/.github/workflows/cron-run-tests.yaml
+++ b/.github/workflows/cron-run-tests.yaml
@@ -43,7 +43,7 @@ jobs:
 
     steps:
       - name: Cancel Previous Runs
-        uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa # 0.12.1
+        uses: styfle/cancel-workflow-action@3155a141048f8f89c06b4cdae32e7853e97536bc # 0.13.0
         with:
           access_token: ${{ github.token }}
 
diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml
index 734f006906f2..8149b63150d7 100644
--- a/.github/workflows/generate_coverage.yaml
+++ b/.github/workflows/generate_coverage.yaml
@@ -33,7 +33,7 @@ jobs:
 
     steps:
       - name: Cancel Previous Runs
-        uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa # 0.12.1
+        uses: styfle/cancel-workflow-action@3155a141048f8f89c06b4cdae32e7853e97536bc # 0.13.0
         with:
           access_token: ${{ github.token }}
 
diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml
index 62e31b249cbf..7fa391d342d0 100644
--- a/.github/workflows/openssf-scorecard.yml
+++ b/.github/workflows/openssf-scorecard.yml
@@ -72,6 +72,6 @@ jobs:
 
       # Upload the results to GitHub's code scanning dashboard.
       - name: "Upload to code-scanning"
-        uses: github/codeql-action/upload-sarif@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 # v4.31.9
+        uses: github/codeql-action/upload-sarif@cdefb33c0f6224e58673d9004f47f7cb3e328b89 # v4.31.10
         with:
           sarif_file: results.sarif
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 83eae28e76b3..50b1175ffebc 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -2,7 +2,7 @@
 # See https://pre-commit.com/hooks.html for more hooks
 repos:
 -   repo: https://github.com/PyCQA/bandit
-    rev: '1.9.2'
+    rev: '1.9.3'
     hooks:
     -   id: bandit
         pass_filenames: false
@@ -64,7 +64,7 @@ repos:
         additional_dependencies:
             - tomli
 -   repo: https://github.com/psf/black
-    rev: 25.12.0
+    rev: 26.1.0
     hooks:
     -   id: black
         exclude: "dpnp/_version.py"
@@ -118,7 +118,7 @@ repos:
             ]
         files: '^dpnp/(dpnp_iface.*|fft|linalg|scipy|dpnp_array)'
 -   repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
-    rev: v2.15.0
+    rev: v2.16.0
     hooks:
     -   id: pretty-format-toml
         args: [--autofix]
@@ -127,7 +127,7 @@ repos:
     hooks:
     -   id: actionlint
 -   repo: https://github.com/BlankSpruce/gersemi
-    rev: 0.25.0
+    rev: 0.25.1
     hooks:
     -   id: gersemi
         exclude: "dpnp/backend/cmake/Modules/"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9bd28a21d194..69b06cb64bf8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -42,23 +42,31 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum
 * Unified `dpnp` public API exports by consolidating function exports in `__init__.py` and removing wildcard imports [#2665](https://github.com/IntelPython/dpnp/pull/2665) [#2666](https://github.com/IntelPython/dpnp/pull/2666)
 * Updated tests to reflect the new scalar conversion rules for non-0D `usm_ndarray` [#2694](https://github.com/IntelPython/dpnp/pull/2694)
 * Compile indexing extension with `-fno-sycl-id-queries-fit-in-int` to support huge arrays [#2721](https://github.com/IntelPython/dpnp/pull/2721)
+* Updated `dpnp.fix` to reuse `dpnp.trunc` internally [#2722](https://github.com/IntelPython/dpnp/pull/2722)
+* Changed the build scripts and documentation due to the `python setup.py develop` deprecation notice [#2716](https://github.com/IntelPython/dpnp/pull/2716)
+* Clarified behavior on repeated `axes` in `dpnp.tensordot` and `dpnp.linalg.tensordot` functions [#2733](https://github.com/IntelPython/dpnp/pull/2733)
 
 ### Deprecated
 
 * `dpnp.asfarray` is deprecated. Use `dpnp.asarray` with an appropriate dtype instead [#2650](https://github.com/IntelPython/dpnp/pull/2650)
 * Passing the output array ``out`` positionally to `dpnp.minimum` and `dpnp.maximum` is deprecated. Pass the output with the keyword form, e.g. ``dpnp.minimum(a, b, out=c)`` [#2659](https://github.com/IntelPython/dpnp/pull/2659)
 * `dpnp.ndarray.T` property is deprecated for not two-dimensional array to be compatible with the Python array API standard. To achieve a similar behavior when ``a.ndim != 2``, either ``a.transpose()``, or ``a.mT`` (swaps the last two axes only), or ``dpnp.permute_dims(a, range(a.ndim)[::-1])`` can be used [#2681](https://github.com/IntelPython/dpnp/pull/2681)
+* `dpnp.fix` is deprecated. Use `dpnp.trunc` instead, which provides identical functionality [#2730](https://github.com/IntelPython/dpnp/pull/2730)
 
 ### Removed
 
 * Dropped support for Python 3.9 [#2626](https://github.com/IntelPython/dpnp/pull/2626)
 * Removed the obsolete interface from DPNP to Numba JIT [#2647](https://github.com/IntelPython/dpnp/pull/2647)
 * Removed the `newshape` parameter from `dpnp.reshape`, which has been deprecated since dpnp 0.17.0. Pass it positionally or use `shape=` on newer versions [#2670](https://github.com/IntelPython/dpnp/pull/2670)
+* Removed unused `pytest` configuration from `pyproject.toml` [#2729](https://github.com/IntelPython/dpnp/pull/2729)
 
 ### Fixed
 
 * Suppressed a potential deprecation warning triggered during import of the `dpctl.tensor` module [#2709](https://github.com/IntelPython/dpnp/pull/2709)
 * Corrected a phonetic spelling issue due to incorrect using of `a nd` in docstrings [#2719](https://github.com/IntelPython/dpnp/pull/2719)
+* Resolved an issue causing `dpnp.linspace` to return an incorrect output shape when inputs were passed as arrays [#2712](https://github.com/IntelPython/dpnp/pull/2712)
+* Resolved an issue where `dpnp` always returned the base allocation pointer when the start of the view was expected [#2651](https://github.com/IntelPython/dpnp/pull/2651)
+* Fixed an issue causing an exception in `dpnp.geomspace` and `dpnp.logspace` when called with an explicit `device` keyword while any input array is allocated on another device [#2723](https://github.com/IntelPython/dpnp/pull/2723)
 
 ### Security
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 66f5c776b52d..9d676232f08e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -107,7 +107,7 @@ set(DPNP_TARGET_CUDA
 Set to a truthy value (e.g., ON, TRUE) to use default architecture (sm_50), \
 or to a specific architecture like sm_80."
 )
-set(HIP_TARGETS "" CACHE STRING "HIP architecture for target")
+set(DPNP_TARGET_HIP "" CACHE STRING "HIP architecture for target")
 
 set(_dpnp_sycl_targets)
 set(_use_onemath OFF)
@@ -135,18 +135,28 @@ if("x${DPNP_SYCL_TARGETS}" STREQUAL "x")
         set(_use_onemath_cuda ON)
     endif()
 
-    if(HIP_TARGETS)
-        if(HIP_TARGETS MATCHES "^gfx")
+    if(DPNP_TARGET_HIP)
+        if(DPNP_TARGET_HIP MATCHES "^gfx")
             if("x${_dpnp_sycl_targets}" STREQUAL "x")
-                set(_dpnp_sycl_targets "amd_gpu_${HIP_TARGETS},spir64-unknown-unknown")
+                set(_dpnp_sycl_targets
+                    "amd_gpu_${DPNP_TARGET_HIP},spir64-unknown-unknown"
+                )
             else()
-                set(_dpnp_sycl_targets "amd_gpu_${HIP_TARGETS},${_dpnp_sycl_targets}")
+                set(_dpnp_sycl_targets
+                    "amd_gpu_${DPNP_TARGET_HIP},${_dpnp_sycl_targets}"
+                )
             endif()
+
             set(_use_onemath_hip ON)
+            set(HIP_TARGETS
+                ${DPNP_TARGET_HIP}
+                CACHE STRING
+                "HIP GPU targets for oneMath"
+            )
         else()
             message(
                 FATAL_ERROR
-                "Invalid value for HIP_TARGETS: \"${HIP_TARGETS}\". "
+                "Invalid value for DPNP_TARGET_HIP: \"${DPNP_TARGET_HIP}\". "
                 "Expected an architecture name starting with 'gfx', e.g. 'gfx1030'."
             )
         endif()
@@ -159,11 +169,15 @@ else()
     endif()
 
     if("${DPNP_SYCL_TARGETS}" MATCHES "amd_gpu_")
-        set(_use_onemath_hip ON)
-
-        if("x${HIP_TARGETS}" STREQUAL "x")
-            message(FATAL_ERROR "HIP_TARGETS must be specified when using HIP backend")
+        if("x${DPNP_TARGET_HIP}" STREQUAL "x")
+            message(
+                FATAL_ERROR
+                "DPNP_TARGET_HIP must be specified when using HIP backend"
+            )
         endif()
+
+        set(_use_onemath_hip ON)
+        set(HIP_TARGETS ${DPNP_TARGET_HIP} CACHE STRING "HIP GPU targets for oneMath")
     endif()
 
     if("${DPNP_SYCL_TARGETS}" MATCHES "amdgcn-amd-amdhsa")
@@ -295,6 +309,16 @@ else()
     message(FATAL_ERROR "Unsupported system.")
 endif()
 
+# Define flags for CMAKE_BUILD_TYPE=Coverage
+set(CMAKE_C_FLAGS_COVERAGE "${CMAKE_C_FLAGS_DEBUG} -O1 -g1 -DDEBUG")
+set(CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_DEBUG} -O1 -g1 -DDEBUG")
+set(CMAKE_MODULE_LINKER_FLAGS_COVERAGE "${CMAKE_MODULE_LINKER_FLAGS_DEBUG}")
+mark_as_advanced(
+    CMAKE_C_FLAGS_COVERAGE
+    CMAKE_CXX_FLAGS_COVERAGE
+    CMAKE_MODULE_LINKER_FLAGS_COVERAGE
+)
+
 if(DPNP_GENERATE_COVERAGE)
     string(
         CONCAT PROFILE_FLAGS
diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
index 2fd55e07eb4b..661f44b50ed9 100644
--- a/conda-recipe/meta.yaml
+++ b/conda-recipe/meta.yaml
@@ -1,6 +1,6 @@
 {% set max_compiler_and_mkl_version = environ.get("MAX_BUILD_CMPL_MKL_VERSION", "2026.0a0") %}
 {% set required_compiler_and_mkl_version = "2025.0" %}
-{% set required_dpctl_version = "0.21.0" %}
+{% set required_dpctl_version = "0.22.0*" %}
 
 {% set pyproject = load_file_data('pyproject.toml') %}
 {% set py_build_deps = pyproject.get('build-system', {}).get('requires', []) %}
diff --git a/doc/0.builddoc.sh b/doc/0.builddoc.sh
deleted file mode 100755
index f10b4a5cc22d..000000000000
--- a/doc/0.builddoc.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash
-
-BUILDDOCDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
-ROOTDIR=$BUILDDOCDIR/..
-
-cd "$ROOTDIR" || exit 1
-python setup.py develop
-
-cd "$BUILDDOCDIR" || exit 2
-make clean
-make html
diff --git a/doc/quick_start_guide.rst b/doc/quick_start_guide.rst
index 92c506c0fd81..6226a655c333 100644
--- a/doc/quick_start_guide.rst
+++ b/doc/quick_start_guide.rst
@@ -112,13 +112,15 @@ To build and install the package on Linux OS, run:
 
 .. code-block:: bash
 
-    python setup.py install -- -G Ninja -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icpx
+    python setup.py build_ext --inplace -- -G Ninja -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icpx
+    python -m pip install -e .
 
 To build and install the package on Windows OS, run:
 
 .. code-block:: bash
 
-    python setup.py install -- -G Ninja -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icx
+    python setup.py build_ext --inplace -- -G Ninja -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icx
+    python -m pip install -e .
 
 Alternatively, to develop on Linux OS, you can use the driver script:
 
diff --git a/dpnp/__init__.py b/dpnp/__init__.py
index 5ec230977f98..02420107972f 100644
--- a/dpnp/__init__.py
+++ b/dpnp/__init__.py
@@ -566,7 +566,6 @@
     kaiser,
 )
 
-
 # =============================================================================
 # Helper functions
 # =============================================================================
diff --git a/dpnp/backend/extensions/ufunc/CMakeLists.txt b/dpnp/backend/extensions/ufunc/CMakeLists.txt
index 5609522f58a4..b24d5d131cfe 100644
--- a/dpnp/backend/extensions/ufunc/CMakeLists.txt
+++ b/dpnp/backend/extensions/ufunc/CMakeLists.txt
@@ -34,7 +34,6 @@ set(_elementwise_sources
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/divmod.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/erf_funcs.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fabs.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fix.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/float_power.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fmax.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fmin.cpp
diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp
index df409464a5c2..9254e87e52c4 100644
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp
@@ -33,7 +33,6 @@
 #include "divmod.hpp"
 #include "erf_funcs.hpp"
 #include "fabs.hpp"
-#include "fix.hpp"
 #include "float_power.hpp"
 #include "fmax.hpp"
 #include "fmin.hpp"
@@ -67,7 +66,6 @@ void init_elementwise_functions(py::module_ m)
     init_divmod(m);
     init_erf_funcs(m);
     init_fabs(m);
-    init_fix(m);
     init_float_power(m);
     init_fmax(m);
     init_fmin(m);
diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fix.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fix.cpp
deleted file mode 100644
index 6b21245489fc..000000000000
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/fix.cpp
+++ /dev/null
@@ -1,131 +0,0 @@
-//*****************************************************************************
-// Copyright (c) 2024, Intel Corporation
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-// - Neither the name of the copyright holder nor the names of its contributors
-//   may be used to endorse or promote products derived from this software
-//   without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
-// THE POSSIBILITY OF SUCH DAMAGE.
-//*****************************************************************************
-
-#include <type_traits>
-#include <vector>
-
-#include <sycl/sycl.hpp>
-
-#include "dpctl4pybind11.hpp"
-
-#include "fix.hpp"
-#include "kernels/elementwise_functions/fix.hpp"
-#include "populate.hpp"
-
-// include a local copy of elementwise common header from dpctl tensor:
-// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp
-// TODO: replace by including dpctl header once available
-#include "../../elementwise_functions/elementwise_functions.hpp"
-
-// dpctl tensor headers
-#include "kernels/elementwise_functions/common.hpp"
-#include "utils/type_dispatch.hpp"
-
-namespace dpnp::extensions::ufunc
-{
-namespace py = pybind11;
-namespace py_int = dpnp::extensions::py_internal;
-
-namespace impl
-{
-namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common;
-namespace td_ns = dpctl::tensor::type_dispatch;
-
-/**
- * @brief A factory to define pairs of supported types for which
- * sycl::fix<T> function is available.
- *
- * @tparam T Type of input vector `a` and of result vector `y`.
- */
-template <typename T>
-struct OutputType
-{
-    using value_type =
-        typename std::disjunction<td_ns::TypeMapResultEntry<T, sycl::half>,
-                                  td_ns::TypeMapResultEntry<T, float>,
-                                  td_ns::TypeMapResultEntry<T, double>,
-                                  td_ns::DefaultResultEntry<void>>::result_type;
-};
-
-using dpnp::kernels::fix::FixFunctor;
-
-template <typename argT,
-          typename resT = argT,
-          unsigned int vec_sz = 4,
-          unsigned int n_vecs = 2,
-          bool enable_sg_loadstore = true>
-using ContigFunctor = ew_cmn_ns::UnaryContigFunctor<argT,
-                                                    resT,
-                                                    FixFunctor<argT, resT>,
-                                                    vec_sz,
-                                                    n_vecs,
-                                                    enable_sg_loadstore>;
-
-template <typename argTy, typename resTy, typename IndexerT>
-using StridedFunctor = ew_cmn_ns::
-    UnaryStridedFunctor<argTy, resTy, IndexerT, FixFunctor<argTy, resTy>>;
-
-using ew_cmn_ns::unary_contig_impl_fn_ptr_t;
-using ew_cmn_ns::unary_strided_impl_fn_ptr_t;
-
-static unary_contig_impl_fn_ptr_t fix_contig_dispatch_vector[td_ns::num_types];
-static int fix_output_typeid_vector[td_ns::num_types];
-static unary_strided_impl_fn_ptr_t
-    fix_strided_dispatch_vector[td_ns::num_types];
-
-MACRO_POPULATE_DISPATCH_VECTORS(fix);
-} // namespace impl
-
-void init_fix(py::module_ m)
-{
-    using arrayT = dpctl::tensor::usm_ndarray;
-    using event_vecT = std::vector<sycl::event>;
-    {
-        impl::populate_fix_dispatch_vectors();
-        using impl::fix_contig_dispatch_vector;
-        using impl::fix_output_typeid_vector;
-        using impl::fix_strided_dispatch_vector;
-
-        auto fix_pyapi = [&](const arrayT &src, const arrayT &dst,
-                             sycl::queue &exec_q,
-                             const event_vecT &depends = {}) {
-            return py_int::py_unary_ufunc(
-                src, dst, exec_q, depends, fix_output_typeid_vector,
-                fix_contig_dispatch_vector, fix_strided_dispatch_vector);
-        };
-        m.def("_fix", fix_pyapi, "", py::arg("src"), py::arg("dst"),
-              py::arg("sycl_queue"), py::arg("depends") = py::list());
-
-        auto fix_result_type_pyapi = [&](const py::dtype &dtype) {
-            return py_int::py_unary_ufunc_result_type(dtype,
-                                                      fix_output_typeid_vector);
-        };
-        m.def("_fix_result_type", fix_result_type_pyapi);
-    }
-}
-} // namespace dpnp::extensions::ufunc
diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fix.hpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fix.hpp
deleted file mode 100644
index f9fed62cf818..000000000000
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/fix.hpp
+++ /dev/null
@@ -1,38 +0,0 @@
-//*****************************************************************************
-// Copyright (c) 2024, Intel Corporation
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-// - Neither the name of the copyright holder nor the names of its contributors
-//   may be used to endorse or promote products derived from this software
-//   without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
-// THE POSSIBILITY OF SUCH DAMAGE.
-//*****************************************************************************
-
-#pragma once
-
-#include <pybind11/pybind11.h>
-
-namespace py = pybind11;
-
-namespace dpnp::extensions::ufunc
-{
-void init_fix(py::module_ m);
-} // namespace dpnp::extensions::ufunc
diff --git a/dpnp/backend/kernels/elementwise_functions/fix.hpp b/dpnp/backend/kernels/elementwise_functions/fix.hpp
deleted file mode 100644
index f53bfc17e566..000000000000
--- a/dpnp/backend/kernels/elementwise_functions/fix.hpp
+++ /dev/null
@@ -1,52 +0,0 @@
-//*****************************************************************************
-// Copyright (c) 2024, Intel Corporation
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-// - Neither the name of the copyright holder nor the names of its contributors
-//   may be used to endorse or promote products derived from this software
-//   without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
-// THE POSSIBILITY OF SUCH DAMAGE.
-//*****************************************************************************
-
-#pragma once
-
-#include <sycl/sycl.hpp>
-
-namespace dpnp::kernels::fix
-{
-template <typename argT, typename resT>
-struct FixFunctor
-{
-    // is function constant for given argT
-    using is_constant = typename std::false_type;
-    // constant value, if constant
-    // constexpr resT constant_value = resT{};
-    // is function defined for sycl::vec
-    using supports_vec = typename std::false_type;
-    // do both argT and resT support subgroup store/load operation
-    using supports_sg_loadstore = typename std::true_type;
-
-    resT operator()(const argT &x) const
-    {
-        return (x >= 0.0) ? sycl::floor(x) : sycl::ceil(x);
-    }
-};
-} // namespace dpnp::kernels::fix
diff --git a/dpnp/dpnp_algo/dpnp_arraycreation.py b/dpnp/dpnp_algo/dpnp_arraycreation.py
index a76736d27bb5..d94a031801f3 100644
--- a/dpnp/dpnp_algo/dpnp_arraycreation.py
+++ b/dpnp/dpnp_algo/dpnp_arraycreation.py
@@ -46,11 +46,46 @@
 
 
 def _as_usm_ndarray(a, usm_type, sycl_queue):
+    """Converts input object to `dpctl.tensor.usm_ndarray`"""
+
     if isinstance(a, dpnp_array):
-        return a.get_array()
+        a = a.get_array()
     return dpt.asarray(a, usm_type=usm_type, sycl_queue=sycl_queue)
 
 
+def _check_has_zero_val(a):
+    """Check if any element in input object is equal to zero"""
+
+    if dpnp.isscalar(a):
+        if a == 0:
+            return True
+    elif hasattr(a, "any"):
+        if (a == 0).any():
+            return True
+    elif (numpy.array(a) == 0).any():
+        return True
+    return False
+
+
+def _get_usm_allocations(objs, device=None, usm_type=None, sycl_queue=None):
+    """
+    Get common USM allocations based on a list of input objects and an explicit
+    device, a SYCL queue, or a USM type if specified.
+
+    """
+
+    alloc_usm_type, alloc_sycl_queue = get_usm_allocations(objs)
+
+    if sycl_queue is None and device is None:
+        sycl_queue = alloc_sycl_queue
+
+    if usm_type is None:
+        usm_type = alloc_usm_type or "device"
+    return usm_type, dpnp.get_normalized_queue_device(
+        sycl_queue=sycl_queue, device=device
+    )
+
+
 def dpnp_geomspace(
     start,
     stop,
@@ -62,76 +97,57 @@ def dpnp_geomspace(
     endpoint=True,
     axis=0,
 ):
-    usm_type_alloc, sycl_queue_alloc = get_usm_allocations([start, stop])
-
-    if sycl_queue is None and device is None:
-        sycl_queue = sycl_queue_alloc
-    sycl_queue_normalized = dpnp.get_normalized_queue_device(
-        sycl_queue=sycl_queue, device=device
+    usm_type, sycl_queue = _get_usm_allocations(
+        [start, stop], device=device, usm_type=usm_type, sycl_queue=sycl_queue
     )
 
-    if usm_type is None:
-        _usm_type = "device" if usm_type_alloc is None else usm_type_alloc
-    else:
-        _usm_type = usm_type
+    if _check_has_zero_val(start) or _check_has_zero_val(stop):
+        raise ValueError("Geometric sequence cannot include zero")
 
-    start = _as_usm_ndarray(start, _usm_type, sycl_queue_normalized)
-    stop = _as_usm_ndarray(stop, _usm_type, sycl_queue_normalized)
+    start = dpnp.array(start, usm_type=usm_type, sycl_queue=sycl_queue)
+    stop = dpnp.array(stop, usm_type=usm_type, sycl_queue=sycl_queue)
 
     dt = numpy.result_type(start, stop, float(num))
-    dt = map_dtype_to_device(dt, sycl_queue_normalized.sycl_device)
+    dt = map_dtype_to_device(dt, sycl_queue.sycl_device)
     if dtype is None:
         dtype = dt
 
-    if dpnp.any(start == 0) or dpnp.any(stop == 0):
-        raise ValueError("Geometric sequence cannot include zero")
+    # promote both arguments to the same dtype
+    start = start.astype(dt, copy=False)
+    stop = stop.astype(dt, copy=False)
 
-    out_sign = dpt.ones(
-        dpt.broadcast_arrays(start, stop)[0].shape,
-        dtype=dt,
-        usm_type=_usm_type,
-        sycl_queue=sycl_queue_normalized,
-    )
-    # Avoid negligible real or imaginary parts in output by rotating to
-    # positive real, calculating, then undoing rotation
-    if dpnp.issubdtype(dt, dpnp.complexfloating):
-        all_imag = (start.real == 0.0) & (stop.real == 0.0)
-        if dpnp.any(all_imag):
-            start[all_imag] = start[all_imag].imag
-            stop[all_imag] = stop[all_imag].imag
-            out_sign[all_imag] = 1j
-
-    both_negative = (dpt.sign(start) == -1) & (dpt.sign(stop) == -1)
-    if dpnp.any(both_negative):
-        dpt.negative(start[both_negative], out=start[both_negative])
-        dpt.negative(stop[both_negative], out=stop[both_negative])
-        dpt.negative(out_sign[both_negative], out=out_sign[both_negative])
-
-    log_start = dpt.log10(start)
-    log_stop = dpt.log10(stop)
+    # Allow negative real values and ensure a consistent result for complex
+    # (including avoiding negligible real or imaginary parts in output) by
+    # rotating start to positive real, calculating, then undoing rotation.
+    out_sign = dpnp.sign(start)
+    start = start / out_sign
+    stop = stop / out_sign
+
+    log_start = dpnp.log10(start)
+    log_stop = dpnp.log10(stop)
     res = dpnp_logspace(
         log_start,
         log_stop,
         num=num,
         endpoint=endpoint,
         base=10.0,
-        dtype=dtype,
-        usm_type=_usm_type,
-        sycl_queue=sycl_queue_normalized,
-    ).get_array()
+        dtype=dt,
+        usm_type=usm_type,
+        sycl_queue=sycl_queue,
+    )
 
+    # Make sure the endpoints match the start and stop arguments. This is
+    # necessary because np.exp(np.log(x)) is not necessarily equal to x.
     if num > 0:
         res[0] = start
         if num > 1 and endpoint:
             res[-1] = stop
 
-    res = out_sign * res
+    res *= out_sign
 
     if axis != 0:
-        res = dpt.moveaxis(res, 0, axis)
-
-    res = dpt.astype(res, dtype, copy=False)
-    return dpnp_array._create_from_usm_ndarray(res)
+        res = dpnp.moveaxis(res, 0, axis)
+    return res.astype(dtype, copy=False)
 
 
 def dpnp_linspace(
@@ -172,14 +188,9 @@ def dpnp_linspace(
 
     num = operator.index(num)
     if num < 0:
-        raise ValueError("Number of points must be non-negative")
+        raise ValueError(f"Number of samples={num} must be non-negative.")
     step_num = (num - 1) if endpoint else num
 
-    step_nan = False
-    if step_num == 0:
-        step_nan = True
-        step = dpnp.nan
-
     if dpnp.isscalar(start) and dpnp.isscalar(stop):
         # Call linspace() function for scalars.
         usm_res = dpt.linspace(
@@ -191,8 +202,13 @@ def dpnp_linspace(
             sycl_queue=sycl_queue_normalized,
             endpoint=endpoint,
         )
-        if retstep is True and step_nan is False:
-            step = (stop - start) / step_num
+
+        # calculate the used step to return
+        if retstep is True:
+            if step_num > 0:
+                step = (stop - start) / step_num
+            else:
+                step = dpnp.nan
     else:
         usm_start = dpt.asarray(
             start,
@@ -204,6 +220,8 @@ def dpnp_linspace(
             stop, dtype=dt, usm_type=_usm_type, sycl_queue=sycl_queue_normalized
         )
 
+        delta = usm_stop - usm_start
+
         usm_res = dpt.arange(
             0,
             stop=num,
@@ -212,20 +230,30 @@ def dpnp_linspace(
             usm_type=_usm_type,
             sycl_queue=sycl_queue_normalized,
         )
+        usm_res = dpt.reshape(usm_res, (-1,) + (1,) * delta.ndim, copy=False)
+
+        if step_num > 0:
+            step = delta / step_num
+
+            # Special handling is needed for denormal numbers (when step == 0),
+            # see numpy#5437 for more details.
+            # Note, dpt.where() is used to avoid a synchronization branch.
+            usm_res = dpt.where(
+                step == 0, (usm_res / step_num) * delta, usm_res * step
+            )
+        else:
+            step = dpnp.nan
+            usm_res = usm_res * delta
 
-        if step_nan is False:
-            step = (usm_stop - usm_start) / step_num
-            usm_res = dpt.reshape(usm_res, (-1,) + (1,) * step.ndim, copy=False)
-            usm_res = usm_res * step
-            usm_res += usm_start
+        usm_res += usm_start
 
         if endpoint and num > 1:
-            usm_res[-1] = dpt.full(step.shape, usm_stop)
+            usm_res[-1, ...] = usm_stop
 
     if axis != 0:
         usm_res = dpt.moveaxis(usm_res, 0, axis)
 
-    if numpy.issubdtype(dtype, dpnp.integer):
+    if dpnp.issubdtype(dtype, dpnp.integer):
         dpt.floor(usm_res, out=usm_res)
 
     res = dpt.astype(usm_res, dtype, copy=False)
@@ -252,45 +280,36 @@ def dpnp_logspace(
     dtype=None,
     axis=0,
 ):
-    if not dpnp.isscalar(base):
-        usm_type_alloc, sycl_queue_alloc = get_usm_allocations(
-            [start, stop, base]
-        )
-
-        if sycl_queue is None and device is None:
-            sycl_queue = sycl_queue_alloc
-        sycl_queue = dpnp.get_normalized_queue_device(
-            sycl_queue=sycl_queue, device=device
-        )
-
-        if usm_type is None:
-            usm_type = "device" if usm_type_alloc is None else usm_type_alloc
-        else:
-            usm_type = usm_type
+    usm_type, sycl_queue = _get_usm_allocations(
+        [start, stop, base],
+        device=device,
+        usm_type=usm_type,
+        sycl_queue=sycl_queue,
+    )
 
-        start = _as_usm_ndarray(start, usm_type, sycl_queue)
-        stop = _as_usm_ndarray(stop, usm_type, sycl_queue)
-        base = _as_usm_ndarray(base, usm_type, sycl_queue)
+    if not dpnp.isscalar(base):
+        base = dpnp.array(base, usm_type=usm_type, sycl_queue=sycl_queue)
+        start = dpnp.array(start, usm_type=usm_type, sycl_queue=sycl_queue)
+        stop = dpnp.array(stop, usm_type=usm_type, sycl_queue=sycl_queue)
 
-        [start, stop, base] = dpt.broadcast_arrays(start, stop, base)
-        base = dpt.expand_dims(base, axis=axis)
+        start, stop, base = dpnp.broadcast_arrays(start, stop, base)
+        base = dpnp.expand_dims(base, axis=axis)
 
-    # assume res as not a tuple, because retstep is False
+    # assume `res` is not a tuple, because retstep is False
     res = dpnp_linspace(
         start,
         stop,
         num=num,
-        device=device,
         usm_type=usm_type,
         sycl_queue=sycl_queue,
         endpoint=endpoint,
         axis=axis,
-    ).get_array()
+    )
 
-    dpt.pow(base, res, out=res)
+    dpnp.pow(base, res, out=res)
     if dtype is not None:
-        res = dpt.astype(res, dtype, copy=False)
-    return dpnp_array._create_from_usm_ndarray(res)
+        res = res.astype(dtype, copy=False)
+    return res
 
 
 class dpnp_nd_grid:
diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py
index 60a55acd1f45..57bf50422fa0 100644
--- a/dpnp/dpnp_algo/dpnp_elementwise_common.py
+++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py
@@ -60,7 +60,7 @@
     "DPNPBinaryFunc",
     "DPNPBinaryFuncOutKw",
     "DPNPBinaryTwoOutputsFunc",
-    "DPNPFix",
+    "DPNPDeprecatedUnaryFunc",
     "DPNPImag",
     "DPNPReal",
     "DPNPRound",
@@ -231,6 +231,32 @@ def _unpack_out_kw(self, out):
         return out
 
 
+class DPNPDeprecatedUnaryFunc(DPNPUnaryFunc):
+    """
+    Class that implements a deprecated unary element-wise function.
+
+    Parameters
+    ----------
+    deprecated_msg : {str, None}, optional
+        Warning message to emit. If ``None`` or empty, no warning is issued.
+
+        Default: ``None``.
+
+    """
+
+    def __init__(self, *args, deprecated_msg=None, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._deprecated_msg = deprecated_msg
+
+    @wraps(DPNPUnaryFunc.__call__)
+    def __call__(self, *args, **kwargs):
+        if self._deprecated_msg:
+            warnings.warn(
+                self._deprecated_msg, DeprecationWarning, stacklevel=2
+            )
+        return super().__call__(*args, **kwargs)
+
+
 class DPNPUnaryTwoOutputsFunc(UnaryElementwiseFunc):
     """
     Class that implements unary element-wise functions with two output arrays.
@@ -1188,55 +1214,6 @@ def __call__(self, x, /, deg=False, *, out=None, order="K"):
         return res
 
 
-class DPNPFix(DPNPUnaryFunc):
-    """Class that implements dpnp.fix unary element-wise functions."""
-
-    def __init__(
-        self,
-        name,
-        result_type_resolver_fn,
-        unary_dp_impl_fn,
-        docs,
-    ):
-        super().__init__(
-            name,
-            result_type_resolver_fn,
-            unary_dp_impl_fn,
-            docs,
-        )
-
-    def __call__(self, x, /, out=None, *, order="K"):
-        if not dpnp.is_supported_array_type(x):
-            pass  # pass to raise error in main implementation
-        elif dpnp.issubdtype(x.dtype, dpnp.inexact):
-            pass  # for inexact types, pass to calculate in the backend
-        elif not (
-            out is None
-            or isinstance(out, tuple)
-            or dpnp.is_supported_array_type(out)
-        ):
-            pass  # pass to raise error in main implementation
-        elif not (
-            out is None or isinstance(out, tuple) or out.dtype == x.dtype
-        ):
-            # passing will raise an error but with incorrect needed dtype
-            raise ValueError(
-                f"Output array of type {x.dtype} is needed, got {out.dtype}"
-            )
-        else:
-            # for exact types, return the input
-            out = self._unpack_out_kw(out)
-            if out is None:
-                return dpnp.copy(x, order=order)
-
-            if isinstance(out, dpt.usm_ndarray):
-                out = dpnp_array._create_from_usm_ndarray(out)
-            out[...] = x
-            return out
-
-        return super().__call__(x, out=out, order=order)
-
-
 class DPNPI0(DPNPUnaryFunc):
     """Class that implements dpnp.i0 unary element-wise functions."""
 
diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index c42c9f8a2cb5..6a2b2fd1977f 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -98,6 +98,7 @@ def __init__(
             # or as USM memory allocation
             if isinstance(buffer, dpnp_array):
                 buffer = buffer.get_array()
+                offset += buffer._element_offset
 
             if dtype is None and hasattr(buffer, "dtype"):
                 dtype = buffer.dtype
diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py
index f019989a2ca8..4975db17c717 100644
--- a/dpnp/dpnp_container.py
+++ b/dpnp/dpnp_container.py
@@ -35,7 +35,6 @@
 
 """
 
-
 import dpctl.tensor as dpt
 import dpctl.utils as dpu
 
diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py
index 4cdc74c75ca0..fba1a215756a 100644
--- a/dpnp/dpnp_iface.py
+++ b/dpnp/dpnp_iface.py
@@ -38,6 +38,7 @@
  - The functions parameters check
 
 """
+
 # pylint: disable=protected-access
 
 import os
diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py
index 12c65d116a70..8d4ebdd1a6c2 100644
--- a/dpnp/dpnp_iface_arraycreation.py
+++ b/dpnp/dpnp_iface_arraycreation.py
@@ -39,7 +39,6 @@
 
 """
 
-
 # pylint: disable=duplicate-code
 
 import operator
@@ -2704,6 +2703,8 @@ def linspace(
         of tuples, tuples of lists, and ndarrays. If `endpoint` is set to
         ``False`` the sequence consists of all but the last of ``num + 1``
         evenly spaced samples, so that `stop` is excluded.
+    num : int
+        Number of samples. Must be non-negative.
     dtype : {None, str, dtype object}, optional
         The desired dtype for the array. If not given, a default dtype will be
         used that can represent the values (by considering Promotion Type Rule
diff --git a/dpnp/dpnp_iface_indexing.py b/dpnp/dpnp_iface_indexing.py
index 6375e5320e30..6e7ab778299b 100644
--- a/dpnp/dpnp_iface_indexing.py
+++ b/dpnp/dpnp_iface_indexing.py
@@ -721,23 +721,21 @@ def diagonal(a, offset=0, axis1=0, axis2=1):
     a_straides = a.strides
     n, m = a_shape[-2:]
     st_n, st_m = a_straides[-2:]
-    # pylint: disable=W0212
-    a_element_offset = a.get_array()._element_offset
 
     # Compute shape, strides and offset of the resulting diagonal array
     # based on the input offset
     if offset == 0:
         out_shape = a_shape[:-2] + (min(n, m),)
         out_strides = a_straides[:-2] + (st_n + st_m,)
-        out_offset = a_element_offset
+        out_offset = 0
     elif 0 < offset < m:
         out_shape = a_shape[:-2] + (min(n, m - offset),)
         out_strides = a_straides[:-2] + (st_n + st_m,)
-        out_offset = a_element_offset + st_m * offset
+        out_offset = st_m * offset
     else:
         out_shape = a_shape[:-2] + (0,)
         out_strides = a_straides[:-2] + (1,)
-        out_offset = a_element_offset
+        out_offset = 0
 
     return dpnp_array(
         out_shape, buffer=a, strides=out_strides, offset=out_offset
diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py
index a0068a3597e5..acb123473482 100644
--- a/dpnp/dpnp_iface_linearalgebra.py
+++ b/dpnp/dpnp_iface_linearalgebra.py
@@ -1121,7 +1121,7 @@ def outer(a, b, out=None):
     return result
 
 
-def tensordot(a, b, axes=2):
+def tensordot(a, b, /, *, axes=2):
     r"""
     Compute tensor dot product along specified axes.
 
@@ -1148,7 +1148,10 @@ def tensordot(a, b, axes=2):
           axes must match.
         * (2,) array_like: A list of axes to be summed over, first sequence
           applying to `a`, second to `b`. Both elements array_like must be of
-          the same length.
+          the same length. Each axis may appear at most once; repeated axes are
+          not allowed.
+
+        Default: ``2``.
 
     Returns
     -------
@@ -1178,6 +1181,13 @@ def tensordot(a, b, axes=2):
     two sequences of the same length, with the first axis to sum over given
     first in both sequences, the second axis second, and so forth.
 
+    For example, if ``a.shape == (2, 3, 4)`` and ``b.shape == (3, 4, 5)``, then
+    ``axes=([1, 2], [0, 1])`` sums over the ``(3, 4)`` dimensions of both
+    arrays and produces an output of shape ``(2, 5)``.
+
+    Each summation axis corresponds to a distinct contraction index; repeating
+    an axis (for example ``axes=([1, 1], [0, 0])``) is invalid.
+
     The shape of the result consists of the non-contracted axes of the
     first tensor, followed by the non-contracted axes of the second.
 
diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py
index 3cec24a44159..9df5278bd16b 100644
--- a/dpnp/dpnp_iface_manipulation.py
+++ b/dpnp/dpnp_iface_manipulation.py
@@ -39,7 +39,6 @@
 
 """
 
-
 import math
 import operator
 import warnings
@@ -3418,7 +3417,7 @@ def rot90(m, k=1, axes=(0, 1)):
         return dpnp.flip(dpnp.flip(m, axes[0]), axes[1])
 
     axes_list = list(range(0, m_ndim))
-    (axes_list[axes[0]], axes_list[axes[1]]) = (
+    axes_list[axes[0]], axes_list[axes[1]] = (
         axes_list[axes[1]],
         axes_list[axes[0]],
     )
diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index 3dc5ccb82884..3e6a4b0ed121 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -66,7 +66,7 @@
     DPNPBinaryFunc,
     DPNPBinaryFuncOutKw,
     DPNPBinaryTwoOutputsFunc,
-    DPNPFix,
+    DPNPDeprecatedUnaryFunc,
     DPNPImag,
     DPNPReal,
     DPNPRound,
@@ -1854,6 +1854,12 @@ def ediff1d(ary, to_end=None, to_begin=None):
 :obj:`dpnp.floor` : Return the floor of the input, element-wise.
 :obj:`dpnp.ceil` : Return the ceiling of the input, element-wise.
 
+Warning
+-------
+This function is deprecated. It is recommended to use
+:func:`dpnp.trunc` instead, as it provides the same functionality of
+truncating decimal values to their integer parts.
+
 Examples
 --------
 >>> import dpnp as np
@@ -1867,11 +1873,15 @@ def ediff1d(ary, to_end=None, to_begin=None):
 
 """
 
-fix = DPNPFix(
+# reuse trunc backend implementation for fix
+fix = DPNPDeprecatedUnaryFunc(
     "fix",
-    ufi._fix_result_type,
-    ufi._fix,
+    ti._trunc_result_type,
+    ti._trunc,
     _FIX_DOCSTRING,
+    mkl_fn_to_call="_mkl_trunc_to_call",
+    mkl_impl_fn="_trunc",
+    deprecated_msg="dpnp.fix is deprecated in favor of dpnp.trunc",
 )
 
 
diff --git a/dpnp/linalg/__init__.py b/dpnp/linalg/__init__.py
index e04928393a8c..3fa27db33473 100644
--- a/dpnp/linalg/__init__.py
+++ b/dpnp/linalg/__init__.py
@@ -35,7 +35,6 @@
 
 """
 
-
 from .dpnp_iface_linalg import (
     LinAlgError,
     cholesky,
diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py
index 95bb849afa3c..6959565ecf17 100644
--- a/dpnp/linalg/dpnp_iface_linalg.py
+++ b/dpnp/linalg/dpnp_iface_linalg.py
@@ -1975,9 +1975,10 @@ def tensordot(a, b, /, *, axes=2):
           axes must match.
         * (2,) array_like: A list of axes to be summed over, first sequence
           applying to `a`, second to `b`. Both elements array_like must be of
-          the same length.
+          the same length. Each axis may appear at most once; repeated axes are
+          not allowed.
 
-          Default: ``2``.
+        Default: ``2``.
 
     Returns
     -------
@@ -2007,6 +2008,13 @@ def tensordot(a, b, /, *, axes=2):
     two sequences of the same length, with the first axis to sum over given
     first in both sequences, the second axis second, and so forth.
 
+    For example, if ``a.shape == (2, 3, 4)`` and ``b.shape == (3, 4, 5)``, then
+    ``axes=([1, 2], [0, 1])`` sums over the ``(3, 4)`` dimensions of both
+    arrays and produces an output of shape ``(2, 5)``.
+
+    Each summation axis corresponds to a distinct contraction index; repeating
+    an axis (for example ``axes=([1, 1], [0, 0])``) is invalid.
+
     The shape of the result consists of the non-contracted axes of the
     first tensor, followed by the non-contracted axes of the second.
 
diff --git a/dpnp/random/dpnp_random_state.py b/dpnp/random/dpnp_random_state.py
index 560e2670932b..e49fe739aedd 100644
--- a/dpnp/random/dpnp_random_state.py
+++ b/dpnp/random/dpnp_random_state.py
@@ -36,7 +36,6 @@
 
 """
 
-
 import dpctl.utils as dpu
 import numpy
 
diff --git a/dpnp/scipy/linalg/__init__.py b/dpnp/scipy/linalg/__init__.py
index a97f2cfb829c..3afc08a6fdb9 100644
--- a/dpnp/scipy/linalg/__init__.py
+++ b/dpnp/scipy/linalg/__init__.py
@@ -35,7 +35,6 @@
 
 """
 
-
 from ._decomp_lu import lu_factor, lu_solve
 
 __all__ = [
diff --git a/dpnp/scipy/linalg/_decomp_lu.py b/dpnp/scipy/linalg/_decomp_lu.py
index d2a58fba14d6..292d7fffe4b4 100644
--- a/dpnp/scipy/linalg/_decomp_lu.py
+++ b/dpnp/scipy/linalg/_decomp_lu.py
@@ -39,7 +39,6 @@
 
 """
 
-
 import dpnp
 from dpnp.linalg.dpnp_utils_linalg import (
     assert_stacked_2d,
@@ -181,7 +180,7 @@ def lu_solve(lu_and_piv, b, trans=0, overwrite_b=False, check_finite=True):
 
     """
 
-    (lu, piv) = lu_and_piv
+    lu, piv = lu_and_piv
     dpnp.check_supported_arrays_type(lu, piv, b)
     assert_stacked_2d(lu)
     assert_stacked_square(lu)
diff --git a/dpnp/scipy/linalg/_utils.py b/dpnp/scipy/linalg/_utils.py
index be736e076d86..282c645d1095 100644
--- a/dpnp/scipy/linalg/_utils.py
+++ b/dpnp/scipy/linalg/_utils.py
@@ -37,7 +37,6 @@
 
 """
 
-
 # pylint: disable=no-name-in-module
 # pylint: disable=protected-access
 
diff --git a/dpnp/tests/helper.py b/dpnp/tests/helper.py
index e5e251231654..65917ee6c340 100644
--- a/dpnp/tests/helper.py
+++ b/dpnp/tests/helper.py
@@ -1,4 +1,5 @@
 import importlib.util
+from enum import Enum
 from sys import platform
 
 import dpctl
@@ -11,6 +12,11 @@
 from . import config
 
 
+class LTS_VERSION(Enum):
+    V1_3 = "1.3"
+    V1_6 = "1.6"
+
+
 def _assert_dtype(a_dt, b_dt, check_only_type_kind=False):
     if check_only_type_kind:
         assert a_dt.kind == b_dt.kind, f"{a_dt.kind} != {b_dt.kind}"
@@ -69,6 +75,9 @@ def assert_dtype_allclose(
         x.dtype, dpnp.inexact
     )
 
+    if not hasattr(numpy_arr, "dtype"):
+        numpy_arr = numpy.array(numpy_arr)
+
     if is_inexact(dpnp_arr) or is_inexact(numpy_arr):
         tol_dpnp = (
             dpnp.finfo(dpnp_arr).resolution
@@ -472,13 +481,13 @@ def is_lnl(device=None):
     return _get_dev_mask(device) == 0x6400
 
 
-def is_lts_driver(device=None):
+def is_lts_driver(version=LTS_VERSION.V1_3, device=None):
     """
     Return True if a test is running on a GPU device with LTS driver version,
     False otherwise.
     """
     dev = dpctl.select_default_device() if device is None else device
-    return dev.has_aspect_gpu and "1.3" in dev.driver_version
+    return dev.has_aspect_gpu and version.value in dev.driver_version
 
 
 def is_ptl(device=None):
diff --git a/dpnp/tests/test_arraycreation.py b/dpnp/tests/test_arraycreation.py
index 28d51e8e6d31..eb20f9b3ffe5 100644
--- a/dpnp/tests/test_arraycreation.py
+++ b/dpnp/tests/test_arraycreation.py
@@ -19,9 +19,8 @@
     assert_dtype_allclose,
     get_all_dtypes,
     get_array,
-    is_lts_driver,
-    is_tgllp_iris_xe,
-    is_win_platform,
+    get_float_dtypes,
+    has_support_aspect64,
 )
 from .third_party.cupy import testing
 
@@ -83,6 +82,242 @@ def test_validate_positional_args(self, xp):
         )
 
 
+class TestGeomspace:
+    @pytest.mark.parametrize("sign", [-1, 1])
+    @pytest.mark.parametrize("dtype", get_all_dtypes())
+    @pytest.mark.parametrize("num", [2, 4, 8, 3, 9, 27])
+    @pytest.mark.parametrize("endpoint", [True, False])
+    def test_basic(self, sign, dtype, num, endpoint):
+        start = 2 * sign
+        stop = 127 * sign
+
+        func = lambda xp: xp.geomspace(
+            start, stop, num, endpoint=endpoint, dtype=dtype
+        )
+
+        np_res = func(numpy)
+        dpnp_res = func(dpnp)
+
+        assert_allclose(dpnp_res, np_res, rtol=1e-06)
+
+    @pytest.mark.parametrize("start", [1j, 1 + 1j])
+    @pytest.mark.parametrize("stop", [10j, 10 + 10j])
+    def test_complex(self, start, stop):
+        func = lambda xp: xp.geomspace(start, stop, num=10)
+        np_res = func(numpy)
+        dpnp_res = func(dpnp)
+        assert_allclose(dpnp_res, np_res, rtol=1e-06)
+
+    @pytest.mark.parametrize("axis", [0, 1])
+    def test_axis(self, axis):
+        func = lambda xp: xp.geomspace([2, 3], [20, 15], num=10, axis=axis)
+        np_res = func(numpy)
+        dpnp_res = func(dpnp)
+        assert_allclose(dpnp_res, np_res, rtol=1e-06)
+
+    def test_num_zero(self):
+        func = lambda xp: xp.geomspace(1, 10, num=0, endpoint=False)
+        np_res = func(numpy)
+        dpnp_res = func(dpnp)
+        assert_allclose(dpnp_res, np_res)
+
+    @pytest.mark.parametrize(
+        "start, stop, num",
+        [
+            (0, 5, 3),
+            (2, 0, 3),
+            (0, 0, 3),
+            (dpnp.array([0]), 7, 10),
+            (-2, numpy.array([[0]]), 7),
+            ([2, 4, 0], 3, 5),
+            (10, [[1, 0], [2, 3]], 3),
+        ],
+    )
+    def test_zero_error(self, start, stop, num):
+        with pytest.raises(ValueError):
+            dpnp.geomspace(start, stop, num)
+
+
+class TestLinspace:
+    @pytest.mark.parametrize("start", [0, -5, 10, -2.5, 9.7])
+    @pytest.mark.parametrize("stop", [0, 10, -2, 20.5, 120])
+    @pytest.mark.parametrize("num", [0, 1, 5, numpy.array(10)])
+    @pytest.mark.parametrize(
+        "dt", get_all_dtypes(no_bool=True, no_float16=False)
+    )
+    @pytest.mark.parametrize("retstep", [True, False])
+    def test_basic(self, start, stop, num, dt, retstep):
+        if (
+            not has_support_aspect64()
+            and numpy.issubdtype(dt, numpy.integer)
+            and start == -5
+            and stop == 10
+            and num == 10
+        ):
+            pytest.skip("due to dpctl-1056")
+
+        if numpy.issubdtype(dt, numpy.unsignedinteger):
+            start = abs(start)
+            stop = abs(stop)
+
+        res = dpnp.linspace(start, stop, num, dtype=dt, retstep=retstep)
+        exp = numpy.linspace(start, stop, num, dtype=dt, retstep=retstep)
+        if retstep:
+            res, res_step = res
+            exp, exp_step = exp
+            assert_dtype_allclose(res_step, exp_step)
+
+        if numpy.issubdtype(dt, numpy.integer):
+            assert_allclose(res, exp, rtol=1)
+        else:
+            assert_dtype_allclose(res, exp)
+
+    @pytest.mark.parametrize(
+        "start, stop",
+        [
+            (dpnp.array(1), dpnp.array([-4])),
+            (dpnp.array([2.6]), dpnp.array([[2.6], [-4]])),
+            (numpy.array([[-6.7, 3]]), numpy.array(2)),
+            ([1, -4], [[-4.6]]),
+            ((3, 5), (3,)),
+        ],
+    )
+    @pytest.mark.parametrize("num", [0, 1, 5])
+    @pytest.mark.parametrize(
+        "dt", get_all_dtypes(no_bool=True, no_float16=False)
+    )
+    @pytest.mark.parametrize("retstep", [True, False])
+    def test_start_stop_arrays(self, start, stop, num, dt, retstep):
+        res = dpnp.linspace(start, stop, num, dtype=dt, retstep=retstep)
+        exp = numpy.linspace(
+            get_array(numpy, start),
+            get_array(numpy, stop),
+            num,
+            dtype=dt,
+            retstep=retstep,
+        )
+        if retstep:
+            res, res_step = res
+            exp, exp_step = exp
+            assert_dtype_allclose(res_step, exp_step)
+        assert_dtype_allclose(res, exp)
+
+    @pytest.mark.parametrize(
+        "start, stop",
+        [(1 + 2j, 3 + 4j), (1j, 10), ([0, 1], 3 + 2j)],
+    )
+    def test_start_stop_complex(self, start, stop):
+        result = dpnp.linspace(start, stop, num=5)
+        expected = numpy.linspace(start, stop, num=5)
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize("dt", get_float_dtypes())
+    def test_denormal_numbers(self, dt):
+        stop = numpy.nextafter(dt(0), dt(1)) * 5  # denormal number
+
+        result = dpnp.linspace(0, stop, num=10, endpoint=False, dtype=dt)
+        expected = numpy.linspace(0, stop, num=10, endpoint=False, dtype=dt)
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.skipif(not has_support_aspect64(), reason="due to dpctl-1056")
+    def test_equivalent_to_arange(self):
+        result = dpnp.linspace(0, 35, num=36, dtype=int)
+        expected = numpy.linspace(0, 35, num=36, dtype=int)
+        assert_equal(result, expected)
+
+    def test_round_negative(self):
+        result = dpnp.linspace(-1, 3, num=8, dtype=int)
+        expected = numpy.linspace(-1, 3, num=8, dtype=int)
+        assert_array_equal(result, expected)
+
+    def test_step_zero(self):
+        start = numpy.array([0.0, 1.0])
+        stop = numpy.array([2.0, 1.0])
+
+        result = dpnp.linspace(start, stop, num=3)
+        expected = numpy.linspace(start, stop, num=3)
+        assert_array_equal(result, expected)
+
+    @pytest.mark.parametrize("endpoint", [True, False])
+    def test_num_zero(self, endpoint):
+        start, stop = 0, [0, 1, 2, 3, 4]
+        result = dpnp.linspace(start, stop, num=0, endpoint=endpoint)
+        expected = numpy.linspace(start, stop, num=0, endpoint=endpoint)
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize("axis", [0, 1])
+    def test_axis(self, axis):
+        func = lambda xp: xp.linspace([2, 3], [20, 15], num=10, axis=axis)
+        assert_allclose(func(dpnp), func(numpy))
+
+    @pytest.mark.parametrize("xp", [dpnp, numpy])
+    def test_negative_num(self, xp):
+        with pytest.raises(ValueError, match="must be non-negative"):
+            _ = xp.linspace(0, 10, num=-1)
+
+    @pytest.mark.parametrize("xp", [dpnp, numpy])
+    def test_float_num(self, xp):
+        with pytest.raises(
+            TypeError, match="cannot be interpreted as an integer"
+        ):
+            _ = xp.linspace(0, 1, num=2.5)
+
+
+class TestLogspace:
+    @pytest.mark.parametrize("dtype", get_all_dtypes())
+    @pytest.mark.parametrize("num", [2, 4, 8, 3, 9, 27])
+    @pytest.mark.parametrize("endpoint", [True, False])
+    def test_basic(self, dtype, num, endpoint):
+        start = 2
+        stop = 5
+        base = 2
+
+        func = lambda xp: xp.logspace(
+            start, stop, num, endpoint=endpoint, dtype=dtype, base=base
+        )
+
+        np_res = func(numpy)
+        dpnp_res = func(dpnp)
+        assert_allclose(dpnp_res, np_res, rtol=1e-06)
+
+    @testing.with_requires("numpy>=1.25.0")
+    @pytest.mark.parametrize("axis", [0, 1])
+    def test_axis(self, axis):
+        func = lambda xp: xp.logspace(
+            [2, 3], [20, 15], num=2, base=[[1, 3], [5, 7]], axis=axis
+        )
+        assert_dtype_allclose(func(dpnp), func(numpy))
+
+    def test_list_input(self):
+        expected = numpy.logspace([0], [2], base=[5])
+        result = dpnp.logspace([0], [2], base=[5])
+        assert_dtype_allclose(result, expected)
+
+
+class TestSpaceLike:
+    @pytest.mark.parametrize("func", ["geomspace", "linspace", "logspace"])
+    @pytest.mark.parametrize(
+        "start_dtype", [numpy.float64, numpy.float32, numpy.int64, numpy.int32]
+    )
+    @pytest.mark.parametrize(
+        "stop_dtype", [numpy.float64, numpy.float32, numpy.int64, numpy.int32]
+    )
+    def test_numpy_dtype(self, func, start_dtype, stop_dtype):
+        start = numpy.array([1, 2, 3], dtype=start_dtype)
+        stop = numpy.array([11, 7, -2], dtype=stop_dtype)
+        getattr(dpnp, func)(start, stop, 10)
+
+    @pytest.mark.parametrize("xp", [dpnp, numpy])
+    @pytest.mark.parametrize("func", ["geomspace", "logspace"])
+    @pytest.mark.parametrize(
+        "start, stop, num",
+        [(2, 5, -3), ([2, 3], 5, -3)],
+    )
+    def test_space_num_error(self, xp, func, start, stop, num):
+        with pytest.raises(ValueError):
+            getattr(xp, func)(start, stop, num)
+
+
 class TestTrace:
     @pytest.mark.parametrize("a_sh", [(3, 4), (2, 2, 2)])
     @pytest.mark.parametrize(
@@ -141,6 +376,16 @@ def test_linalg_trace(self, dtype, offset):
         expected = numpy.linalg.trace(a, offset=offset, dtype=dtype)
         assert_equal(result, expected)
 
+    @pytest.mark.parametrize("offset", [-1, 0, 1])
+    def test_ndarray_offset(self, offset):
+        ia = dpnp.arange(8, dtype=dpnp.uint8).reshape((2, 2, 2))
+        ia = dpnp.ndarray((2, 2), buffer=ia, offset=1)
+        a = ia.asnumpy()
+
+        result = dpnp.linalg.trace(ia, offset=offset)
+        expected = numpy.linalg.trace(a, offset=offset)
+        assert_equal(result, expected)
+
 
 @pytest.mark.parametrize(
     "func, args",
@@ -734,101 +979,6 @@ def test_dpctl_tensor_input(func, args):
         assert_array_equal(X, Y)
 
 
-@pytest.mark.parametrize("start", [0, -5, 10, -2.5, 9.7])
-@pytest.mark.parametrize("stop", [0, 10, -2, 20.5, 120])
-@pytest.mark.parametrize(
-    "num",
-    [1, 5, numpy.array(10), dpnp.array(17), dpt.asarray(100)],
-    ids=["1", "5", "numpy.array(10)", "dpnp.array(17)", "dpt.asarray(100)"],
-)
-@pytest.mark.parametrize(
-    "dtype",
-    get_all_dtypes(no_bool=True, no_float16=False),
-)
-@pytest.mark.parametrize("retstep", [True, False])
-def test_linspace(start, stop, num, dtype, retstep):
-    if numpy.issubdtype(dtype, numpy.unsignedinteger):
-        start = abs(start)
-        stop = abs(stop)
-
-    res_np = numpy.linspace(start, stop, num, dtype=dtype, retstep=retstep)
-    res_dp = dpnp.linspace(start, stop, num, dtype=dtype, retstep=retstep)
-
-    if retstep:
-        [res_np, step_np] = res_np
-        [res_dp, step_dp] = res_dp
-        assert_allclose(step_np, step_dp)
-
-    if numpy.issubdtype(dtype, dpnp.integer):
-        assert_allclose(res_np, res_dp, rtol=1)
-    else:
-        assert_dtype_allclose(res_dp, res_np)
-
-
-@pytest.mark.parametrize("func", ["geomspace", "linspace", "logspace"])
-@pytest.mark.parametrize(
-    "start_dtype", [numpy.float64, numpy.float32, numpy.int64, numpy.int32]
-)
-@pytest.mark.parametrize(
-    "stop_dtype", [numpy.float64, numpy.float32, numpy.int64, numpy.int32]
-)
-def test_space_numpy_dtype(func, start_dtype, stop_dtype):
-    start = numpy.array([1, 2, 3], dtype=start_dtype)
-    stop = numpy.array([11, 7, -2], dtype=stop_dtype)
-    getattr(dpnp, func)(start, stop, 10)
-
-
-@pytest.mark.parametrize(
-    "start",
-    [
-        dpnp.array(1),
-        dpnp.array([2.6]),
-        numpy.array([[-6.7, 3]]),
-        [1, -4],
-        (3, 5),
-    ],
-)
-@pytest.mark.parametrize(
-    "stop",
-    [
-        dpnp.array([-4]),
-        dpnp.array([[2.6], [-4]]),
-        numpy.array(2),
-        [[-4.6]],
-        (3,),
-    ],
-)
-def test_linspace_arrays(start, stop):
-    func = lambda xp: xp.linspace(get_array(xp, start), get_array(xp, stop), 10)
-    assert func(numpy).shape == func(dpnp).shape
-
-
-def test_linspace_complex():
-    func = lambda xp: xp.linspace(0, 3 + 2j, num=1000)
-    assert_allclose(func(dpnp), func(numpy))
-
-
-@pytest.mark.parametrize("axis", [0, 1])
-def test_linspace_axis(axis):
-    func = lambda xp: xp.linspace([2, 3], [20, 15], num=10, axis=axis)
-    assert_allclose(func(dpnp), func(numpy))
-
-
-def test_linspace_step_nan():
-    func = lambda xp: xp.linspace(1, 2, num=0, endpoint=False)
-    assert_allclose(func(dpnp), func(numpy))
-
-
-@pytest.mark.parametrize("start", [1, [1, 1]])
-@pytest.mark.parametrize("stop", [10, [10 + 10]])
-def test_linspace_retstep(start, stop):
-    func = lambda xp: xp.linspace(start, stop, num=10, retstep=True)
-    np_res = func(numpy)
-    dpnp_res = func(dpnp)
-    assert_allclose(dpnp_res[0], np_res[0])
-    assert_allclose(dpnp_res[1], np_res[1])
-
-
 @pytest.mark.parametrize(
     "arrays",
     [[], [[1]], [[1, 2, 3], [4, 5, 6]], [[1, 2], [3, 4], [5, 6]]],
@@ -853,106 +1003,6 @@ def test_set_shape(shape):
     assert_array_equal(na, da)
 
 
-def test_geomspace_zero_error():
-    with pytest.raises(ValueError):
-        dpnp.geomspace(0, 5, 3)
-        dpnp.geomspace(2, 0, 3)
-        dpnp.geomspace(0, 0, 3)
-
-
-def test_space_num_error():
-    with pytest.raises(ValueError):
-        dpnp.linspace(2, 5, -3)
-        dpnp.geomspace(2, 5, -3)
-        dpnp.logspace(2, 5, -3)
-        dpnp.linspace([2, 3], 5, -3)
-        dpnp.geomspace([2, 3], 5, -3)
-        dpnp.logspace([2, 3], 5, -3)
-
-
-@pytest.mark.parametrize("sign", [-1, 1])
-@pytest.mark.parametrize("dtype", get_all_dtypes())
-@pytest.mark.parametrize("num", [2, 4, 8, 3, 9, 27])
-@pytest.mark.parametrize("endpoint", [True, False])
-def test_geomspace(sign, dtype, num, endpoint):
-    start = 2 * sign
-    stop = 127 * sign
-
-    func = lambda xp: xp.geomspace(
-        start, stop, num, endpoint=endpoint, dtype=dtype
-    )
-
-    np_res = func(numpy)
-    dpnp_res = func(dpnp)
-
-    assert_allclose(dpnp_res, np_res, rtol=1e-06)
-
-
-@pytest.mark.parametrize("start", [1j, 1 + 1j])
-@pytest.mark.parametrize("stop", [10j, 10 + 10j])
-def test_geomspace_complex(start, stop):
-    func = lambda xp: xp.geomspace(start, stop, num=10)
-    np_res = func(numpy)
-    dpnp_res = func(dpnp)
-    assert_allclose(dpnp_res, np_res, rtol=1e-06)
-
-
-@pytest.mark.parametrize("axis", [0, 1])
-def test_geomspace_axis(axis):
-    func = lambda xp: xp.geomspace([2, 3], [20, 15], num=10, axis=axis)
-    np_res = func(numpy)
-    dpnp_res = func(dpnp)
-    assert_allclose(dpnp_res, np_res, rtol=1e-06)
-
-
-def test_geomspace_num0():
-    func = lambda xp: xp.geomspace(1, 10, num=0, endpoint=False)
-    np_res = func(numpy)
-    dpnp_res = func(dpnp)
-    assert_allclose(dpnp_res, np_res)
-
-
-@pytest.mark.parametrize("dtype", get_all_dtypes())
-@pytest.mark.parametrize("num", [2, 4, 8, 3, 9, 27])
-@pytest.mark.parametrize("endpoint", [True, False])
-def test_logspace(dtype, num, endpoint):
-    if not is_win_platform() and is_tgllp_iris_xe() and is_lts_driver():
-        if (
-            dpnp.issubdtype(dtype, dpnp.integer)
-            and num in [8, 27]
-            and endpoint is True
-        ):
-            pytest.skip("SAT-7978")
-
-    start = 2
-    stop = 5
-    base = 2
-
-    func = lambda xp: xp.logspace(
-        start, stop, num, endpoint=endpoint, dtype=dtype, base=base
-    )
-
-    np_res = func(numpy)
-    dpnp_res = func(dpnp)
-
-    assert_allclose(dpnp_res, np_res, rtol=1e-06)
-
-
-@testing.with_requires("numpy>=1.25.0")
-@pytest.mark.parametrize("axis", [0, 1])
-def test_logspace_axis(axis):
-    func = lambda xp: xp.logspace(
-        [2, 3], [20, 15], num=2, base=[[1, 3], [5, 7]], axis=axis
-    )
-    assert_dtype_allclose(func(dpnp), func(numpy))
-
-
-def test_logspace_list_input():
-    expected = numpy.logspace([0], [2], base=[5])
-    result = dpnp.logspace([0], [2], base=[5])
-    assert_dtype_allclose(result, expected)
-
-
 @pytest.mark.parametrize(
     "data", [(), 1, (2, 3), [4], numpy.array(5), numpy.array([6, 7])]
 )
diff --git a/dpnp/tests/test_arraypad.py b/dpnp/tests/test_arraypad.py
index 9a88dd8bab96..aee93703895b 100644
--- a/dpnp/tests/test_arraypad.py
+++ b/dpnp/tests/test_arraypad.py
@@ -73,11 +73,8 @@ def test_non_contiguous_array(self, mode):
         else:
             assert_array_equal(result, expected)
 
-    # TODO: include "linear_ramp" when dpnp issue gh-2084 is resolved
     @pytest.mark.parametrize("pad_width", [0, (0, 0), ((0, 0), (0, 0))])
-    @pytest.mark.parametrize(
-        "mode", [m for m in _modes if m not in {"linear_ramp"}]
-    )
+    @pytest.mark.parametrize("mode", _modes)
     def test_zero_pad_width(self, pad_width, mode):
         arr = dpnp.arange(30).reshape(6, 5)
         assert_array_equal(arr, dpnp.pad(arr, pad_width, mode=mode))
diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py
index 77c65991e9cd..d443b71adff8 100644
--- a/dpnp/tests/test_mathematical.py
+++ b/dpnp/tests/test_mathematical.py
@@ -20,6 +20,7 @@
 from dpnp.dpnp_utils import map_dtype_to_device
 
 from .helper import (
+    LTS_VERSION,
     assert_dtype_allclose,
     generate_random_numpy_array,
     get_abs_array,
@@ -33,6 +34,7 @@
     has_support_aspect16,
     has_support_aspect64,
     is_intel_numpy,
+    is_lts_driver,
     numpy_version,
 )
 from .third_party.cupy import testing
@@ -217,6 +219,9 @@ def _get_exp_array(self, a, axis, dtype):
     @pytest.mark.parametrize("axis", [None, 2, -1])
     @pytest.mark.parametrize("include_initial", [True, False])
     def test_basic(self, dtype, axis, include_initial):
+        if axis is None and not is_lts_driver(version=LTS_VERSION.V1_6):
+            pytest.skip("due to SAT-8336")
+
         a = dpnp.ones((3, 4, 5, 6, 7), dtype=dtype)
         res = dpnp.cumlogsumexp(a, axis=axis, include_initial=include_initial)
 
@@ -234,6 +239,9 @@ def test_basic(self, dtype, axis, include_initial):
     @pytest.mark.parametrize("axis", [None, 2, -1])
     @pytest.mark.parametrize("include_initial", [True, False])
     def test_include_initial(self, dtype, axis, include_initial):
+        if axis is None and not is_lts_driver(version=LTS_VERSION.V1_6):
+            pytest.skip("due to SAT-8336")
+
         a = dpnp.ones((3, 4, 5, 6, 7), dtype=dtype)
 
         if dpnp.issubdtype(a, dpnp.float32):
@@ -2021,7 +2029,18 @@ def test_out_dtype(self, func):
 
     @pytest.mark.parametrize("xp", [numpy, dpnp])
     @pytest.mark.parametrize(
-        "func", ["abs", "fix", "round", "add", "frexp", "divmod"]
+        "func",
+        [
+            "abs",
+            pytest.param(
+                "fix",
+                marks=pytest.mark.filterwarnings("ignore::DeprecationWarning"),
+            ),
+            "round",
+            "add",
+            "frexp",
+            "divmod",
+        ],
     )
     def test_out_wrong_tuple_len(self, xp, func):
         if func == "round" and xp is numpy:
@@ -2536,7 +2555,18 @@ def test_projection(self, dtype):
         assert dpnp.allclose(result, expected)
 
 
-@pytest.mark.parametrize("func", ["ceil", "floor", "trunc", "fix"])
+@pytest.mark.parametrize(
+    "func",
+    [
+        "ceil",
+        "floor",
+        "trunc",
+        pytest.param(
+            "fix",
+            marks=pytest.mark.filterwarnings("ignore::DeprecationWarning"),
+        ),
+    ],
+)
 class TestRoundingFuncs:
     @testing.with_requires("numpy>=2.1.0")
     @pytest.mark.parametrize(
diff --git a/dpnp/tests/test_memory.py b/dpnp/tests/test_memory.py
index ce9c7e60f030..1bc0da8c1535 100644
--- a/dpnp/tests/test_memory.py
+++ b/dpnp/tests/test_memory.py
@@ -31,3 +31,14 @@ def test_ndarray_from_data(self):
         a = dpnp.empty(5)
         b = dpnp.ndarray(a.shape, buffer=a.data)
         assert b.data.ptr == a.data.ptr
+
+    def test_view_non_zero_offset(self):
+        n, m = 2, 8
+        plane = n * m
+
+        a = dpnp.empty(4 * plane)
+        sl = a[plane:]  # non-zero offset view
+
+        pl = dpnp.ndarray((n, m), dtype=a.dtype, buffer=sl)
+        assert pl.data.ptr == sl.data.ptr
+        assert a.data.ptr != sl.data.ptr
diff --git a/dpnp/tests/test_product.py b/dpnp/tests/test_product.py
index 763049e8791e..afe767a5e5d9 100644
--- a/dpnp/tests/test_product.py
+++ b/dpnp/tests/test_product.py
@@ -1842,6 +1842,13 @@ def test_error(self):
         with pytest.raises(ValueError):
             dpnp.tensordot(dpnp.arange(4), dpnp.array(5), axes=-1)
 
+    @pytest.mark.parametrize("xp", [numpy, dpnp])
+    def test_repeated_axes(self, xp):
+        a = xp.ones((2, 3, 3))
+        b = xp.ones((3, 3, 4))
+        with pytest.raises(ValueError):
+            xp.tensordot(a, b, axes=([1, 1], [0, 0]))
+
 
 class TestVdot:
     @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True))
diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py
index 0bd4d6b53337..d1853579036a 100644
--- a/dpnp/tests/test_sycl_queue.py
+++ b/dpnp/tests/test_sycl_queue.py
@@ -133,10 +133,8 @@ def test_array_creation_from_array(func, args, device_x, device_y):
     assert_sycl_queue_equal(y.sycl_queue, x.sycl_queue)
 
     # cross device
-    # TODO: include geomspace when issue dpnp#2352 is resolved
-    if func != "geomspace":
-        y = getattr(dpnp, func)(*args, device=device_y)
-        assert_sycl_queue_equal(y.sycl_queue, x.to_device(device_y).sycl_queue)
+    y = getattr(dpnp, func)(*args, device=device_y)
+    assert_sycl_queue_equal(y.sycl_queue, x.to_device(device_y).sycl_queue)
 
 
 @pytest.mark.parametrize("device_x", valid_dev, ids=dev_ids)
@@ -148,10 +146,9 @@ def test_array_creation_logspace_base(device_x, device_y):
     y = dpnp.logspace(0, 8, 4, base=x[1:3])
     assert_sycl_queue_equal(y.sycl_queue, x.sycl_queue)
 
-    # TODO: include geomspace when issue dpnp#2353 is resolved
     # cross device
-    # y = dpnp.logspace(0, 8, 4, base=x[1:3], device=device_y)
-    # assert_sycl_queue_equal(y.sycl_queue, x.to_device(device_y).sycl_queue)
+    y = dpnp.logspace(0, 8, 4, base=x[1:3], device=device_y)
+    assert_sycl_queue_equal(y.sycl_queue, x.to_device(device_y).sycl_queue)
 
 
 @pytest.mark.parametrize("device", valid_dev + [None], ids=dev_ids + [None])
@@ -264,7 +261,11 @@ def test_meshgrid(device):
         pytest.param("exp2", [0.0, 1.0, 2.0]),
         pytest.param("expm1", [1.0e-10, 1.0, 2.0, 4.0, 7.0]),
         pytest.param("fabs", [-1.2, 1.2]),
-        pytest.param("fix", [2.1, 2.9, -2.1, -2.9]),
+        pytest.param(
+            "fix",
+            [2.1, 2.9, -2.1, -2.9],
+            marks=pytest.mark.filterwarnings("ignore::DeprecationWarning"),
+        ),
         pytest.param("flatnonzero", [-2, -1, 0, 1, 2]),
         pytest.param("floor", [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]),
         pytest.param("gradient", [1.0, 2.0, 4.0, 7.0, 11.0, 16.0]),
@@ -1081,7 +1082,6 @@ def test_array_creation_from_dpctl(copy, device):
     assert isinstance(result, dpnp_array)
 
 
-@pytest.mark.skip("due to dpctl-2213")
 @pytest.mark.parametrize("device", valid_dev, ids=dev_ids)
 @pytest.mark.parametrize("arr_dtype", get_all_dtypes(no_float16=True))
 @pytest.mark.parametrize("shape", [tuple(), (2,), (3, 0, 1), (2, 2, 2)])
diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py
index fa4fc62e34c9..4fc0f2b958fa 100644
--- a/dpnp/tests/test_usm_type.py
+++ b/dpnp/tests/test_usm_type.py
@@ -572,7 +572,11 @@ def test_meshgrid(usm_type_x, usm_type_y):
         pytest.param("exp2", [0.0, 1.0, 2.0]),
         pytest.param("expm1", [1.0e-10, 1.0, 2.0, 4.0, 7.0]),
         pytest.param("fabs", [-1.2, 1.2]),
-        pytest.param("fix", [2.1, 2.9, -2.1, -2.9]),
+        pytest.param(
+            "fix",
+            [2.1, 2.9, -2.1, -2.9],
+            marks=pytest.mark.filterwarnings("ignore::DeprecationWarning"),
+        ),
         pytest.param("flatnonzero", [-2, -1, 0, 1, 2]),
         pytest.param("floor", [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]),
         pytest.param("gradient", [1, 2, 4, 7, 11, 16]),
diff --git a/dpnp/tests/third_party/cupy/binary_tests/test_elementwise.py b/dpnp/tests/third_party/cupy/binary_tests/test_elementwise.py
index 431f1f27d8c7..95ec1d4374c8 100644
--- a/dpnp/tests/third_party/cupy/binary_tests/test_elementwise.py
+++ b/dpnp/tests/third_party/cupy/binary_tests/test_elementwise.py
@@ -1,11 +1,9 @@
 from __future__ import annotations
 
-import unittest
-
 from dpnp.tests.third_party.cupy import testing
 
 
-class TestElementwise(unittest.TestCase):
+class TestElementwise:
 
     @testing.for_int_dtypes()
     @testing.numpy_cupy_array_equal()
diff --git a/dpnp/tests/third_party/cupy/binary_tests/test_packing.py b/dpnp/tests/third_party/cupy/binary_tests/test_packing.py
index a72a8a558b08..518e74d98868 100644
--- a/dpnp/tests/third_party/cupy/binary_tests/test_packing.py
+++ b/dpnp/tests/third_party/cupy/binary_tests/test_packing.py
@@ -1,4 +1,4 @@
-import unittest
+from __future__ import annotations
 
 import numpy
 import pytest
@@ -11,7 +11,7 @@
 )
 
 
-class TestPacking(unittest.TestCase):
+class TestPacking:
 
     @testing.for_int_dtypes()
     @testing.numpy_cupy_array_equal()
diff --git a/dpnp/tests/third_party/cupy/core_tests/test_array_function.py b/dpnp/tests/third_party/cupy/core_tests/test_array_function.py
index 7878a5a0aaff..f0c45900bcda 100644
--- a/dpnp/tests/third_party/cupy/core_tests/test_array_function.py
+++ b/dpnp/tests/third_party/cupy/core_tests/test_array_function.py
@@ -1,4 +1,4 @@
-import unittest
+from __future__ import annotations
 
 import numpy
 import pytest
@@ -11,7 +11,7 @@
 )
 
 
-class TestArrayFunction(unittest.TestCase):
+class TestArrayFunction:
 
     @testing.with_requires("numpy>=1.17.0")
     def test_array_function(self):
diff --git a/dpnp/tests/third_party/cupy/core_tests/test_core.py b/dpnp/tests/third_party/cupy/core_tests/test_core.py
index c959b4f24954..d9f8196db562 100644
--- a/dpnp/tests/third_party/cupy/core_tests/test_core.py
+++ b/dpnp/tests/third_party/cupy/core_tests/test_core.py
@@ -1,20 +1,23 @@
 from __future__ import annotations
 
 import sys
-import unittest
 
 import numpy
 import pytest
 
 import dpnp as cupy
+
+# from cupy._core import core
 from dpnp.tests.third_party.cupy import testing
 from dpnp.tests.third_party.cupy.testing._protocol_helpers import (
     DummyObjectWithCudaArrayInterface,
     DummyObjectWithCuPyGetNDArray,
 )
 
+# from cupy_tests.core_tests import test_raw
+
 
-class TestSize(unittest.TestCase):
+class TestSize:
 
     # def tearDown(self):
     #     # Free huge memory for slow test
@@ -58,7 +61,7 @@ def test_size_huge(self, xp):
 
 
 @pytest.mark.skip("no cupy._core submodule")
-class TestOrder(unittest.TestCase):
+class TestOrder:
 
     @testing.for_orders(_orders.keys())
     def test_ndarray(self, order):
diff --git a/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py b/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py
index 2df8cbfc5f7c..41df0a82e0a0 100644
--- a/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py
+++ b/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py
@@ -82,7 +82,6 @@ def _get_stream(self, stream_name):
             return dpctl.SyclQueue()
         return dpctl.SyclQueue()
 
-    @pytest.mark.skip("due to dpctl-2213")
     @testing.for_all_dtypes(no_bool=False)
     def test_conversion(self, dtype):
         orig_array = _gen_array(dtype)
@@ -102,7 +101,6 @@ def test_from_dlpack_and_conv_errors(self):
             # for host copies.
             cupy.from_dlpack(orig_array, copy=True)
 
-    @pytest.mark.skip("due to dpctl-2213")
     @pytest.mark.parametrize(
         "kwargs, versioned",
         [
@@ -130,7 +128,6 @@ def test_conversion_max_version(self, kwargs, versioned):
         testing.assert_array_equal(orig_array, out_array)
         testing.assert_array_equal(orig_array.data.ptr, out_array.data.ptr)
 
-    @pytest.mark.skip("due to dpctl-2213")
     def test_conversion_device(self):
         orig_array = _gen_array("float32")
 
@@ -202,7 +199,6 @@ def test_conversion_device_to_cpu(self):
             )
             assert numpy.may_share_memory(arr_nocopy, arr1)
 
-    @pytest.mark.skip("due to dpctl-2213")
     def test_stream(self):
         allowed_streams = ["null", True]
         # if not cuda.runtime.is_hip:
diff --git a/dpnp/tests/third_party/cupy/core_tests/test_flags.py b/dpnp/tests/third_party/cupy/core_tests/test_flags.py
index b2cc4fca59fb..3f416304a789 100644
--- a/dpnp/tests/third_party/cupy/core_tests/test_flags.py
+++ b/dpnp/tests/third_party/cupy/core_tests/test_flags.py
@@ -29,9 +29,7 @@ def test_key_error(self):
     def test_repr(self):
         assert """  C_CONTIGUOUS : 1
   F_CONTIGUOUS : 2
-  OWNDATA : 3""" == repr(
-            self.flags
-        )
+  OWNDATA : 3""" == repr(self.flags)
 
 
 @testing.parameterize(
diff --git a/dpnp/tests/third_party/cupy/core_tests/test_function.py b/dpnp/tests/third_party/cupy/core_tests/test_function.py
index 2943fc2bcf02..5480cdf6e126 100644
--- a/dpnp/tests/third_party/cupy/core_tests/test_function.py
+++ b/dpnp/tests/third_party/cupy/core_tests/test_function.py
@@ -154,9 +154,7 @@ def test_custom_user_struct(self):
     offsets[3] = (unsigned long long)&ptr->d;
     offsets[4] = (unsigned long long)&ptr->e;
 }}
-""".format(
-            struct_definition=struct_definition
-        )
+""".format(struct_definition=struct_definition)
 
         itemsize = cupy.ndarray(shape=(1,), dtype=numpy.uint64)
         sizes = cupy.ndarray(shape=(5,), dtype=numpy.uint64)
@@ -213,9 +211,7 @@ def make_packed(basetype, N, itemsize):
     sum += s.e[0] + s.e[1] + s.e[2];
     x[i] = a[i] + sum;
 }}
-""".format(
-            struct_definition=struct_definition
-        )
+""".format(struct_definition=struct_definition)
 
         a_cpu = numpy.arange(24, dtype=numpy.float64)
         a = cupy.array(a_cpu)
diff --git a/dpnp/tests/third_party/cupy/core_tests/test_include.py b/dpnp/tests/third_party/cupy/core_tests/test_include.py
index 9c6372cf2bcd..a45d2b40cbf4 100644
--- a/dpnp/tests/third_party/cupy/core_tests/test_include.py
+++ b/dpnp/tests/third_party/cupy/core_tests/test_include.py
@@ -18,25 +18,19 @@
 #include <cupy/hip_workaround.cuh>
 """
 
-_code_nvcc = (
-    _code_base
-    + """
+_code_nvcc = _code_base + """
 #include <cupy/type_dispatcher.cuh>
 
 int main() {
     return 0;
 }
 """
-)
 
-_code_nvrtc = (
-    _code_base
-    + """
+_code_nvrtc = _code_base + """
 
 __device__ void kernel() {
 }
 """
-)
 
 
 @pytest.mark.skipif(cupy.cuda.runtime.is_hip, reason="for CUDA")
diff --git a/dpnp/tests/third_party/cupy/core_tests/test_internal.py b/dpnp/tests/third_party/cupy/core_tests/test_internal.py
index 205661e80d75..2f4239a2e220 100644
--- a/dpnp/tests/third_party/cupy/core_tests/test_internal.py
+++ b/dpnp/tests/third_party/cupy/core_tests/test_internal.py
@@ -1,18 +1,16 @@
-import math
-import unittest
+from __future__ import annotations
 
 import numpy
 import pytest
 
 # from cupy._core import internal
-from dpnp.tests.third_party.cupy import testing
 
 pytest.skip(
     "CuPy internal functions are not supported", allow_module_level=True
 )
 
 
-class TestProd(unittest.TestCase):
+class TestProd:
 
     def test_empty(self):
         assert internal.prod([]) == 1
@@ -24,7 +22,7 @@ def test_two(self):
         assert internal.prod([2, 3]) == 6
 
 
-class TestProdSequence(unittest.TestCase):
+class TestProdSequence:
 
     def test_empty(self):
         assert internal.prod_sequence(()) == 1
@@ -74,7 +72,7 @@ def test_float(self):
         assert internal.get_size(1.0) == (1.0,)
 
 
-class TestVectorEqual(unittest.TestCase):
+class TestVectorEqual:
 
     def test_empty(self):
         assert internal.vector_equal([], []) is True
@@ -89,7 +87,7 @@ def test_different_size(self):
         assert internal.vector_equal([1, 2, 3], [1, 2]) is False
 
 
-class TestGetCContiguity(unittest.TestCase):
+class TestGetCContiguity:
 
     def test_zero_in_shape(self):
         assert internal.get_c_contiguity((1, 0, 1), (1, 1, 1), 3)
@@ -122,134 +120,101 @@ def test_no_contiguous3(self):
         assert not internal.get_c_contiguity((3, 1, 3), (6, 6, 4), 2)
 
 
-class TestInferUnknownDimension(unittest.TestCase):
+class TestInferUnknownDimension:
 
     def test_known_all(self):
         assert internal.infer_unknown_dimension((1, 2, 3), 6) == [1, 2, 3]
 
     def test_multiple_unknown(self):
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             internal.infer_unknown_dimension((-1, 1, -1), 10)
 
     def test_infer(self):
         assert internal.infer_unknown_dimension((-1, 2, 3), 12) == [2, 2, 3]
 
 
-@testing.parameterize(
-    {"slice": (2, 8, 1), "expect": (2, 8, 1)},
-    {"slice": (2, None, 1), "expect": (2, 10, 1)},
-    {"slice": (2, 1, 1), "expect": (2, 2, 1)},
-    {"slice": (2, -1, 1), "expect": (2, 9, 1)},
-    {"slice": (None, 8, 1), "expect": (0, 8, 1)},
-    {"slice": (-3, 8, 1), "expect": (7, 8, 1)},
-    {"slice": (11, 8, 1), "expect": (10, 10, 1)},
-    {"slice": (11, 11, 1), "expect": (10, 10, 1)},
-    {"slice": (-11, 8, 1), "expect": (0, 8, 1)},
-    {"slice": (-11, -11, 1), "expect": (0, 0, 1)},
-    {"slice": (8, 2, -1), "expect": (8, 2, -1)},
-    {"slice": (8, None, -1), "expect": (8, -1, -1)},
-    {"slice": (8, 9, -1), "expect": (8, 8, -1)},
-    {"slice": (8, -3, -1), "expect": (8, 7, -1)},
-    {"slice": (None, 8, -1), "expect": (9, 8, -1)},
-    {"slice": (-3, 6, -1), "expect": (7, 6, -1)},
-    {"slice": (10, 10, -1), "expect": (9, 9, -1)},
-    {"slice": (10, 8, -1), "expect": (9, 8, -1)},
-    {"slice": (9, 10, -1), "expect": (9, 9, -1)},
-    {"slice": (9, 9, -1), "expect": (9, 9, -1)},
-    {"slice": (9, 8, -1), "expect": (9, 8, -1)},
-    {"slice": (8, 8, -1), "expect": (8, 8, -1)},
-    {"slice": (-9, -8, -1), "expect": (1, 1, -1)},
-    {"slice": (-9, -9, -1), "expect": (1, 1, -1)},
-    {"slice": (-9, -10, -1), "expect": (1, 0, -1)},
-    {"slice": (-9, -11, -1), "expect": (1, -1, -1)},
-    {"slice": (-9, -12, -1), "expect": (1, -1, -1)},
-    {"slice": (-10, -9, -1), "expect": (0, 0, -1)},
-    {"slice": (-10, -10, -1), "expect": (0, 0, -1)},
-    {"slice": (-10, -11, -1), "expect": (0, -1, -1)},
-    {"slice": (-10, -12, -1), "expect": (0, -1, -1)},
-    {"slice": (-11, 8, -1), "expect": (-1, -1, -1)},
-    {"slice": (-11, -9, -1), "expect": (-1, -1, -1)},
-    {"slice": (-11, -10, -1), "expect": (-1, -1, -1)},
-    {"slice": (-11, -11, -1), "expect": (-1, -1, -1)},
-    {"slice": (-11, -12, -1), "expect": (-1, -1, -1)},
+@pytest.mark.parametrize(
+    ("slice_", "expect"),
+    [
+        ((2, 8, 1), (2, 8, 1)),
+        ((2, None, 1), (2, 10, 1)),
+        ((2, 1, 1), (2, 2, 1)),
+        ((2, -1, 1), (2, 9, 1)),
+        ((None, 8, 1), (0, 8, 1)),
+        ((-3, 8, 1), (7, 8, 1)),
+        ((11, 8, 1), (10, 10, 1)),
+        ((11, 11, 1), (10, 10, 1)),
+        ((-11, 8, 1), (0, 8, 1)),
+        ((-11, -11, 1), (0, 0, 1)),
+        ((8, 2, -1), (8, 2, -1)),
+        ((8, None, -1), (8, -1, -1)),
+        ((8, 9, -1), (8, 8, -1)),
+        ((8, -3, -1), (8, 7, -1)),
+        ((None, 8, -1), (9, 8, -1)),
+        ((-3, 6, -1), (7, 6, -1)),
+        ((10, 10, -1), (9, 9, -1)),
+        ((10, 8, -1), (9, 8, -1)),
+        ((9, 10, -1), (9, 9, -1)),
+        ((9, 9, -1), (9, 9, -1)),
+        ((9, 8, -1), (9, 8, -1)),
+        ((8, 8, -1), (8, 8, -1)),
+        ((-9, -8, -1), (1, 1, -1)),
+        ((-9, -9, -1), (1, 1, -1)),
+        ((-9, -10, -1), (1, 0, -1)),
+        ((-9, -11, -1), (1, -1, -1)),
+        ((-9, -12, -1), (1, -1, -1)),
+        ((-10, -9, -1), (0, 0, -1)),
+        ((-10, -10, -1), (0, 0, -1)),
+        ((-10, -11, -1), (0, -1, -1)),
+        ((-10, -12, -1), (0, -1, -1)),
+        ((-11, 8, -1), (-1, -1, -1)),
+        ((-11, -9, -1), (-1, -1, -1)),
+        ((-11, -10, -1), (-1, -1, -1)),
+        ((-11, -11, -1), (-1, -1, -1)),
+        ((-11, -12, -1), (-1, -1, -1)),
+    ],
 )
-class TestCompleteSlice(unittest.TestCase):
+def test_complete_slice(slice_, expect):
+    assert internal.complete_slice(slice(*slice_), 10) == slice(*expect)
 
-    def test_complete_slice(self):
-        assert internal.complete_slice(slice(*self.slice), 10) == slice(
-            *self.expect
-        )
 
-
-class TestCompleteSliceError(unittest.TestCase):
+class TestCompleteSliceError:
 
     def test_invalid_step_value(self):
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             internal.complete_slice(slice(1, 1, 0), 1)
 
     def test_invalid_step_type(self):
-        with self.assertRaises(TypeError):
+        with pytest.raises(TypeError):
             internal.complete_slice(slice(1, 1, (1, 2)), 1)
 
     def test_invalid_start_type(self):
-        with self.assertRaises(TypeError):
+        with pytest.raises(TypeError):
             internal.complete_slice(slice((1, 2), 1, 1), 1)
-        with self.assertRaises(TypeError):
+        with pytest.raises(TypeError):
             internal.complete_slice(slice((1, 2), 1, -1), 1)
 
     def test_invalid_stop_type(self):
-        with self.assertRaises(TypeError):
+        with pytest.raises(TypeError):
             internal.complete_slice(slice((1, 2), 1, 1), 1)
-        with self.assertRaises(TypeError):
+        with pytest.raises(TypeError):
             internal.complete_slice(slice((1, 2), 1, -1), 1)
 
 
-@testing.parameterize(
-    {"x": 0, "expect": 0},
-    {"x": 1, "expect": 1},
-    {"x": 2, "expect": 2},
-    {"x": 3, "expect": 4},
-    {"x": 2**10, "expect": 2**10},
-    {"x": 2**10 - 1, "expect": 2**10},
-    {"x": 2**10 + 1, "expect": 2**11},
-    {"x": 2**40, "expect": 2**40},
-    {"x": 2**40 - 1, "expect": 2**40},
-    {"x": 2**40 + 1, "expect": 2**41},
+@pytest.mark.parametrize(
+    ("x", "expect"),
+    [
+        (0, 0),
+        (1, 1),
+        (2, 2),
+        (3, 4),
+        (2**10, 2**10),
+        (2**10 - 1, 2**10),
+        (2**10 + 1, 2**11),
+        (2**40, 2**40),
+        (2**40 - 1, 2**40),
+        (2**40 + 1, 2**41),
+    ],
 )
-class TestClp2(unittest.TestCase):
-
-    def test_clp2(self):
-        assert internal.clp2(self.x) == self.expect
-
-
-@testing.parameterize(
-    *testing.product(
-        {
-            "value": [
-                0.0,
-                1.0,
-                -1.0,
-                0.25,
-                -0.25,
-                11.0,
-                -11.0,
-                2**-15,
-                -(2**-15),  # Denormalized Number
-                float("inf"),
-                float("-inf"),
-            ],
-        }
-    )
-)
-class TestConvertFloat16(unittest.TestCase):
-
-    def test_conversion(self):
-        half = internal.to_float16(self.value)
-        assert internal.from_float16(half) == self.value
-
-
-class TestConvertFloat16Nan(unittest.TestCase):
-
-    def test_conversion(self):
-        half = internal.to_float16(float("nan"))
-        assert math.isnan(internal.from_float16(half))
+def test_clp2(x, expect):
+    assert internal.clp2(x) == expect
diff --git a/dpnp/tests/third_party/cupy/creation_tests/test_ranges.py b/dpnp/tests/third_party/cupy/creation_tests/test_ranges.py
index 3790eae96462..69873473e0d7 100644
--- a/dpnp/tests/third_party/cupy/creation_tests/test_ranges.py
+++ b/dpnp/tests/third_party/cupy/creation_tests/test_ranges.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import functools
 import math
 import unittest
@@ -226,8 +228,8 @@ def test_linspace_mixed_start_stop2(self, xp, dtype_range, dtype_out):
         # TODO (ev-br): np 2.0: had to bump the default rtol on Windows
         #               and numpy 1.26+weak promotion from 0 to 5e-6
         if xp.dtype(dtype_range).kind == "u":
-            # to avoid overflow, limit `val` to be smaller
-            # than xp.iinfo(dtype).max
+            # to avoid overflow, limit `val` to be smaller than
+            # xp.iinfo(dtype).max (TODO: check if dpctl-2230 resolves that)
             if dtype_range in [xp.uint8, xp.uint16] or dtype_out in [
                 xp.int8,
                 xp.uint8,
diff --git a/dpnp/tests/third_party/cupy/fft_tests/test_cache.py b/dpnp/tests/third_party/cupy/fft_tests/test_cache.py
index e32f8e8305a0..29e0a7724edf 100644
--- a/dpnp/tests/third_party/cupy/fft_tests/test_cache.py
+++ b/dpnp/tests/third_party/cupy/fft_tests/test_cache.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import contextlib
 import io
 import queue
@@ -14,7 +16,7 @@
 # from cupy.cuda import runtime
 # from cupy.fft import config
 
-# from .test_fft import (multi_gpu_config, _skip_multi_gpu_bug)
+# from .test_fft import multi_gpu_config
 
 pytest.skip("FFT cache functions are not supported", allow_module_level=True)
 
@@ -30,26 +32,29 @@ def intercept_stdout(func):
 
 
 class TestPlanCache(unittest.TestCase):
-    def setUp(self):
-        self.caches = []
-        self.old_sizes = []
+    @contextlib.contextmanager
+    @staticmethod
+    def prepare_and_restore_caches():
+        old_sizes = []
         for i in range(n_devices):
             with device.Device(i):
                 cache = config.get_plan_cache()
-                self.old_sizes.append(cache.get_size())
+                old_sizes.append(cache.get_size())
                 cache.clear()
                 cache.set_memsize(-1)
                 cache.set_size(2)
-            self.caches.append(cache)
 
-    def tearDown(self):
-        for i in range(n_devices):
-            with device.Device(i):
-                cache = config.get_plan_cache()
-                cache.clear()
-                cache.set_size(self.old_sizes[i])
-                cache.set_memsize(-1)
+        try:
+            yield
+        finally:
+            for i in range(n_devices):
+                with device.Device(i):
+                    cache = config.get_plan_cache()
+                    cache.clear()
+                    cache.set_size(old_sizes[i])
+                    cache.set_memsize(-1)
 
+    @prepare_and_restore_caches()
     def test_LRU_cache1(self):
         # test if insertion and clean-up works
         cache = config.get_plan_cache()
@@ -62,6 +67,7 @@ def test_LRU_cache1(self):
         cache.clear()
         assert cache.get_curr_size() == 0 <= cache.get_size()
 
+    @prepare_and_restore_caches()
     def test_LRU_cache2(self):
         # test if plan is reused
         cache = config.get_plan_cache()
@@ -83,6 +89,7 @@ def test_LRU_cache2(self):
         # we should get the same plan
         assert plan0 is plan1
 
+    @prepare_and_restore_caches()
     def test_LRU_cache3(self):
         # test if cache size is limited
         cache = config.get_plan_cache()
@@ -108,6 +115,7 @@ def test_LRU_cache3(self):
         for _, node in cache:
             assert plan is not node.plan
 
+    @prepare_and_restore_caches()
     def test_LRU_cache4(self):
         # test if fetching the plan will reorder it to the top
         cache = config.get_plan_cache()
@@ -149,6 +157,8 @@ def test_LRU_cache4(self):
             cache[next(iterator)[0]]
 
     @testing.multi_gpu(2)
+    @prepare_and_restore_caches()
+    @pytest.mark.thread_unsafe(reason="intercepts stdout")
     def test_LRU_cache5(self):
         # test if the LRU cache is thread-local
 
@@ -210,10 +220,13 @@ def thread_init_caches(gpus, queue):
         assert stdout.count("uninitialized") == n_devices - 2
 
     @testing.multi_gpu(2)
-    def test_LRU_cache6(self):
+    @prepare_and_restore_caches()
+    def test_LRU_cache6(self, gpus=None):
         # test if each device has a separate cache
-        cache0 = self.caches[0]
-        cache1 = self.caches[1]
+        with device.Device(0):
+            cache0 = config.get_plan_cache()
+        with device.Device(1):
+            cache1 = config.get_plan_cache()
 
         # ensure a fresh state
         assert cache0.get_curr_size() == 0 <= cache0.get_size()
@@ -247,10 +260,13 @@ def test_LRU_cache6(self):
     @pytest.mark.skipif(
         runtime.is_hip, reason="hipFFT doesn't support multi-GPU"
     )
-    def test_LRU_cache7(self):
+    @prepare_and_restore_caches()
+    def test_LRU_cache7(self, gpus=None):
         # test accessing a multi-GPU plan
-        cache0 = self.caches[0]
-        cache1 = self.caches[1]
+        with device.Device(0):
+            cache0 = config.get_plan_cache()
+        with device.Device(1):
+            cache1 = config.get_plan_cache()
 
         # ensure a fresh state
         assert cache0.get_curr_size() == 0 <= cache0.get_size()
@@ -319,6 +335,7 @@ def test_LRU_cache7(self):
         assert cache0.get_curr_size() == 1 <= cache0.get_size()
         assert cache1.get_curr_size() == 2 <= cache1.get_size()
 
+    @prepare_and_restore_caches()
     def test_LRU_cache8(self):
         # test if Plan1d and PlanNd can coexist in the same cache
         cache = config.get_plan_cache()
@@ -340,6 +357,7 @@ def test_LRU_cache8(self):
         assert isinstance(next(iterator)[1].plan, cufft.PlanNd)
         assert isinstance(next(iterator)[1].plan, cufft.Plan1d)
 
+    @prepare_and_restore_caches()
     def test_LRU_cache9(self):
         # test if memsizes in the cache adds up
         cache = config.get_plan_cache()
@@ -358,6 +376,8 @@ def test_LRU_cache9(self):
 
         assert memsize == cache.get_curr_memsize()
 
+    @prepare_and_restore_caches()
+    @pytest.mark.thread_unsafe(reason="intercepts stdout")
     def test_LRU_cache10(self):
         # test if deletion works and if show_info() is consistent with data
         cache = config.get_plan_cache()
@@ -406,11 +426,13 @@ def test_LRU_cache10(self):
     @pytest.mark.skipif(
         runtime.is_hip, reason="hipFFT doesn't support multi-GPU"
     )
+    @prepare_and_restore_caches()
     def test_LRU_cache11(self):
         # test if collectively deleting a multi-GPU plan works
-        _skip_multi_gpu_bug((128,), self.gpus)
-        cache0 = self.caches[0]
-        cache1 = self.caches[1]
+        with device.Device(0):
+            cache0 = config.get_plan_cache()
+        with device.Device(1):
+            cache1 = config.get_plan_cache()
 
         # ensure a fresh state
         assert cache0.get_curr_size() == 0 <= cache0.get_size()
@@ -441,11 +463,14 @@ def test_LRU_cache11(self):
     @pytest.mark.skipif(
         runtime.is_hip, reason="hipFFT doesn't support multi-GPU"
     )
+    @prepare_and_restore_caches()
     def test_LRU_cache12(self):
         # test if an error is raise when one of the caches is unable
         # to fit it a multi-GPU plan
-        cache0 = self.caches[0]
-        cache1 = self.caches[1]
+        with device.Device(0):
+            cache0 = config.get_plan_cache()
+        with device.Device(1):
+            cache1 = config.get_plan_cache()
 
         # ensure a fresh state
         assert cache0.get_curr_size() == 0 <= cache0.get_size()
@@ -467,6 +492,7 @@ def test_LRU_cache12(self):
         runtime.runtimeGetVersion() >= 11080,
         "CUDA 11.8 has different plan size",
     )
+    @prepare_and_restore_caches()
     def test_LRU_cache13(self):
         # test if plan insertion respect the memory size limit
         cache = config.get_plan_cache()
diff --git a/dpnp/tests/third_party/cupy/fft_tests/test_callback.py b/dpnp/tests/third_party/cupy/fft_tests/test_callback.py
index 12bfd4aa73df..e2682d0aaa6e 100644
--- a/dpnp/tests/third_party/cupy/fft_tests/test_callback.py
+++ b/dpnp/tests/third_party/cupy/fft_tests/test_callback.py
@@ -1,18 +1,62 @@
+from __future__ import annotations
+
 import contextlib
+import os
 import string
 import sys
 import tempfile
 from unittest import mock
 
+try:
+    import Cython
+except ImportError:
+    Cython = None
+else:
+    if Cython.__version__ < "0.29.0":
+        Cython = None
 import numpy as np
 import pytest
 
 import dpnp as cupy
 from dpnp.tests.third_party.cupy import testing
 
+# from cupy.cuda import cufft
+# from cupy.cuda.device import get_compute_capability
+
 pytest.skip("FFT callbacks are not supported", allow_module_level=True)
 
 
+def cuda_version():
+    return cupy.cuda.runtime.runtimeGetVersion()
+
+
+cb_ver_for_test = ("legacy", "jit")
+
+
+def check_should_skip_legacy_test():
+    if not sys.platform.startswith("linux"):
+        pytest.skip("legacy callbacks are only supported on Linux")
+    if Cython is None:
+        pytest.skip("no working Cython")
+    if "LD_PRELOAD" in os.environ:
+        pytest.skip(
+            "legacy callback does not work if libcufft.so " "is preloaded"
+        )
+    if cufft.getVersion() >= 12000 and get_compute_capability() == "75":
+        pytest.skip(
+            "cuFFT legacy callbacks in CUDA 13.0+ do not support " "cc 7.5"
+        )
+    if cufft.getVersion() == 11303 and get_compute_capability() == "120":
+        pytest.skip(
+            "cuFFT legacy callbacks in CUDA 12.8.0 do not support " "cc 12.0"
+        )
+
+
+def check_should_skip_jit_test():
+    if cufft.getVersion() < 11303:
+        pytest.skip("JIT callbacks require cuFFT from CUDA 12.8+")
+
+
 @contextlib.contextmanager
 def use_temporary_cache_dir():
     target = "cupy.fft._callback.get_cache_dir"
@@ -21,45 +65,50 @@ def use_temporary_cache_dir():
             yield path
 
 
+suppress_legacy_warning = pytest.mark.filterwarnings(
+    "ignore:.*legacy callback.*:DeprecationWarning"
+)
+
+
 _load_callback = r"""
-__device__ ${data_type} CB_ConvertInput(
-    void* dataIn, size_t offset, void* callerInfo, void* sharedPtr)
+__device__ ${data_type} ${cb_name}(
+    void* dataIn, ${offset_type} offset, void* callerInfo, void* sharedPtr)
 {
     ${data_type} x = ((${data_type}*)dataIn)[offset];
     ${element} *= 2.5;
     return x;
 }
 
-__device__ ${load_type} d_loadCallbackPtr = CB_ConvertInput;
+__device__ ${load_type} d_loadCallbackPtr = ${cb_name};
 """
 
 _load_callback_with_aux = r"""
-__device__ ${data_type} CB_ConvertInput(
-    void* dataIn, size_t offset, void* callerInfo, void* sharedPtr)
+__device__ ${data_type} ${cb_name}(
+    void* dataIn, ${offset_type} offset, void* callerInfo, void* sharedPtr)
 {
     ${data_type} x = ((${data_type}*)dataIn)[offset];
     ${element} *= *((${aux_type}*)callerInfo);
     return x;
 }
 
-__device__ ${load_type} d_loadCallbackPtr = CB_ConvertInput;
+__device__ ${load_type} d_loadCallbackPtr = ${cb_name};
 """
 
 _load_callback_with_aux2 = r"""
-__device__ ${data_type} CB_ConvertInput(
-    void* dataIn, size_t offset, void* callerInfo, void* sharedPtr)
+__device__ ${data_type} ${cb_name}(
+    void* dataIn, ${offset_type} offset, void* callerInfo, void* sharedPtr)
 {
     ${data_type} x = ((${data_type}*)dataIn)[offset];
     ${element} *= ((${aux_type}*)callerInfo)[offset];
     return x;
 }
 
-__device__ ${load_type} d_loadCallbackPtr = CB_ConvertInput;
+__device__ ${load_type} d_loadCallbackPtr = ${cb_name};
 """
 
 _store_callback = r"""
-__device__ void CB_ConvertOutput(
-    void *dataOut, size_t offset, ${data_type} element,
+__device__ void ${cb_name}(
+    void *dataOut, ${offset_type} offset, ${data_type} element,
     void *callerInfo, void *sharedPointer)
 {
     ${data_type} x = element;
@@ -67,12 +116,12 @@ def use_temporary_cache_dir():
     ((${data_type}*)dataOut)[offset] = x;
 }
 
-__device__ ${store_type} d_storeCallbackPtr = CB_ConvertOutput;
+__device__ ${store_type} d_storeCallbackPtr = ${cb_name};
 """
 
 _store_callback_with_aux = r"""
-__device__ void CB_ConvertOutput(
-    void *dataOut, size_t offset, ${data_type} element,
+__device__ void ${cb_name}(
+    void *dataOut, ${offset_type} offset, ${data_type} element,
     void *callerInfo, void *sharedPointer)
 {
     ${data_type} x = element;
@@ -80,26 +129,209 @@ def use_temporary_cache_dir():
     ((${data_type}*)dataOut)[offset] = x;
 }
 
-__device__ ${store_type} d_storeCallbackPtr = CB_ConvertOutput;
+__device__ ${store_type} d_storeCallbackPtr = ${cb_name};
 """
 
 
-def _set_load_cb(code, element, data_type, callback_type, aux_type=None):
-    return string.Template(code).substitute(
+def _set_load_cb(
+    code,
+    element,
+    data_type,
+    callback_type,
+    callback_name,
+    aux_type=None,
+    cb_ver="",
+):
+    if cb_ver == "jit":
+        callback_type = callback_type.replace(
+            "cufftCallback", "cufftJITCallback"
+        )
+    callback = string.Template(code).substitute(
         data_type=data_type,
         aux_type=aux_type,
         load_type=callback_type,
+        cb_name=callback_name,
         element=element,
+        offset_type=("size_t" if cb_ver == "legacy" else "unsigned long long"),
     )
-
-
-def _set_store_cb(code, element, data_type, callback_type, aux_type=None):
-    return string.Template(code).substitute(
+    if cb_ver == "jit":
+        callback = "#include <cufftXt.h>\n\n" + callback
+    return callback
+
+
+def _set_store_cb(
+    code,
+    element,
+    data_type,
+    callback_type,
+    callback_name,
+    aux_type=None,
+    cb_ver="",
+):
+    if cb_ver == "jit":
+        callback_type = callback_type.replace(
+            "cufftCallback", "cufftJITCallback"
+        )
+    callback = string.Template(code).substitute(
         data_type=data_type,
         aux_type=aux_type,
         store_type=callback_type,
+        cb_name=callback_name,
         element=element,
+        offset_type=("size_t" if cb_ver == "legacy" else "unsigned long long"),
     )
+    if cb_ver == "jit":
+        callback = "#include <cufftXt.h>\n\n" + callback
+    return callback
+
+
+# Note: this class is placed here instead of at the end of this file, because
+# pytest does not reset warnings internally, and other tests would suppress
+# the warnings such that at the end we have no warnings to capture, but we want
+# to ensure warnings are raised.
+@pytest.mark.skipif(
+    cupy.cuda.runtime.is_hip, reason="hipFFT does not support callbacks"
+)
+class TestInputValidationWith1dCallbacks:
+
+    shape = (10,)
+    norm = "ortho"
+    dtype = np.complex64
+
+    @pytest.fixture(scope="class", autouse=True)
+    def _use_temporary_cache_dir(self):
+        # Temp cache dir for the whole class; a yielding setup_class never runs
+        with use_temporary_cache_dir():
+            yield
+
+    def test_fft_load_legacy(self):
+        check_should_skip_legacy_test()
+
+        fft = cupy.fft.fft
+        code = _load_callback
+        types = (
+            "x.x",
+            "cufftComplex",
+            "cufftCallbackLoadC",
+            "cufftJITCallbackLoadComplex",
+        )
+        cb_load = _set_load_cb(code, *types, cb_ver="legacy")
+
+        a = testing.shaped_random(self.shape, cupy, self.dtype)
+        with pytest.deprecated_call(
+            match="legacy callback is considered deprecated"
+        ):
+            with cupy.fft.config.set_cufft_callbacks(
+                cb_load=cb_load, cb_ver="legacy"
+            ):
+                fft(a, norm=self.norm)
+
+    def test_fft_load_jit_no_name(self):
+        check_should_skip_jit_test()
+
+        fft = cupy.fft.fft
+        code = _load_callback
+        types = (
+            "x.x",
+            "cufftComplex",
+            "cufftCallbackLoadC",
+            "cufftJITCallbackLoadComplex",
+        )
+        cb_load = _set_load_cb(code, *types, cb_ver="jit")
+
+        a = testing.shaped_random(self.shape, cupy, self.dtype)
+        # We omit passing cb_load_name. The test infra setup would check
+        # if we can infer it correctly.
+        with cupy.fft.config.set_cufft_callbacks(cb_load=cb_load, cb_ver="jit"):
+            fft(a, norm=self.norm)
+
+    def test_fft_store_legacy(self):
+        check_should_skip_legacy_test()
+
+        fft = cupy.fft.fft
+        code = _store_callback
+        types = (
+            "x.y",
+            "cufftComplex",
+            "cufftCallbackStoreC",
+            "cufftJITCallbackStoreComplex",
+        )
+        cb_store = _set_store_cb(code, *types, cb_ver="legacy")
+
+        a = testing.shaped_random(self.shape, cupy, self.dtype)
+        with pytest.deprecated_call(
+            match="legacy callback is considered deprecated"
+        ):
+            with cupy.fft.config.set_cufft_callbacks(
+                cb_store=cb_store, cb_ver="legacy"
+            ):
+                fft(a, norm=self.norm)
+
+    def test_fft_store_jit_no_name(self):
+        check_should_skip_jit_test()
+
+        fft = cupy.fft.fft
+        code = _store_callback
+        types = (
+            "x.y",
+            "cufftComplex",
+            "cufftCallbackStoreC",
+            "cufftJITCallbackStoreComplex",
+        )
+        cb_store = _set_store_cb(code, *types, cb_ver="jit")
+
+        a = testing.shaped_random(self.shape, cupy, self.dtype)
+        # We omit passing cb_store_name. The test infra setup would check
+        # if we can infer it correctly.
+        with cupy.fft.config.set_cufft_callbacks(
+            cb_store=cb_store, cb_ver="jit"
+        ):
+            fft(a, norm=self.norm)
+
+    def test_fft_load_store_legacy_aux(self):
+        check_should_skip_legacy_test()
+
+        fft = cupy.fft.fft
+        dtype = self.dtype
+        load_code = _load_callback_with_aux
+        store_code = _store_callback_with_aux
+        load_aux = cupy.asarray(2.5, dtype=cupy.dtype(dtype).char.lower())
+        store_aux = cupy.asarray(3.8, dtype=cupy.dtype(dtype).char.lower())
+
+        load_types = (
+            "x.x",
+            "cufftComplex",
+            "cufftCallbackLoadC",
+            "cufftJITCallbackLoadComplex",
+            "float",
+        )
+        store_types = (
+            "x.y",
+            "cufftComplex",
+            "cufftCallbackStoreC",
+            "cufftJITCallbackStoreComplex",
+            "float",
+        )
+        cb_load = _set_load_cb(load_code, *load_types, cb_ver="legacy")
+        cb_store = _set_store_cb(store_code, *store_types, cb_ver="legacy")
+
+        a = testing.shaped_random(self.shape, cupy, self.dtype)
+        with (
+            pytest.deprecated_call(
+                match="cb_load_aux_arr or cb_store_aux_arr is deprecated"
+            ),
+            pytest.deprecated_call(
+                match="legacy callback is considered deprecated"
+            ),
+        ):
+            with cupy.fft.config.set_cufft_callbacks(
+                cb_load=cb_load,
+                cb_store=cb_store,
+                cb_load_aux_arr=load_aux,
+                cb_store_aux_arr=store_aux,
+                cb_ver="legacy",
+            ):
+                fft(a, norm=self.norm)
 
 
 @testing.parameterize(
@@ -108,31 +340,60 @@ def _set_store_cb(code, element, data_type, callback_type, aux_type=None):
             "n": [None, 5, 10, 15],
             "shape": [(10, 7), (10,), (10, 10)],
             "norm": [None, "ortho"],
+            "cb_ver": cb_ver_for_test,
         }
     )
 )
-@testing.with_requires("cython>=0.29.0")
-@pytest.mark.skipif(
-    not sys.platform.startswith("linux"),
-    reason="callbacks are only supported on Linux",
-)
 @pytest.mark.skipif(
     cupy.cuda.runtime.is_hip, reason="hipFFT does not support callbacks"
 )
 class Test1dCallbacks:
 
+    @pytest.fixture(scope="class", autouse=True)
+    def _use_temporary_cache_dir(self):
+        # Temp cache dir for the whole class; a yielding setup_class never runs
+        with use_temporary_cache_dir():
+            yield
+
     def _test_load_helper(self, xp, dtype, fft_func):
+        if self.cb_ver == "legacy":
+            check_should_skip_legacy_test()
+        else:
+            check_should_skip_jit_test()
+
+        # for simplicity we use the JIT callback names for both legacy/jit
         fft = getattr(xp.fft, fft_func)
         code = _load_callback
         if dtype == np.complex64:
-            types = ("x.x", "cufftComplex", "cufftCallbackLoadC")
+            types = (
+                "x.x",
+                "cufftComplex",
+                "cufftCallbackLoadC",
+                "cufftJITCallbackLoadComplex",
+            )
         elif dtype == np.complex128:
-            types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ")
+            types = (
+                "x.x",
+                "cufftDoubleComplex",
+                "cufftCallbackLoadZ",
+                "cufftJITCallbackLoadDoubleComplex",
+            )
         elif dtype == np.float32:
-            types = ("x", "cufftReal", "cufftCallbackLoadR")
-        else:
-            types = ("x", "cufftDoubleReal", "cufftCallbackLoadD")
-        cb_load = _set_load_cb(code, *types)
+            types = (
+                "x",
+                "cufftReal",
+                "cufftCallbackLoadR",
+                "cufftJITCallbackLoadReal",
+            )
+        else:  # float64
+            types = (
+                "x",
+                "cufftDoubleReal",
+                "cufftCallbackLoadD",
+                "cufftJITCallbackLoadDoubleReal",
+            )
+        cb_load = _set_load_cb(code, *types, cb_ver=self.cb_ver)
+        cb_load_name = types[-1] if self.cb_ver == "jit" else None
 
         a = testing.shaped_random(self.shape, xp, dtype)
         if xp is np:
@@ -144,50 +405,93 @@ def _test_load_helper(self, xp, dtype, fft_func):
                 else:
                     out = out.astype(np.float32)
         else:
-            with use_temporary_cache_dir():
-                with xp.fft.config.set_cufft_callbacks(cb_load=cb_load):
-                    out = fft(a, n=self.n, norm=self.norm)
+            with xp.fft.config.set_cufft_callbacks(
+                cb_load=cb_load, cb_load_name=cb_load_name, cb_ver=self.cb_ver
+            ):
+                out = fft(a, n=self.n, norm=self.norm)
 
         return out
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_fft_load(self, xp, dtype):
         return self._test_load_helper(xp, dtype, "fft")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_ifft_load(self, xp, dtype):
         return self._test_load_helper(xp, dtype, "ifft")
 
+    @suppress_legacy_warning
     @testing.for_float_dtypes(no_float16=True)
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_rfft_load(self, xp, dtype):
         return self._test_load_helper(xp, dtype, "rfft")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_irfft_load(self, xp, dtype):
         return self._test_load_helper(xp, dtype, "irfft")
 
     def _test_store_helper(self, xp, dtype, fft_func):
+        if self.cb_ver == "legacy":
+            check_should_skip_legacy_test()
+        else:
+            check_should_skip_jit_test()
+
         fft = getattr(xp.fft, fft_func)
         code = _store_callback
+
+        # for simplicity we use the JIT callback names for both legacy/jit
         if dtype == np.complex64:
             if fft_func != "irfft":
-                types = ("x.y", "cufftComplex", "cufftCallbackStoreC")
-            else:
-                types = ("x", "cufftReal", "cufftCallbackStoreR")
+                types = (
+                    "x.y",
+                    "cufftComplex",
+                    "cufftCallbackStoreC",
+                    "cufftJITCallbackStoreComplex",
+                )
+            else:  # float32 for irfft
+                types = (
+                    "x",
+                    "cufftReal",
+                    "cufftCallbackStoreR",
+                    "cufftJITCallbackStoreReal",
+                )
         elif dtype == np.complex128:
             if fft_func != "irfft":
-                types = ("x.y", "cufftDoubleComplex", "cufftCallbackStoreZ")
-            else:
-                types = ("x", "cufftDoubleReal", "cufftCallbackStoreD")
+                types = (
+                    "x.y",
+                    "cufftDoubleComplex",
+                    "cufftCallbackStoreZ",
+                    "cufftJITCallbackStoreDoubleComplex",
+                )
+            else:  # float64 for irfft
+                types = (
+                    "x",
+                    "cufftDoubleReal",
+                    "cufftCallbackStoreD",
+                    "cufftJITCallbackStoreDoubleReal",
+                )
         elif dtype == np.float32:
-            types = ("x.y", "cufftComplex", "cufftCallbackStoreC")
+            types = (
+                "x.y",
+                "cufftComplex",
+                "cufftCallbackStoreC",
+                "cufftJITCallbackStoreComplex",
+            )
         elif dtype == np.float64:
-            types = ("x.y", "cufftDoubleComplex", "cufftCallbackStoreZ")
-        cb_store = _set_store_cb(code, *types)
+            types = (
+                "x.y",
+                "cufftDoubleComplex",
+                "cufftCallbackStoreZ",
+                "cufftJITCallbackStoreDoubleComplex",
+            )
+        cb_store = _set_store_cb(code, *types, cb_ver=self.cb_ver)
+        cb_store_name = types[-1] if self.cb_ver == "jit" else None
 
         a = testing.shaped_random(self.shape, xp, dtype)
         if xp is np:
@@ -201,67 +505,134 @@ def _test_store_helper(self, xp, dtype, fft_func):
                 if dtype in (np.float32, np.complex64):
                     out = out.astype(np.float32)
         else:
-            with use_temporary_cache_dir():
-                with xp.fft.config.set_cufft_callbacks(cb_store=cb_store):
-                    out = fft(a, n=self.n, norm=self.norm)
+            with xp.fft.config.set_cufft_callbacks(
+                cb_store=cb_store,
+                cb_store_name=cb_store_name,
+                cb_ver=self.cb_ver,
+            ):
+                out = fft(a, n=self.n, norm=self.norm)
 
         return out
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_fft_store(self, xp, dtype):
         return self._test_store_helper(xp, dtype, "fft")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_ifft_store(self, xp, dtype):
         return self._test_store_helper(xp, dtype, "ifft")
 
+    @suppress_legacy_warning
     @testing.for_float_dtypes(no_float16=True)
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_rfft_store(self, xp, dtype):
         return self._test_store_helper(xp, dtype, "rfft")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_irfft_store(self, xp, dtype):
         return self._test_store_helper(xp, dtype, "irfft")
 
     def _test_load_store_helper(self, xp, dtype, fft_func):
+        if self.cb_ver == "legacy":
+            check_should_skip_legacy_test()
+        else:
+            check_should_skip_jit_test()
+
+        # for simplicity we use the JIT callback names for both legacy/jit
         fft = getattr(xp.fft, fft_func)
         load_code = _load_callback
         store_code = _store_callback
         if fft_func in ("fft", "ifft"):
             if dtype == np.complex64:
-                load_types = ("x.x", "cufftComplex", "cufftCallbackLoadC")
-                store_types = ("x.y", "cufftComplex", "cufftCallbackStoreC")
-            else:
-                load_types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ")
+                load_types = (
+                    "x.x",
+                    "cufftComplex",
+                    "cufftCallbackLoadC",
+                    "cufftJITCallbackLoadComplex",
+                )
+                store_types = (
+                    "x.y",
+                    "cufftComplex",
+                    "cufftCallbackStoreC",
+                    "cufftJITCallbackStoreComplex",
+                )
+            else:  # complex128
+                load_types = (
+                    "x.x",
+                    "cufftDoubleComplex",
+                    "cufftCallbackLoadZ",
+                    "cufftJITCallbackLoadDoubleComplex",
+                )
                 store_types = (
                     "x.y",
                     "cufftDoubleComplex",
                     "cufftCallbackStoreZ",
+                    "cufftJITCallbackStoreDoubleComplex",
                 )
         elif fft_func == "rfft":
             if dtype == np.float32:
-                load_types = ("x", "cufftReal", "cufftCallbackLoadR")
-                store_types = ("x.y", "cufftComplex", "cufftCallbackStoreC")
-            else:
-                load_types = ("x", "cufftDoubleReal", "cufftCallbackLoadD")
+                load_types = (
+                    "x",
+                    "cufftReal",
+                    "cufftCallbackLoadR",
+                    "cufftJITCallbackLoadReal",
+                )
+                store_types = (
+                    "x.y",
+                    "cufftComplex",
+                    "cufftCallbackStoreC",
+                    "cufftJITCallbackStoreComplex",
+                )
+            else:  # float64
+                load_types = (
+                    "x",
+                    "cufftDoubleReal",
+                    "cufftCallbackLoadD",
+                    "cufftJITCallbackLoadDoubleReal",
+                )
                 store_types = (
                     "x.y",
                     "cufftDoubleComplex",
                     "cufftCallbackStoreZ",
+                    "cufftJITCallbackStoreDoubleComplex",
                 )
         else:  # irfft
             if dtype == np.complex64:
-                load_types = ("x.x", "cufftComplex", "cufftCallbackLoadC")
-                store_types = ("x", "cufftReal", "cufftCallbackStoreR")
-            else:
-                load_types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ")
-                store_types = ("x", "cufftDoubleReal", "cufftCallbackStoreD")
-        cb_load = _set_load_cb(load_code, *load_types)
-        cb_store = _set_store_cb(store_code, *store_types)
+                load_types = (
+                    "x.x",
+                    "cufftComplex",
+                    "cufftCallbackLoadC",
+                    "cufftJITCallbackLoadComplex",
+                )
+                store_types = (
+                    "x",
+                    "cufftReal",
+                    "cufftCallbackStoreR",
+                    "cufftJITCallbackStoreReal",
+                )
+            else:  # complex128
+                load_types = (
+                    "x.x",
+                    "cufftDoubleComplex",
+                    "cufftCallbackLoadZ",
+                    "cufftJITCallbackLoadDoubleComplex",
+                )
+                store_types = (
+                    "x",
+                    "cufftDoubleReal",
+                    "cufftCallbackStoreD",
+                    "cufftJITCallbackStoreDoubleReal",
+                )
+        cb_load = _set_load_cb(load_code, *load_types, cb_ver=self.cb_ver)
+        cb_load_name = load_types[-1] if self.cb_ver == "jit" else None
+        cb_store = _set_store_cb(store_code, *store_types, cb_ver=self.cb_ver)
+        cb_store_name = store_types[-1] if self.cb_ver == "jit" else None
 
         a = testing.shaped_random(self.shape, xp, dtype)
         if xp is np:
@@ -276,47 +647,71 @@ def _test_load_store_helper(self, xp, dtype, fft_func):
                 if dtype in (np.float32, np.complex64):
                     out = out.astype(np.float32)
         else:
-            with use_temporary_cache_dir():
-                with xp.fft.config.set_cufft_callbacks(
-                    cb_load=cb_load, cb_store=cb_store
-                ):
-                    out = fft(a, n=self.n, norm=self.norm)
+            with xp.fft.config.set_cufft_callbacks(
+                cb_load=cb_load,
+                cb_load_name=cb_load_name,
+                cb_store=cb_store,
+                cb_store_name=cb_store_name,
+                cb_ver=self.cb_ver,
+            ):
+                out = fft(a, n=self.n, norm=self.norm)
 
         return out
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_fft_load_store(self, xp, dtype):
         return self._test_load_store_helper(xp, dtype, "fft")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_ifft_load_store(self, xp, dtype):
         return self._test_load_store_helper(xp, dtype, "ifft")
 
+    @suppress_legacy_warning
     @testing.for_float_dtypes(no_float16=True)
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_rfft_load_store(self, xp, dtype):
         return self._test_load_store_helper(xp, dtype, "rfft")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_irfft_load_store(self, xp, dtype):
         return self._test_load_store_helper(xp, dtype, "irfft")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_fft_load_aux(self, xp, dtype):
+        if self.cb_ver == "legacy":
+            check_should_skip_legacy_test()
+        else:
+            check_should_skip_jit_test()
+
         fft = xp.fft.fft
         c = _load_callback_with_aux2
+        # for simplicity we use the JIT callback names for both legacy/jit
         if dtype == np.complex64:
-            cb_load = _set_load_cb(
-                c, "x.x", "cufftComplex", "cufftCallbackLoadC", "float"
+            types = (
+                "x.x",
+                "cufftComplex",
+                "cufftCallbackLoadC",
+                "cufftJITCallbackLoadComplex",
+                "float",
             )
-        else:
-            cb_load = _set_load_cb(
-                c, "x.x", "cufftDoubleComplex", "cufftCallbackLoadZ", "double"
+        else:  # complex128
+            types = (
+                "x.x",
+                "cufftDoubleComplex",
+                "cufftCallbackLoadZ",
+                "cufftJITCallbackLoadDoubleComplex",
+                "double",
             )
+        cb_load = _set_load_cb(c, *types, cb_ver=self.cb_ver)
+        cb_load_name = types[3] if self.cb_ver == "jit" else None
 
         a = testing.shaped_random(self.shape, xp, dtype)
         out_last = self.n if self.n is not None else self.shape[-1]
@@ -333,15 +728,22 @@ def test_fft_load_aux(self, xp, dtype):
             if dtype in (np.float32, np.complex64):
                 out = out.astype(np.complex64)
         else:
-            with use_temporary_cache_dir():
-                with xp.fft.config.set_cufft_callbacks(
-                    cb_load=cb_load, cb_load_aux_arr=b
-                ):
-                    out = fft(a, n=self.n, norm=self.norm)
+            with xp.fft.config.set_cufft_callbacks(
+                cb_load=cb_load,
+                cb_load_name=cb_load_name,
+                cb_load_data=b.data,
+                cb_ver=self.cb_ver,
+            ):
+                out = fft(a, n=self.n, norm=self.norm)
 
         return out
 
     def _test_load_store_aux_helper(self, xp, dtype, fft_func):
+        if self.cb_ver == "legacy":
+            check_should_skip_legacy_test()
+        else:
+            check_should_skip_jit_test()
+
         fft = getattr(xp.fft, fft_func)
         load_code = _load_callback_with_aux
         store_code = _store_callback_with_aux
@@ -349,53 +751,67 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func):
             load_aux = xp.asarray(2.5, dtype=xp.dtype(dtype).char.lower())
             store_aux = xp.asarray(3.8, dtype=xp.dtype(dtype).char.lower())
 
+        # for simplicity we use the JIT callback names for both legacy/jit
         if fft_func in ("fft", "ifft"):
             if dtype == np.complex64:
                 load_types = (
                     "x.x",
                     "cufftComplex",
                     "cufftCallbackLoadC",
+                    "cufftJITCallbackLoadComplex",
                     "float",
                 )
                 store_types = (
                     "x.y",
                     "cufftComplex",
                     "cufftCallbackStoreC",
+                    "cufftJITCallbackStoreComplex",
                     "float",
                 )
-            else:
+            else:  # complex128
                 load_types = (
                     "x.x",
                     "cufftDoubleComplex",
                     "cufftCallbackLoadZ",
+                    "cufftJITCallbackLoadDoubleComplex",
                     "double",
                 )
                 store_types = (
                     "x.y",
                     "cufftDoubleComplex",
                     "cufftCallbackStoreZ",
+                    "cufftJITCallbackStoreDoubleComplex",
                     "double",
                 )
         elif fft_func == "rfft":
             if dtype == np.float32:
-                load_types = ("x", "cufftReal", "cufftCallbackLoadR", "float")
+                load_types = (
+                    "x",
+                    "cufftReal",
+                    "cufftCallbackLoadR",
+                    "cufftJITCallbackLoadReal",
+                    "float",
+                )
                 store_types = (
                     "x.y",
                     "cufftComplex",
                     "cufftCallbackStoreC",
+                    "cufftJITCallbackStoreComplex",
                     "float",
                 )
-            else:
+            else:  # float64
                 load_types = (
                     "x",
                     "cufftDoubleReal",
                     "cufftCallbackLoadD",
+                    "cufftJITCallbackLoadDoubleReal",
                     "double",
                 )
                 store_types = (
                     "x.y",
                     "cufftDoubleComplex",
                     "cufftCallbackStoreZ",
+                    "cufftJITCallbackStoreDoubleComplex",
                     "double",
                 )
         else:  # irfft
@@ -404,24 +820,35 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func):
                     "x.x",
                     "cufftComplex",
                     "cufftCallbackLoadC",
+                    "cufftJITCallbackLoadComplex",
                     "float",
                 )
-                store_types = ("x", "cufftReal", "cufftCallbackStoreR", "float")
-            else:
+                store_types = (
+                    "x",
+                    "cufftReal",
+                    "cufftCallbackStoreR",
+                    "cufftJITCallbackStoreReal",
+                    "float",
+                )
+            else:  # complex128
                 load_types = (
                     "x.x",
                     "cufftDoubleComplex",
                     "cufftCallbackLoadZ",
+                    "cufftJITCallbackLoadDoubleComplex",
                     "double",
                 )
                 store_types = (
                     "x",
                     "cufftDoubleReal",
                     "cufftCallbackStoreD",
+                    "cufftJITCallbackStoreDoubleReal",
                     "double",
                 )
-        cb_load = _set_load_cb(load_code, *load_types)
-        cb_store = _set_store_cb(store_code, *store_types)
+        cb_load = _set_load_cb(load_code, *load_types, cb_ver=self.cb_ver)
+        cb_load_name = load_types[3] if self.cb_ver == "jit" else None
+        cb_store = _set_store_cb(store_code, *store_types, cb_ver=self.cb_ver)
+        cb_store_name = store_types[3] if self.cb_ver == "jit" else None
 
         a = testing.shaped_random(self.shape, xp, dtype)
         if xp is np:
@@ -436,32 +863,38 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func):
                 if dtype in (np.float32, np.complex64):
                     out = out.astype(np.float32)
         else:
-            with use_temporary_cache_dir():
-                with xp.fft.config.set_cufft_callbacks(
-                    cb_load=cb_load,
-                    cb_store=cb_store,
-                    cb_load_aux_arr=load_aux,
-                    cb_store_aux_arr=store_aux,
-                ):
-                    out = fft(a, n=self.n, norm=self.norm)
+            with xp.fft.config.set_cufft_callbacks(
+                cb_load=cb_load,
+                cb_load_name=cb_load_name,
+                cb_store=cb_store,
+                cb_store_name=cb_store_name,
+                cb_load_data=load_aux.data,
+                cb_store_data=store_aux.data,
+                cb_ver=self.cb_ver,
+            ):
+                out = fft(a, n=self.n, norm=self.norm)
 
         return out
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_fft_load_store_aux(self, xp, dtype):
         return self._test_load_store_aux_helper(xp, dtype, "fft")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_ifft_load_store_aux(self, xp, dtype):
         return self._test_load_store_aux_helper(xp, dtype, "ifft")
 
+    @suppress_legacy_warning
     @testing.for_float_dtypes(no_float16=True)
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_rfft_load_store_aux(self, xp, dtype):
         return self._test_load_store_aux_helper(xp, dtype, "rfft")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_irfft_load_store_aux(self, xp, dtype):
@@ -469,38 +902,92 @@ def test_irfft_load_store_aux(self, xp, dtype):
 
 
 @testing.parameterize(
-    {"shape": (3, 4), "s": None, "axes": None, "norm": None},
-    {"shape": (3, 4), "s": (1, 5), "axes": (-2, -1), "norm": None},
-    {"shape": (3, 4), "s": None, "axes": (-2, -1), "norm": None},
-    {"shape": (3, 4), "s": None, "axes": None, "norm": "ortho"},
-    {"shape": (2, 3, 4), "s": None, "axes": None, "norm": None},
-    {"shape": (2, 3, 4), "s": (1, 4, 10), "axes": (-3, -2, -1), "norm": None},
-    {"shape": (2, 3, 4), "s": None, "axes": (-3, -2, -1), "norm": None},
-    {"shape": (2, 3, 4), "s": None, "axes": None, "norm": "ortho"},
-    {"shape": (2, 3, 4), "s": (2, 3), "axes": (0, 1, 2), "norm": "ortho"},
-)
-@testing.with_requires("cython>=0.29.0")
-@pytest.mark.skipif(
-    not sys.platform.startswith("linux"),
-    reason="callbacks are only supported on Linux",
+    *(
+        testing.product_dict(
+            [
+                {"shape": (3, 4), "s": None, "axes": None, "norm": None},
+                {"shape": (3, 4), "s": (1, 5), "axes": (-2, -1), "norm": None},
+                {"shape": (3, 4), "s": None, "axes": (-2, -1), "norm": None},
+                {"shape": (3, 4), "s": None, "axes": None, "norm": "ortho"},
+                {"shape": (2, 3, 4), "s": None, "axes": None, "norm": None},
+                {
+                    "shape": (2, 3, 4),
+                    "s": (1, 4, 10),
+                    "axes": (-3, -2, -1),
+                    "norm": None,
+                },
+                {
+                    "shape": (2, 3, 4),
+                    "s": None,
+                    "axes": (-3, -2, -1),
+                    "norm": None,
+                },
+                {"shape": (2, 3, 4), "s": None, "axes": None, "norm": "ortho"},
+                {
+                    "shape": (2, 3, 4),
+                    "s": (2, 3),
+                    "axes": (0, 1, 2),
+                    "norm": "ortho",
+                },
+            ],
+            testing.product(
+                {
+                    "cb_ver": cb_ver_for_test,
+                },
+            ),
+        )
+    )
 )
 @pytest.mark.skipif(
     cupy.cuda.runtime.is_hip, reason="hipFFT does not support callbacks"
 )
 class TestNdCallbacks:
 
+    @pytest.fixture(scope="class", autouse=True)  # xunit hooks cannot yield
+    def setup_class(self):
+        # All tests in this class use a temporary cache dir (also if threaded)
+        with use_temporary_cache_dir():
+            yield
+
     def _test_load_helper(self, xp, dtype, fft_func):
+        if self.cb_ver == "legacy":
+            check_should_skip_legacy_test()
+        else:
+            check_should_skip_jit_test()
+
+        # for simplicity we use the JIT callback names for both legacy/jit
         fft = getattr(xp.fft, fft_func)
         load_code = _load_callback
         if dtype == np.complex64:
-            types = ("x.x", "cufftComplex", "cufftCallbackLoadC")
+            types = (
+                "x.x",
+                "cufftComplex",
+                "cufftCallbackLoadC",
+                "cufftJITCallbackLoadComplex",
+            )
         elif dtype == np.complex128:
-            types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ")
+            types = (
+                "x.x",
+                "cufftDoubleComplex",
+                "cufftCallbackLoadZ",
+                "cufftJITCallbackLoadDoubleComplex",
+            )
         elif dtype == np.float32:
-            types = ("x", "cufftReal", "cufftCallbackLoadR")
-        else:
-            types = ("x", "cufftDoubleReal", "cufftCallbackLoadD")
-        cb_load = _set_load_cb(load_code, *types)
+            types = (
+                "x",
+                "cufftReal",
+                "cufftCallbackLoadR",
+                "cufftJITCallbackLoadReal",
+            )
+        else:  # float64
+            types = (
+                "x",
+                "cufftDoubleReal",
+                "cufftCallbackLoadD",
+                "cufftJITCallbackLoadDoubleReal",
+            )
+        cb_load = _set_load_cb(load_code, *types, cb_ver=self.cb_ver)
+        cb_load_name = types[3] if self.cb_ver == "jit" else None
 
         a = testing.shaped_random(self.shape, xp, dtype)
         if xp is np:
@@ -512,12 +999,14 @@ def _test_load_helper(self, xp, dtype, fft_func):
                 else:
                     out = out.astype(np.float32)
         else:
-            with use_temporary_cache_dir():
-                with xp.fft.config.set_cufft_callbacks(cb_load=cb_load):
-                    out = fft(a, s=self.s, axes=self.axes, norm=self.norm)
+            with xp.fft.config.set_cufft_callbacks(
+                cb_load=cb_load, cb_load_name=cb_load_name, cb_ver=self.cb_ver
+            ):
+                out = fft(a, s=self.s, axes=self.axes, norm=self.norm)
 
         return out
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -525,6 +1014,7 @@ def _test_load_helper(self, xp, dtype, fft_func):
     def test_fftn_load(self, xp, dtype):
         return self._test_load_helper(xp, dtype, "fftn")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -532,6 +1022,7 @@ def test_fftn_load(self, xp, dtype):
     def test_ifftn_load(self, xp, dtype):
         return self._test_load_helper(xp, dtype, "ifftn")
 
+    @suppress_legacy_warning
     @testing.for_float_dtypes(no_float16=True)
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -539,6 +1030,7 @@ def test_ifftn_load(self, xp, dtype):
     def test_rfftn_load(self, xp, dtype):
         return self._test_load_helper(xp, dtype, "rfftn")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -547,23 +1039,61 @@ def test_irfftn_load(self, xp, dtype):
         return self._test_load_helper(xp, dtype, "irfftn")
 
     def _test_store_helper(self, xp, dtype, fft_func):
+        if self.cb_ver == "legacy":
+            check_should_skip_legacy_test()
+        else:
+            check_should_skip_jit_test()
+
         fft = getattr(xp.fft, fft_func)
         store_code = _store_callback
+
+        # for simplicity we use the JIT callback names for both legacy/jit
         if dtype == np.complex64:
             if fft_func != "irfftn":
-                types = ("x.y", "cufftComplex", "cufftCallbackStoreC")
-            else:
-                types = ("x", "cufftReal", "cufftCallbackStoreR")
+                types = (
+                    "x.y",
+                    "cufftComplex",
+                    "cufftCallbackStoreC",
+                    "cufftJITCallbackStoreComplex",
+                )
+            else:  # float32 for irfftn
+                types = (
+                    "x",
+                    "cufftReal",
+                    "cufftCallbackStoreR",
+                    "cufftJITCallbackStoreReal",
+                )
         elif dtype == np.complex128:
             if fft_func != "irfftn":
-                types = ("x.y", "cufftDoubleComplex", "cufftCallbackStoreZ")
-            else:
-                types = ("x", "cufftDoubleReal", "cufftCallbackStoreD")
+                types = (
+                    "x.y",
+                    "cufftDoubleComplex",
+                    "cufftCallbackStoreZ",
+                    "cufftJITCallbackStoreDoubleComplex",
+                )
+            else:  # float64 for irfftn
+                types = (
+                    "x",
+                    "cufftDoubleReal",
+                    "cufftCallbackStoreD",
+                    "cufftJITCallbackStoreDoubleReal",
+                )
         elif dtype == np.float32:
-            types = ("x.y", "cufftComplex", "cufftCallbackStoreC")
+            types = (
+                "x.y",
+                "cufftComplex",
+                "cufftCallbackStoreC",
+                "cufftJITCallbackStoreComplex",
+            )
         elif dtype == np.float64:
-            types = ("x.y", "cufftDoubleComplex", "cufftCallbackStoreZ")
-        cb_store = _set_store_cb(store_code, *types)
+            types = (
+                "x.y",
+                "cufftDoubleComplex",
+                "cufftCallbackStoreZ",
+                "cufftJITCallbackStoreDoubleComplex",
+            )
+        cb_store = _set_store_cb(store_code, *types, cb_ver=self.cb_ver)
+        cb_store_name = types[3] if self.cb_ver == "jit" else None
 
         a = testing.shaped_random(self.shape, xp, dtype)
         if xp is np:
@@ -577,12 +1107,16 @@ def _test_store_helper(self, xp, dtype, fft_func):
                 if dtype in (np.float32, np.complex64):
                     out = out.astype(np.float32)
         else:
-            with use_temporary_cache_dir():
-                with xp.fft.config.set_cufft_callbacks(cb_store=cb_store):
-                    out = fft(a, s=self.s, axes=self.axes, norm=self.norm)
+            with xp.fft.config.set_cufft_callbacks(
+                cb_store=cb_store,
+                cb_store_name=cb_store_name,
+                cb_ver=self.cb_ver,
+            ):
+                out = fft(a, s=self.s, axes=self.axes, norm=self.norm)
 
         return out
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -590,6 +1124,7 @@ def _test_store_helper(self, xp, dtype, fft_func):
     def test_fftn_store(self, xp, dtype):
         return self._test_store_helper(xp, dtype, "fftn")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -597,6 +1132,7 @@ def test_fftn_store(self, xp, dtype):
     def test_ifftn_store(self, xp, dtype):
         return self._test_store_helper(xp, dtype, "ifftn")
 
+    @suppress_legacy_warning
     @testing.for_float_dtypes(no_float16=True)
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -604,6 +1140,7 @@ def test_ifftn_store(self, xp, dtype):
     def test_rfftn_store(self, xp, dtype):
         return self._test_store_helper(xp, dtype, "rfftn")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -612,40 +1149,101 @@ def test_irfftn_store(self, xp, dtype):
         return self._test_store_helper(xp, dtype, "irfftn")
 
     def _test_load_store_helper(self, xp, dtype, fft_func):
+        if self.cb_ver == "legacy":
+            check_should_skip_legacy_test()
+        else:
+            check_should_skip_jit_test()
+
         fft = getattr(xp.fft, fft_func)
         load_code = _load_callback
         store_code = _store_callback
+
+        # for simplicity we use the JIT callback names for both legacy/jit
         if fft_func in ("fftn", "ifftn"):
             if dtype == np.complex64:
-                load_types = ("x.x", "cufftComplex", "cufftCallbackLoadC")
-                store_types = ("x.y", "cufftComplex", "cufftCallbackStoreC")
-            else:
-                load_types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ")
+                load_types = (
+                    "x.x",
+                    "cufftComplex",
+                    "cufftCallbackLoadC",
+                    "cufftJITCallbackLoadComplex",
+                )
+                store_types = (
+                    "x.y",
+                    "cufftComplex",
+                    "cufftCallbackStoreC",
+                    "cufftJITCallbackStoreComplex",
+                )
+            else:  # complex128
+                load_types = (
+                    "x.x",
+                    "cufftDoubleComplex",
+                    "cufftCallbackLoadZ",
+                    "cufftJITCallbackLoadDoubleComplex",
+                )
                 store_types = (
                     "x.y",
                     "cufftDoubleComplex",
                     "cufftCallbackStoreZ",
+                    "cufftJITCallbackStoreDoubleComplex",
                 )
         elif fft_func == "rfftn":
             if dtype == np.float32:
-                load_types = ("x", "cufftReal", "cufftCallbackLoadR")
-                store_types = ("x.y", "cufftComplex", "cufftCallbackStoreC")
-            else:
-                load_types = ("x", "cufftDoubleReal", "cufftCallbackLoadD")
+                load_types = (
+                    "x",
+                    "cufftReal",
+                    "cufftCallbackLoadR",
+                    "cufftJITCallbackLoadReal",
+                )
+                store_types = (
+                    "x.y",
+                    "cufftComplex",
+                    "cufftCallbackStoreC",
+                    "cufftJITCallbackStoreComplex",
+                )
+            else:  # float64
+                load_types = (
+                    "x",
+                    "cufftDoubleReal",
+                    "cufftCallbackLoadD",
+                    "cufftJITCallbackLoadDoubleReal",
+                )
                 store_types = (
                     "x.y",
                     "cufftDoubleComplex",
                     "cufftCallbackStoreZ",
+                    "cufftJITCallbackStoreDoubleComplex",
                 )
         else:  # irfft
             if dtype == np.complex64:
-                load_types = ("x.x", "cufftComplex", "cufftCallbackLoadC")
-                store_types = ("x", "cufftReal", "cufftCallbackStoreR")
-            else:
-                load_types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ")
-                store_types = ("x", "cufftDoubleReal", "cufftCallbackStoreD")
-        cb_load = _set_load_cb(load_code, *load_types)
-        cb_store = _set_store_cb(store_code, *store_types)
+                load_types = (
+                    "x.x",
+                    "cufftComplex",
+                    "cufftCallbackLoadC",
+                    "cufftJITCallbackLoadComplex",
+                )
+                store_types = (
+                    "x",
+                    "cufftReal",
+                    "cufftCallbackStoreR",
+                    "cufftJITCallbackStoreReal",
+                )
+            else:  # complex128
+                load_types = (
+                    "x.x",
+                    "cufftDoubleComplex",
+                    "cufftCallbackLoadZ",
+                    "cufftJITCallbackLoadDoubleComplex",
+                )
+                store_types = (
+                    "x",
+                    "cufftDoubleReal",
+                    "cufftCallbackStoreD",
+                    "cufftJITCallbackStoreDoubleReal",
+                )
+        cb_load = _set_load_cb(load_code, *load_types, cb_ver=self.cb_ver)
+        cb_load_name = load_types[3] if self.cb_ver == "jit" else None
+        cb_store = _set_store_cb(store_code, *store_types, cb_ver=self.cb_ver)
+        cb_store_name = store_types[3] if self.cb_ver == "jit" else None
 
         a = testing.shaped_random(self.shape, xp, dtype)
         if xp is np:
@@ -660,14 +1258,18 @@ def _test_load_store_helper(self, xp, dtype, fft_func):
                 if dtype in (np.float32, np.complex64):
                     out = out.astype(np.float32)
         else:
-            with use_temporary_cache_dir():
-                with xp.fft.config.set_cufft_callbacks(
-                    cb_load=cb_load, cb_store=cb_store
-                ):
-                    out = fft(a, s=self.s, axes=self.axes, norm=self.norm)
+            with xp.fft.config.set_cufft_callbacks(
+                cb_load=cb_load,
+                cb_load_name=cb_load_name,
+                cb_store=cb_store,
+                cb_store_name=cb_store_name,
+                cb_ver=self.cb_ver,
+            ):
+                out = fft(a, s=self.s, axes=self.axes, norm=self.norm)
 
         return out
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -675,6 +1277,7 @@ def _test_load_store_helper(self, xp, dtype, fft_func):
     def test_fftn_load_store(self, xp, dtype):
         return self._test_load_store_helper(xp, dtype, "fftn")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -682,6 +1285,7 @@ def test_fftn_load_store(self, xp, dtype):
     def test_ifftn_load_store(self, xp, dtype):
         return self._test_load_store_helper(xp, dtype, "ifftn")
 
+    @suppress_legacy_warning
     @testing.for_float_dtypes(no_float16=True)
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -689,6 +1293,7 @@ def test_ifftn_load_store(self, xp, dtype):
     def test_rfftn_load_store(self, xp, dtype):
         return self._test_load_store_helper(xp, dtype, "rfftn")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -697,6 +1302,11 @@ def test_irfftn_load_store(self, xp, dtype):
         return self._test_load_store_helper(xp, dtype, "irfftn")
 
     def _test_load_store_aux_helper(self, xp, dtype, fft_func):
+        if self.cb_ver == "legacy":
+            check_should_skip_legacy_test()
+        else:
+            check_should_skip_jit_test()
+
         fft = getattr(xp.fft, fft_func)
         load_code = _load_callback_with_aux
         store_code = _store_callback_with_aux
@@ -704,53 +1314,67 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func):
             load_aux = xp.asarray(2.5, dtype=xp.dtype(dtype).char.lower())
             store_aux = xp.asarray(3.8, dtype=xp.dtype(dtype).char.lower())
 
+        # for simplicity we use the JIT callback names for both legacy/jit
         if fft_func in ("fftn", "ifftn"):
             if dtype == np.complex64:
                 load_types = (
                     "x.x",
                     "cufftComplex",
                     "cufftCallbackLoadC",
+                    "cufftJITCallbackLoadComplex",
                     "float",
                 )
                 store_types = (
                     "x.y",
                     "cufftComplex",
                     "cufftCallbackStoreC",
+                    "cufftJITCallbackStoreComplex",
                     "float",
                 )
-            else:
+            else:  # complex128
                 load_types = (
                     "x.x",
                     "cufftDoubleComplex",
                     "cufftCallbackLoadZ",
+                    "cufftJITCallbackLoadDoubleComplex",
                     "double",
                 )
                 store_types = (
                     "x.y",
                     "cufftDoubleComplex",
                     "cufftCallbackStoreZ",
+                    "cufftJITCallbackStoreDoubleComplex",
                     "double",
                 )
         elif fft_func == "rfftn":
             if dtype == np.float32:
-                load_types = ("x", "cufftReal", "cufftCallbackLoadR", "float")
+                load_types = (
+                    "x",
+                    "cufftReal",
+                    "cufftCallbackLoadR",
+                    "cufftJITCallbackLoadReal",
+                    "float",
+                )
                 store_types = (
                     "x.y",
                     "cufftComplex",
                     "cufftCallbackStoreC",
+                    "cufftJITCallbackStoreComplex",
                     "float",
                 )
-            else:
+            else:  # float64
                 load_types = (
                     "x",
                     "cufftDoubleReal",
                     "cufftCallbackLoadD",
+                    "cufftJITCallbackLoadDoubleReal",
                     "double",
                 )
                 store_types = (
                     "x.y",
                     "cufftDoubleComplex",
                     "cufftCallbackStoreZ",
+                    "cufftJITCallbackStoreDoubleComplex",
                     "double",
                 )
         else:  # irfftn
@@ -759,24 +1383,35 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func):
                     "x.x",
                     "cufftComplex",
                     "cufftCallbackLoadC",
+                    "cufftJITCallbackLoadComplex",
                     "float",
                 )
-                store_types = ("x", "cufftReal", "cufftCallbackStoreR", "float")
-            else:
+                store_types = (
+                    "x",
+                    "cufftReal",
+                    "cufftCallbackStoreR",
+                    "cufftJITCallbackStoreReal",
+                    "float",
+                )
+            else:  # complex128
                 load_types = (
                     "x.x",
                     "cufftDoubleComplex",
                     "cufftCallbackLoadZ",
+                    "cufftJITCallbackLoadDoubleComplex",
                     "double",
                 )
                 store_types = (
                     "x",
                     "cufftDoubleReal",
                     "cufftCallbackStoreD",
+                    "cufftJITCallbackStoreDoubleReal",
                     "double",
                 )
-        cb_load = _set_load_cb(load_code, *load_types)
-        cb_store = _set_store_cb(store_code, *store_types)
+        cb_load = _set_load_cb(load_code, *load_types, cb_ver=self.cb_ver)
+        cb_load_name = load_types[3] if self.cb_ver == "jit" else None
+        cb_store = _set_store_cb(store_code, *store_types, cb_ver=self.cb_ver)
+        cb_store_name = store_types[3] if self.cb_ver == "jit" else None
 
         a = testing.shaped_random(self.shape, xp, dtype)
         if xp is np:
@@ -791,17 +1426,20 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func):
                 if dtype in (np.float32, np.complex64):
                     out = out.astype(np.float32)
         else:
-            with use_temporary_cache_dir():
-                with xp.fft.config.set_cufft_callbacks(
-                    cb_load=cb_load,
-                    cb_store=cb_store,
-                    cb_load_aux_arr=load_aux,
-                    cb_store_aux_arr=store_aux,
-                ):
-                    out = fft(a, s=self.s, axes=self.axes, norm=self.norm)
+            with xp.fft.config.set_cufft_callbacks(
+                cb_load=cb_load,
+                cb_load_name=cb_load_name,
+                cb_store=cb_store,
+                cb_store_name=cb_store_name,
+                cb_load_data=load_aux.data,
+                cb_store_data=store_aux.data,
+                cb_ver=self.cb_ver,
+            ):
+                out = fft(a, s=self.s, axes=self.axes, norm=self.norm)
 
         return out
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -809,6 +1447,7 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func):
     def test_fftn_load_store_aux(self, xp, dtype):
         return self._test_load_store_aux_helper(xp, dtype, "fftn")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -816,6 +1455,7 @@ def test_fftn_load_store_aux(self, xp, dtype):
     def test_ifftn_load_store_aux(self, xp, dtype):
         return self._test_load_store_aux_helper(xp, dtype, "ifftn")
 
+    @suppress_legacy_warning
     @testing.for_float_dtypes(no_float16=True)
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -823,6 +1463,7 @@ def test_ifftn_load_store_aux(self, xp, dtype):
     def test_rfftn_load_store_aux(self, xp, dtype):
         return self._test_load_store_aux_helper(xp, dtype, "rfftn")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
diff --git a/dpnp/tests/third_party/cupy/fft_tests/test_fft.py b/dpnp/tests/third_party/cupy/fft_tests/test_fft.py
index 534b474363f1..369409ba001c 100644
--- a/dpnp/tests/third_party/cupy/fft_tests/test_fft.py
+++ b/dpnp/tests/third_party/cupy/fft_tests/test_fft.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import functools
 import warnings
 
@@ -6,6 +8,14 @@
 
 import dpnp as cupy
 from dpnp.tests.helper import has_support_aspect64
+
+# from cupy.fft import config
+# from cupy.fft._fft import (
+#     _default_fft_func,
+#     _fft,
+#     _fftn,
+#     _size_last_transform_axis,
+# )
 from dpnp.tests.third_party.cupy import testing
 from dpnp.tests.third_party.cupy.testing._loops import _wraps_partial
 
@@ -36,12 +46,16 @@ def decorator(impl):
         @_wraps_partial(impl, name)
         def test_func(self, *args, **kw):
             # get original global planning state
-            # planning_state = config.enable_nd_planning
+            # with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+            #     planning_state = config.enable_nd_planning
             try:
                 for nd_planning in states:
                     try:
                         # enable or disable nd planning
-                        # config.enable_nd_planning = nd_planning
+                        # with pytest.warns(
+                        #     DeprecationWarning, match="enable_nd_planning"
+                        # ):
+                        #     config.enable_nd_planning = nd_planning
 
                         kw[name] = nd_planning
                         impl(self, *args, **kw)
@@ -50,7 +64,10 @@ def test_func(self, *args, **kw):
                         raise
             finally:
                 # restore original global planning state
-                # config.enable_nd_planning = planning_state
+                # with pytest.warns(
+                #     DeprecationWarning, match="enable_nd_planning"
+                # ):
+                #     config.enable_nd_planning = planning_state
                 pass
 
         return test_func
@@ -71,8 +88,8 @@ def multi_gpu_config(gpu_configs=None):
     def decorator(impl):
         @functools.wraps(impl)
         def test_func(self, *args, **kw):
-            use_multi_gpus = config.use_multi_gpus
-            _devices = config._devices
+            use_multi_gpus = config._use_multi_gpus.get()
+            _devices = config._devices.get()
 
             try:
                 for gpus in gpu_configs:
@@ -81,23 +98,21 @@ def test_func(self, *args, **kw):
                         assert nGPUs >= 2, "Must use at least two gpus"
                         config.use_multi_gpus = True
                         config.set_cufft_gpus(gpus)
-                        self.gpus = gpus
 
                         impl(self, *args, **kw)
                     except Exception:
                         print("GPU config is:", gpus)
                         raise
             finally:
-                config.use_multi_gpus = use_multi_gpus
-                config._devices = _devices
-                del self.gpus
+                config._use_multi_gpus.set(use_multi_gpus)
+                config._devices.set(_devices)
 
         return test_func
 
     return decorator
 
 
-# @testing.with_requires("numpy>=2.0")
+@testing.with_requires("numpy>=2.0")
 @pytest.mark.usefixtures("skip_forward_backward")
 @testing.parameterize(
     *testing.product(
@@ -138,7 +153,7 @@ def test_ifft(self, xp, dtype):
         return xp.fft.ifft(a, n=self.n, norm=self.norm)
 
 
-# @testing.with_requires("numpy>=2.0")
+@testing.with_requires("numpy>=2.0")
 @testing.parameterize(
     *testing.product(
         {
@@ -179,19 +194,6 @@ def test_ifft(self, xp, dtype):
         return xp.fft.ifft(a, axis=self.axis)
 
 
-# See #3757 and NVIDIA internal ticket 3093094
-def _skip_multi_gpu_bug(shape, gpus):
-    # avoid CUDA 11.0 (will be fixed by CUDA 11.2) bug triggered by
-    # - batch = 1
-    # - gpus = [1, 0]
-    if (
-        11000 <= cupy.cuda.runtime.runtimeGetVersion() < 11020
-        and len(shape) == 1
-        and gpus == [1, 0]
-    ):
-        pytest.skip("avoid CUDA 11 bug")
-
-
 # Almost identical to the TestFft class, except that
 # 1. multi-GPU cuFFT is used
 # 2. the tested parameter combinations are adjusted to meet the requirements
@@ -208,6 +210,9 @@ def _skip_multi_gpu_bug(shape, gpus):
 )
 @pytest.mark.skip("multi GPU is not supported")
 @testing.multi_gpu(2)
+# @pytest.mark.skipif(
+#     cupy.cuda.runtime.is_hip, reason="hipFFT does not support multi-GPU FFT"
+# )
 class TestMultiGpuFft:
 
     @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]])
@@ -216,8 +221,6 @@ class TestMultiGpuFft:
         rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False
     )
     def test_fft(self, xp, dtype):
-        _skip_multi_gpu_bug(self.shape, self.gpus)
-
         a = testing.shaped_random(self.shape, xp, dtype)
         return xp.fft.fft(a, n=self.n, norm=self.norm)
 
@@ -230,8 +233,6 @@ def test_fft(self, xp, dtype):
     @testing.with_requires("numpy!=1.17.0")
     @testing.with_requires("numpy!=1.17.1")
     def test_ifft(self, xp, dtype):
-        _skip_multi_gpu_bug(self.shape, self.gpus)
-
         a = testing.shaped_random(self.shape, xp, dtype)
         return xp.fft.ifft(a, n=self.n, norm=self.norm)
 
@@ -251,6 +252,9 @@ def test_ifft(self, xp, dtype):
 )
 @pytest.mark.skip("multi GPU is not supported")
 @testing.multi_gpu(2)
+# @pytest.mark.skipif(
+#     cupy.cuda.runtime.is_hip, reason="hipFFT does not support multi-GPU FFT"
+# )
 class TestMultiGpuFftOrder:
     @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]])
     @testing.for_complex_dtypes()
@@ -258,8 +262,6 @@ class TestMultiGpuFftOrder:
         rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False
     )
     def test_fft(self, xp, dtype):
-        _skip_multi_gpu_bug(self.shape, self.gpus)
-
         a = testing.shaped_random(self.shape, xp, dtype)
         if self.data_order == "F":
             a = xp.asfortranarray(a)
@@ -271,8 +273,6 @@ def test_fft(self, xp, dtype):
         rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False
     )
     def test_ifft(self, xp, dtype):
-        _skip_multi_gpu_bug(self.shape, self.gpus)
-
         a = testing.shaped_random(self.shape, xp, dtype)
         if self.data_order == "F":
             a = xp.asfortranarray(a)
@@ -348,9 +348,13 @@ def test_default_fft_func(self, enable_nd):
 
 @pytest.mark.skip("memory management is not supported")
 @testing.with_requires("numpy>=2.0")
+# @pytest.mark.skipif(
+#     10010 <= cupy.cuda.runtime.runtimeGetVersion() <= 11010,
+#     reason="avoid a cuFFT bug (cupy/cupy#3777)",
+# )
 @testing.slow
 class TestFftAllocate:
-
+    # @pytest.mark.thread_unsafe(reason="does large allocations")
     def test_fft_allocate(self):
         # Check CuFFTError is not raised when the GPU memory is enough.
         # See https://github.com/cupy/cupy/issues/1063
@@ -368,7 +372,7 @@ def test_fft_allocate(self):
         cupy.fft.config.clear_plan_cache()
 
 
-# @testing.with_requires("numpy>=2.0")
+@testing.with_requires("numpy>=2.0")
 @pytest.mark.usefixtures("skip_forward_backward")
 @testing.parameterize(
     *(
@@ -413,7 +417,8 @@ class TestFft2:
         type_check=has_support_aspect64(),
     )
     def test_fft2(self, xp, dtype, order, enable_nd):
-        # assert config.enable_nd_planning == enable_nd
+        # with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+        #     assert config.enable_nd_planning == enable_nd
         a = testing.shaped_random(self.shape, xp, dtype)
         if order == "F":
             a = xp.asfortranarray(a)
@@ -440,7 +445,8 @@ def test_fft2(self, xp, dtype, order, enable_nd):
         type_check=has_support_aspect64(),
     )
     def test_ifft2(self, xp, dtype, order, enable_nd):
-        # assert config.enable_nd_planning == enable_nd
+        # with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+        #     assert config.enable_nd_planning == enable_nd
         a = testing.shaped_random(self.shape, xp, dtype)
         if order == "F":
             a = xp.asfortranarray(a)
@@ -457,7 +463,7 @@ def test_ifft2(self, xp, dtype, order, enable_nd):
         return out
 
 
-# @testing.with_requires("numpy>=2.0")
+@testing.with_requires("numpy>=2.0")
 @pytest.mark.usefixtures("skip_forward_backward")
 @testing.parameterize(
     *(
@@ -503,7 +509,8 @@ class TestFftn:
         type_check=has_support_aspect64(),
     )
     def test_fftn(self, xp, dtype, order, enable_nd):
-        # assert config.enable_nd_planning == enable_nd
+        # with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+        #     assert config.enable_nd_planning == enable_nd
         a = testing.shaped_random(self.shape, xp, dtype)
         if order == "F":
             a = xp.asfortranarray(a)
@@ -530,7 +537,8 @@ def test_fftn(self, xp, dtype, order, enable_nd):
         type_check=has_support_aspect64(),
     )
     def test_ifftn(self, xp, dtype, order, enable_nd):
-        # assert config.enable_nd_planning == enable_nd
+        # with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+        #     assert config.enable_nd_planning == enable_nd
         a = testing.shaped_random(self.shape, xp, dtype)
         if order == "F":
             a = xp.asfortranarray(a)
@@ -595,7 +603,8 @@ def skip_buggy(self):
         rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False
     )
     def test_fftn(self, xp, dtype, enable_nd):
-        assert config.enable_nd_planning == enable_nd
+        with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+            assert config.enable_nd_planning == enable_nd
         a = testing.shaped_random(self.shape, xp, dtype)
 
         if xp is np:
@@ -613,7 +622,8 @@ def test_fftn(self, xp, dtype, enable_nd):
         rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False
     )
     def test_ifftn(self, xp, dtype, enable_nd):
-        assert config.enable_nd_planning == enable_nd
+        with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+            assert config.enable_nd_planning == enable_nd
         a = testing.shaped_random(self.shape, xp, dtype)
 
         if xp is np:
@@ -635,7 +645,8 @@ def test_fftn_error_on_wrong_plan(self, dtype, enable_nd):
         from cupy.fft import fftn
         from cupyx.scipy.fftpack import get_fft_plan
 
-        assert config.enable_nd_planning == enable_nd
+        with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+            assert config.enable_nd_planning == enable_nd
 
         # can't get a plan, so skip
         if self.axes is not None:
@@ -747,6 +758,9 @@ def test_fft_error_on_wrong_plan(self, dtype):
 )
 @pytest.mark.skip("get_fft_plan() is not supported")
 @testing.multi_gpu(2)
+# @pytest.mark.skipif(
+#     cupy.cuda.runtime.is_hip, reason="hipFFT does not support multi-GPU FFT"
+# )
 class TestMultiGpuPlanCtxManagerFft:
 
     @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]])
@@ -755,8 +769,6 @@ class TestMultiGpuPlanCtxManagerFft:
         rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False
     )
     def test_fft(self, xp, dtype):
-        _skip_multi_gpu_bug(self.shape, self.gpus)
-
         a = testing.shaped_random(self.shape, xp, dtype)
 
         if xp is np:
@@ -776,8 +788,6 @@ def test_fft(self, xp, dtype):
         rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False
     )
     def test_ifft(self, xp, dtype):
-        _skip_multi_gpu_bug(self.shape, self.gpus)
-
         a = testing.shaped_random(self.shape, xp, dtype)
 
         if xp is np:
@@ -1029,7 +1039,8 @@ class TestRfft2:
         type_check=has_support_aspect64(),
     )
     def test_rfft2(self, xp, dtype, order, enable_nd):
-        # assert config.enable_nd_planning == enable_nd
+        # with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+        #     assert config.enable_nd_planning == enable_nd
         a = testing.shaped_random(self.shape, xp, dtype)
         if order == "F":
             a = xp.asfortranarray(a)
@@ -1046,14 +1057,21 @@ def test_rfft2(self, xp, dtype, order, enable_nd):
         type_check=has_support_aspect64(),
     )
     def test_irfft2(self, xp, dtype, order, enable_nd):
-        # assert config.enable_nd_planning == enable_nd
+        # with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+        #     assert config.enable_nd_planning == enable_nd
+        # if (
+        #     10020 >= cupy.cuda.runtime.runtimeGetVersion() >= 10010
+        #     and int(cupy.cuda.device.get_compute_capability()) < 70
+        #     and _size_last_transform_axis(self.shape, self.s, self.axes) == 2
+        # ):
+        #     pytest.skip("work-around for cuFFT issue")
         a = testing.shaped_random(self.shape, xp, dtype)
         if order == "F":
             a = xp.asfortranarray(a)
         return xp.fft.irfft2(a, s=self.s, axes=self.axes, norm=self.norm)
 
 
-# @testing.with_requires("numpy>=2.0")
+@testing.with_requires("numpy>=2.0")
 @testing.parameterize(
     {"shape": (3, 4), "s": None, "axes": (), "norm": None},
     {"shape": (2, 3, 4), "s": None, "axes": (), "norm": None},
@@ -1115,7 +1133,8 @@ class TestRfftn:
         type_check=has_support_aspect64(),
     )
     def test_rfftn(self, xp, dtype, order, enable_nd):
-        # assert config.enable_nd_planning == enable_nd
+        # with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+        #     assert config.enable_nd_planning == enable_nd
         a = testing.shaped_random(self.shape, xp, dtype)
         if order == "F":
             a = xp.asfortranarray(a)
@@ -1132,7 +1151,14 @@ def test_rfftn(self, xp, dtype, order, enable_nd):
         type_check=has_support_aspect64(),
     )
     def test_irfftn(self, xp, dtype, order, enable_nd):
-        # assert config.enable_nd_planning == enable_nd
+        # with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+        #     assert config.enable_nd_planning == enable_nd
+        # if (
+        #     10020 >= cupy.cuda.runtime.runtimeGetVersion() >= 10010
+        #     and int(cupy.cuda.device.get_compute_capability()) < 70
+        #     and _size_last_transform_axis(self.shape, self.s, self.axes) == 2
+        # ):
+        #     pytest.skip("work-around for cuFFT issue")
         a = testing.shaped_random(self.shape, xp, dtype)
         if order == "F":
             a = xp.asfortranarray(a)
@@ -1182,7 +1208,8 @@ def skip_buggy(self):
         rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False
     )
     def test_rfftn(self, xp, dtype, enable_nd):
-        assert config.enable_nd_planning == enable_nd
+        with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+            assert config.enable_nd_planning == enable_nd
         a = testing.shaped_random(self.shape, xp, dtype)
 
         if xp is np:
@@ -1194,13 +1221,17 @@ def test_rfftn(self, xp, dtype, enable_nd):
         with plan:
             return xp.fft.rfftn(a, s=self.s, axes=self.axes, norm=self.norm)
 
+    # @pytest.mark.skipif(
+    #     cupy.cuda.runtime.is_hip, reason="hipFFT's PlanNd for C2R is buggy"
+    # )
     @nd_planning_states()
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False
     )
     def test_irfftn(self, xp, dtype, enable_nd):
-        assert config.enable_nd_planning == enable_nd
+        with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+            assert config.enable_nd_planning == enable_nd
         a = testing.shaped_random(self.shape, xp, dtype)
         if xp is np:
             return xp.fft.irfftn(a, s=self.s, axes=self.axes, norm=self.norm)
@@ -1282,7 +1313,7 @@ def test_ifftn_orders(self, dtype, enable_nd):
                 pass
 
 
-# @testing.with_requires("numpy>=2.0")
+@testing.with_requires("numpy>=2.0")
 @testing.parameterize(
     {"shape": (3, 4), "s": None, "axes": (), "norm": None},
     {"shape": (2, 3, 4), "s": None, "axes": (), "norm": None},
@@ -1343,7 +1374,7 @@ def test_ihfft(self, xp, dtype):
         return xp.fft.ihfft(a, n=self.n, norm=self.norm)
 
 
-# @testing.with_requires("numpy>=2.0")
+@testing.with_requires("numpy>=2.0")
 @testing.parameterize(
     {"n": 1, "d": 1},
     {"n": 10, "d": 0.5},
@@ -1372,7 +1403,7 @@ def test_rfftfreq(self, xp, dtype):
         return xp.fft.rfftfreq(self.n, self.d)
 
 
-# @testing.with_requires("numpy>=2.0")
+@testing.with_requires("numpy>=2.0")
 @testing.parameterize(
     {"shape": (5,), "axes": None},
     {"shape": (5,), "axes": 0},
diff --git a/dpnp/tests/third_party/cupy/functional_tests/test_piecewise.py b/dpnp/tests/third_party/cupy/functional_tests/test_piecewise.py
index 5ce72bd806b8..c16a9e91f9dd 100644
--- a/dpnp/tests/third_party/cupy/functional_tests/test_piecewise.py
+++ b/dpnp/tests/third_party/cupy/functional_tests/test_piecewise.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import unittest
 
 import numpy
diff --git a/dpnp/tests/third_party/cupy/math_tests/test_rounding.py b/dpnp/tests/third_party/cupy/math_tests/test_rounding.py
index a2ad717f2500..10e79715dd10 100644
--- a/dpnp/tests/third_party/cupy/math_tests/test_rounding.py
+++ b/dpnp/tests/third_party/cupy/math_tests/test_rounding.py
@@ -66,6 +66,7 @@ def test_trunc(self):
         self.check_unary("trunc")
         self.check_unary_complex_unsupported("trunc")
 
+    @pytest.mark.filterwarnings("ignore::DeprecationWarning")
     @testing.with_requires("numpy>=2.1")
     def test_fix(self):
         self.check_unary("fix")
diff --git a/dpnp/tests/third_party/cupy/random_tests/test_generator.py b/dpnp/tests/third_party/cupy/random_tests/test_generator.py
index 9d1a3d233473..abb58df07af9 100644
--- a/dpnp/tests/third_party/cupy/random_tests/test_generator.py
+++ b/dpnp/tests/third_party/cupy/random_tests/test_generator.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import functools
 import os
 import threading
@@ -850,6 +852,7 @@ def test_goodness_of_fit(self):
         assert _hypothesis.chi_square_test(counts, expected)
 
     @_condition.repeat(3, 10)
+    # @pytest.mark.xfail(runtime.is_hip, reason="ROCm/HIP may have a bug")
     def test_goodness_of_fit_2(self):
         vals = self.generate(3, (5, 20), True, [0.3, 0.3, 0.4]).get()
         counts = numpy.histogram(vals, bins=numpy.arange(4))[0]
@@ -929,6 +932,163 @@ def test_bound(self):
         assert numpy.unique(val).size == val.size
 
 
+@testing.parameterize(
+    # Edge cases with small domain sizes
+    {"a": 0, "size": 0},
+    {"a": 1, "size": 1},
+    {"a": 2, "size": 1},
+    {"a": 256, "size": 100},  # Minimum cipher bits threshold
+    {"a": 257, "size": 100},
+    # Large-scale uniqueness
+    {"a": 100, "size": 50},
+    {"a": 1000, "size": 500},
+    {"a": 10000, "size": 5000},
+    {"a": 100000, "size": 50000},
+    # Full input permutation
+    {"a": 10, "size": 10},
+    {"a": 100, "size": 100},
+    {"a": 1000, "size": 1000},
+    # Power of 2
+    {"a": 2**8, "size": 100},
+    {"a": 2**10, "size": 500},
+    {"a": 2**16, "size": 1000},
+    {"a": 2**20, "size": 5000},
+    {"a": 2**24, "size": 10000},
+    # Just below power of 2
+    {"a": 2**8 - 1, "size": 100},
+    {"a": 2**16 - 1, "size": 1000},
+    {"a": 2**20 - 1, "size": 5000},
+    # Just above power of 2
+    {"a": 2**8 + 1, "size": 100},
+    {"a": 2**16 + 1, "size": 1000},
+    {"a": 2**20 + 1, "size": 5000},
+    # Test multi-dimensional shapes.
+    {"a": 6, "size": (2, 3)},
+    {"a": 32, "size": (4, 5)},
+    {"a": 120, "size": (5, 4, 5)},
+)
+@testing.fix_random()
+class TestChoiceReplaceFalseLargeScale(RandomGeneratorTestCase):
+    """Test large-scale uniqueness for Feistel bijection implementation."""
+
+    target_method = "choice"
+
+    def test_uniqueness_and_bounds(self):
+        """Test that samples have no duplicates and correct bounds."""
+        val = self.generate(a=self.a, size=self.size, replace=False).get()
+        size = self.size if isinstance(self.size, tuple) else (self.size,)
+
+        # Check shape
+        assert val.shape == size
+
+        # Check bounds
+        assert (0 <= val).all()
+        assert (val < self.a).all()
+
+        # Check uniqueness
+        val_flat = numpy.asarray(val).flatten()
+        assert (
+            numpy.unique(val_flat).size == val_flat.size
+        ), "Found duplicate values in replace=False sample"
+
+
+@testing.fix_random()
+class TestChoiceReplaceFalseStatistical(RandomGeneratorTestCase):
+    """Statistical tests for uniformity of Feistel bijection."""
+
+    target_method = "choice"
+
+    @_condition.repeat(3)
+    def test_small_domain_uniformity(self):
+        """Chi-square test for uniform sampling in small domain."""
+        # Sample from domain of size 10, taking 5 elements
+        # Repeat many times and check each index appears uniformly
+        n = 10
+        sample_size = 5
+        n_trials = 1000
+
+        counts = cupy.zeros(n, dtype=int)
+        vals = self.generate_many(
+            n, size=sample_size, replace=False, _count=n_trials
+        )
+        for val in vals:
+            counts[val] += 1
+        counts = counts.get()
+
+        # Each index should appear ~500 times (5/10 * 1000)
+        expected = numpy.ones(n, dtype=int) * (sample_size * n_trials // n)
+        assert _hypothesis.chi_square_test(counts, expected)
+
+    @_condition.repeat(3, 10)
+    def test_permutation_variability(self):
+        """Test that repeated full permutations are different."""
+        n = 20
+        n_trials = 10
+
+        vals = self.generate_many(n, size=n, replace=False, _count=n_trials)
+        perms = cupy.vstack(vals)
+
+        # Should have multiple unique permutations
+        unique_perms = cupy.unique(perms, axis=0)
+        assert (
+            len(unique_perms) == n_trials
+        ), "Permutations should vary across multiple calls"
+
+
+@testing.slow
+@testing.fix_random()
+class TestChoiceReplaceFalseVeryLargeDomain(unittest.TestCase):
+    """Test memory efficiency with very large domains."""
+
+    def setUp(self):
+        self.rs = _generator.RandomState(seed=testing.generate_seed())
+
+    def test_large_domain_memory_efficiency(self):
+        """Test that very large domains don't allocate full arrays."""
+        # This should NOT allocate a 2^30 element array
+        # If it did, it would require ~8GB of memory
+        a = 2**30
+        size = 1000
+
+        val = self.rs.choice(a=a, size=size, replace=False).get()
+
+        # Check bounds
+        assert (0 <= val).all()
+        assert (val < a).all()
+
+        # Check uniqueness
+        assert numpy.unique(val).size == size
+
+    def test_near_32bit_limit(self):
+        """Test at the 32-bit boundary."""
+        # Current implementation supports up to 2^32
+        a = 2**31
+        size = 500
+
+        val = self.rs.choice(a=a, size=size, replace=False).get()
+
+        # Check bounds
+        assert (0 <= val).all()
+        assert (val < a).all()
+
+        # Check uniqueness
+        assert numpy.unique(val).size == size
+
+
+@testing.fix_random()
+class TestChoiceReplaceFalseDtypeConsistency(RandomGeneratorTestCase):
+    """Test output dtype consistency."""
+
+    target_method = "choice"
+
+    def test_integer_input_dtype(self):
+        """Integer input should produce int64/long dtype."""
+        val = self.generate(a=100, size=50, replace=False)
+
+        # Should be 'l' (long) dtype, which is int64 on most platforms
+        assert val.dtype == numpy.dtype("l") or val.dtype == numpy.int64
+
+
 @testing.fix_random()
 class TestGumbel(RandomGeneratorTestCase):
 
diff --git a/dpnp/tests/third_party/cupy/test_init.py b/dpnp/tests/third_party/cupy/test_init.py
index 0a841ba28b21..a4f1ad78f7f5 100644
--- a/dpnp/tests/third_party/cupy/test_init.py
+++ b/dpnp/tests/third_party/cupy/test_init.py
@@ -36,11 +36,9 @@ def _run_script(code):
 
 
 def _test_cupy_available(self):
-    returncode, stdoutdata, stderrdata = _run_script(
-        """
+    returncode, stdoutdata, stderrdata = _run_script("""
 import dpnp as cupy
-print(cupy.is_available())"""
-    )
+print(cupy.is_available())""")
     assert returncode == 0, "stderr: {!r}".format(stderrdata)
     assert stdoutdata in (b"True\n", b"True\r\n", b"False\n", b"False\r\n")
     return stdoutdata == b"True\n" or stdoutdata == b"True\r\n"
@@ -49,14 +47,12 @@ def _test_cupy_available(self):
 class TestImportError(unittest.TestCase):
 
     def test_import_error(self):
-        returncode, stdoutdata, stderrdata = _run_script(
-            """
+        returncode, stdoutdata, stderrdata = _run_script("""
 try:
     import dpnp as cupy
 except Exception as e:
     print(type(e).__name__)
-"""
-        )
+""")
         assert returncode == 0, "stderr: {!r}".format(stderrdata)
         assert stdoutdata in (b"", b"RuntimeError\n")
 
diff --git a/dpnp/tests/third_party/cupy/testing/_loops.py b/dpnp/tests/third_party/cupy/testing/_loops.py
index 6a383780b9ca..63cd09147c4b 100644
--- a/dpnp/tests/third_party/cupy/testing/_loops.py
+++ b/dpnp/tests/third_party/cupy/testing/_loops.py
@@ -307,7 +307,7 @@ def decorator(impl):
         @_wraps_partial_xp(impl, name, sp_name, scipy_name)
         def test_func(*args, **kw):
             # Run cupy and numpy
-            (cupy_result, cupy_error, numpy_result, numpy_error) = (
+            cupy_result, cupy_error, numpy_result, numpy_error = (
                 _call_func_numpy_cupy(impl, args, kw, name, sp_name, scipy_name)
             )
             assert cupy_result is not None or cupy_error is not None
@@ -344,9 +344,7 @@ def test_func(*args, **kw):
                         raise AssertionError(
                             """ndarrays of different dtypes are returned.
 cupy: {}
-numpy: {}""".format(
-                                cupy_r.dtype, numpy_r.dtype
-                            )
+numpy: {}""".format(cupy_r.dtype, numpy_r.dtype)
                         )
 
             # Check contiguous
@@ -902,7 +900,7 @@ def decorator(impl):
         @_wraps_partial_xp(impl, name, sp_name, scipy_name)
         def test_func(*args, **kw):
             # Run cupy and numpy
-            (cupy_result, cupy_error, numpy_result, numpy_error) = (
+            cupy_result, cupy_error, numpy_result, numpy_error = (
                 _call_func_numpy_cupy(impl, args, kw, name, sp_name, scipy_name)
             )
 
@@ -958,7 +956,7 @@ def decorator(impl):
         @_wraps_partial_xp(impl, name, sp_name, scipy_name)
         def test_func(*args, **kw):
             # Run cupy and numpy
-            (cupy_result, cupy_error, numpy_result, numpy_error) = (
+            cupy_result, cupy_error, numpy_result, numpy_error = (
                 _call_func_numpy_cupy(impl, args, kw, name, sp_name, scipy_name)
             )
 
diff --git a/dpnp/tests/third_party/cupy/testing/_pytest_impl.py b/dpnp/tests/third_party/cupy/testing/_pytest_impl.py
index 97dfee5e174a..ecc4a6338eb5 100644
--- a/dpnp/tests/third_party/cupy/testing/_pytest_impl.py
+++ b/dpnp/tests/third_party/cupy/testing/_pytest_impl.py
@@ -19,14 +19,10 @@ def is_available():
 
 def check_available(feature):
     if not is_available():
-        raise RuntimeError(
-            """\
+        raise RuntimeError("""\
 cupy.testing: {} is not available.
 
-Reason: {}: {}""".format(
-                feature, type(_error).__name__, _error
-            )
-        )
+Reason: {}: {}""".format(feature, type(_error).__name__, _error))
 
 
 if is_available():
diff --git a/environments/dpctl_pkg.txt b/environments/dpctl_pkg.txt
index 29fdceb21e82..9d585f1ec230 100644
--- a/environments/dpctl_pkg.txt
+++ b/environments/dpctl_pkg.txt
@@ -1,2 +1,2 @@
 --index-url https://pypi.anaconda.org/dppy/label/dev/simple
-dpctl>=0.21.0dev0
+dpctl>=0.22.0dev0
diff --git a/environments/dpctl_pkg.yml b/environments/dpctl_pkg.yml
index 6cea30d0e65a..16d295cdc09c 100644
--- a/environments/dpctl_pkg.yml
+++ b/environments/dpctl_pkg.yml
@@ -2,4 +2,4 @@ name: Install dpctl package
 channels:
   - dppy/label/dev
 dependencies:
-  - dpctl>=0.21.0dev0
+  - dpctl>=0.22.0dev0
diff --git a/examples/example1.py b/examples/example1.py
index fdcbfbac9249..4d84c43c686c 100644
--- a/examples/example1.py
+++ b/examples/example1.py
@@ -37,7 +37,6 @@
 
 """
 
-
 import time
 
 import dpctl
diff --git a/examples/example2.py b/examples/example2.py
index 853f879a7f11..eb856a2bed80 100644
--- a/examples/example2.py
+++ b/examples/example2.py
@@ -37,7 +37,6 @@
 
 """
 
-
 import time
 
 import numpy
diff --git a/examples/example4.py b/examples/example4.py
index 0149c22f7ced..ea70ef18d612 100644
--- a/examples/example4.py
+++ b/examples/example4.py
@@ -34,7 +34,6 @@
 
 """
 
-
 import numpy
 
 """
diff --git a/pyproject.toml b/pyproject.toml
index 6fbc23ac1b88..d659428877fc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -50,7 +50,7 @@ dependencies = [
   # "dpcpp-cpp-rt>=0.59.0",
   # "intel-cmplr-lib-rt>=0.59.0"
   # WARNING: use the latest dpctl dev version, otherwise stable w/f will fail
-  "dpctl>=0.21.0dev0",
+  "dpctl>=0.22.0dev0",
   "numpy>=1.26.0"
 ]
 description = "Data Parallel Extension for NumPy"
@@ -163,26 +163,6 @@ allow-wildcard-with-all = true
 [tool.pylint.miscellaneous]
 notes = ["FIXME", "XXX"]
 
-[tool.pytest.ini.options]
-addopts = [
-  "--junitxml=junit.xml",
-  "--ignore setup.py",
-  "--ignore run_test.py",
-  "--cov-report term-missing",
-  "--tb native",
-  "--strict",
-  "--durations=20",
-  "-q -ra"
-]
-minversion = "6.0"
-norecursedirs = [
-  ".*",
-  "*.egg*",
-  "build",
-  "dist",
-  "conda-recipe"
-]
-
 [tool.versioneer]
 VCS = "git"
 parentdir_prefix = "dpnp-"
diff --git a/scripts/_build_helper.py b/scripts/_build_helper.py
new file mode 100644
index 000000000000..f9e303454848
--- /dev/null
+++ b/scripts/_build_helper.py
@@ -0,0 +1,249 @@
+# *****************************************************************************
+# Copyright (c) 2026, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+#   may be used to endorse or promote products derived from this software
+#   without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+import os
+import shutil
+import subprocess
+import sys
+import warnings
+
+
+def get_dpctl_cmake_dir():
+    """
+    Return the directory printed by `python -m dpctl --cmakedir`.
+
+    The result is intended for `-DDpctl_ROOT=<dir>`: passing it is needed when
+    dpctl was built locally with its `scripts/build_locally.py` and optional
+    when dpctl is conda installed.
+
+    """
+    completed = subprocess.run(
+        [sys.executable, "-m", "dpctl", "--cmakedir"],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+    )
+    if completed.returncode != 0:
+        raise RuntimeError(
+            "Failed to retrieve dpctl cmake directory: "
+            + completed.stderr.decode("utf-8").strip()
+        )
+
+    return completed.stdout.decode("utf-8").strip()
+
+
+def resolve_compilers(
+    oneapi: bool,
+    c_compiler: str,
+    cxx_compiler: str,
+    compiler_root: str,
+):
+    """Resolve the (C, C++) compilers; raise RuntimeError when unusable."""
+    is_linux = "linux" in sys.platform
+
+    if oneapi or (
+        c_compiler is None and cxx_compiler is None and compiler_root is None
+    ):
+        return "icx", ("icpx" if is_linux else "icx")
+
+    # default values
+    if c_compiler is None:
+        c_compiler = "icx"
+    if cxx_compiler is None:
+        cxx_compiler = "icpx" if is_linux else "icx"
+
+    # Each relative name is joined with compiler_root below, so a valid root
+    # is required unless *both* compilers are given as absolute paths.
+    if not (os.path.isabs(c_compiler) and os.path.isabs(cxx_compiler)) and (
+        not compiler_root or not os.path.exists(compiler_root)
+    ):
+        raise RuntimeError(
+            "--compiler-root option must be set when using non-default DPC++ "
+            "layout unless absolute paths are provided for both compilers"
+        )
+
+    compiler_paths = []
+    for name, opt_name in (
+        (c_compiler, "--c-compiler"),
+        (cxx_compiler, "--cxx-compiler"),
+    ):
+        if os.path.isabs(name):
+            path = name
+        else:
+            path = os.path.join(compiler_root, name)
+        if not os.path.exists(path):
+            raise RuntimeError(
+                f"{opt_name} value {name} not found and {path} not exist"
+            )
+
+        compiler_paths.append(path)
+    return tuple(compiler_paths)
+
+
+def resolve_onemath(
+    onemath: bool,
+    onemath_dir: str,
+    target_cuda: str = None,
+    target_hip: str = None,
+    onemkl_interfaces: bool = False,
+    onemkl_interfaces_dir: str = None,
+):
+    """Normalize the oneMath options and return `(onemath, onemath_dir)`."""
+    # the oneMath i/f is always enabled when a oneMath path is passed or when
+    # building for a CUDA or HIP target
+    if onemath_dir or target_cuda or target_hip:
+        onemath = True
+
+    # TODO: onemkl_interfaces and onemkl_interfaces_dir are deprecated in
+    # dpnp-0.19.0 and should be removed in dpnp-0.20.0.
+    if onemkl_interfaces:
+        onemath = True
+        warnings.warn(
+            "Using 'onemkl_interfaces' is deprecated. Please use 'onemath' instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+
+    if onemkl_interfaces_dir is not None:
+        # the deprecated spelling wins over a value given via `onemath_dir`
+        onemath_dir = onemkl_interfaces_dir
+        warnings.warn(
+            "Using 'onemkl_interfaces_dir' is deprecated. Please use 'onemath_dir' instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+    return onemath, onemath_dir
+
+
+def run(cmd: list[str], env: dict[str, str] = None, cwd: str = None):
+    """Echo `cmd`, then execute it; a non-zero exit status raises."""
+    print("+", " ".join(cmd))
+    env, cwd = env or os.environ.copy(), cwd or os.getcwd()
+    subprocess.check_call(cmd, env=env, cwd=cwd)
+
+
+def capture_cmd_output(cmd: list[str], cwd: str = None):
+    """Echo `cmd`, run it, and return its stdout decoded as UTF-8."""
+    print("+", " ".join(cmd))
+    raw = subprocess.check_output(cmd, cwd=cwd or os.getcwd())
+    text = raw.decode("utf-8")
+    # only newline characters are trimmed from both ends
+    return text.strip("\n")
+
+
+def err(msg: str, script: str):
+    raise RuntimeError(f"[{script}] error: {msg}")
+
+
+def log_cmake_args(cmake_args: list[str], script: str):
+    print(f"[{script}] Using CMake args:\n{' '.join(cmake_args)}")
+
+
+def make_cmake_args(
+    c_compiler: str = None,
+    cxx_compiler: str = None,
+    dpctl_cmake_dir: str = None,
+    onemath: bool = False,
+    onemath_dir: str = None,
+    verbose: bool = False,
+    other_opts: str = None,
+):
+    """Build the list of CMake arguments; options left unset are omitted."""
+    args = [
+        f"-DCMAKE_C_COMPILER:PATH={c_compiler}" if c_compiler else "",
+        f"-DCMAKE_CXX_COMPILER:PATH={cxx_compiler}" if cxx_compiler else "",
+        f"-DDpctl_ROOT={dpctl_cmake_dir}" if dpctl_cmake_dir else "",
+    ]
+    if onemath:
+        args.append("-DDPNP_USE_ONEMATH=ON")
+        if onemath_dir:
+            args.append(f"-DDPNP_ONEMATH_DIR={onemath_dir}")
+
+    if verbose:
+        args.append("-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON")
+    if other_opts:
+        args.extend(other_opts.split())
+    # drop the "" placeholders so no empty argument reaches the command line
+    return [arg for arg in args if arg]
+
+
+def build_extension(
+    setup_dir: str,
+    env: dict[str, str],
+    cmake_args: list[str],
+    cmake_executable: str = None,
+    generator: str = None,
+    build_type: str = None,
+):
+    """Run `setup.py build_ext --inplace` from `setup_dir` using `env`."""
+    optional_flags = (
+        ("--cmake-executable", cmake_executable),
+        ("--generator", generator),
+        ("--build-type", build_type),
+    )
+
+    command = [sys.executable, "setup.py", "build_ext", "--inplace"]
+    # only the options that carry a truthy value are put on the command line
+    command += [f"{flag}={value}" for flag, value in optional_flags if value]
+    if cmake_args:
+        # the CMake arguments follow a literal "--" separator
+        command += ["--", *cmake_args]
+
+    run(command, env=env, cwd=setup_dir)
+
+
+def install_editable(setup_dir: str, env: dict[str, str]):
+    """Run `pip install -e . --no-build-isolation` from `setup_dir`."""
+    pip_install = [
+        sys.executable,
+        "-m",
+        "pip",
+        "install",
+        "-e",
+        ".",
+        "--no-build-isolation",
+    ]
+
+    # the command is started with `setup_dir` as its working directory
+    run(pip_install, env=env, cwd=setup_dir)
+
+
+def clean_build_dir(setup_dir: str):
+    """Delete the `_skbuild` directory located directly in `setup_dir`."""
+    is_valid = isinstance(setup_dir, str) and os.path.isdir(setup_dir)
+    if not is_valid:
+        raise RuntimeError(f"Invalid setup directory provided: '{setup_dir}'")
+
+    target = os.path.join(setup_dir, "_skbuild")
+    if not os.path.exists(target):
+        return
+    print(f"Cleaning build directory: {target}")
+    try:
+        shutil.rmtree(target)
+    except Exception:
+        print(f"Failed to remove build directory: '{target}'")
+        raise
diff --git a/scripts/build_locally.py b/scripts/build_locally.py
index 1197de9d9455..e17fc8e478fe 100644
--- a/scripts/build_locally.py
+++ b/scripts/build_locally.py
@@ -26,290 +26,211 @@
 # THE POSSIBILITY OF SUCH DAMAGE.
 # *****************************************************************************
 
+import argparse
 import os
-import subprocess
 import sys
-import warnings
 
-warnings.simplefilter("default", DeprecationWarning)
-
-
-def run(
-    use_oneapi=True,
-    build_type="Release",
-    c_compiler=None,
-    cxx_compiler=None,
-    compiler_root=None,
-    cmake_executable=None,
-    verbose=False,
-    cmake_opts="",
-    target_cuda=None,
-    target_hip=None,
-    onemkl_interfaces=False,
-    onemkl_interfaces_dir=None,
-    onemath=False,
-    onemath_dir=None,
-):
-    build_system = None
-
-    if "linux" in sys.platform:
-        build_system = "Ninja"
-    elif sys.platform in ["win32", "cygwin"]:
-        build_system = "Ninja"
-    else:
-        raise AssertionError(sys.platform + " not supported")
-
-    setup_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-    cmake_args = [
-        sys.executable,
-        "setup.py",
-        "develop",
-    ]
-    if cmake_executable:
-        cmake_args += [
-            "--cmake-executable=" + cmake_executable,
-        ]
-
-    # if dpctl is locally built using `script/build_locally.py`, it is needed
-    # to pass the -DDpctl_ROOT=$(python -m dpctl --cmakedir)
-    # if dpctl is conda installed, it is optional to pass this parameter
-    process = subprocess.Popen(
-        ["python", "-m", "dpctl", "--cmakedir"],
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-    )
-    output, error = process.communicate()
-    if process.returncode == 0:
-        cmake_dir = output.decode("utf-8").strip()
-    else:
-        raise RuntimeError(
-            "Failed to retrieve dpctl cmake directory: "
-            + error.decode("utf-8").strip()
-        )
-
-    cmake_args += [
-        "--build-type=" + build_type,
-        "--generator=" + build_system,
-        "--",
-        "-DCMAKE_C_COMPILER:PATH=" + c_compiler,
-        "-DCMAKE_CXX_COMPILER:PATH=" + cxx_compiler,
-        "-DDpctl_ROOT=" + cmake_dir,
-    ]
-    if verbose:
-        cmake_args += [
-            "-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON",
-        ]
-    if cmake_opts:
-        cmake_args += cmake_opts.split()
-    if use_oneapi:
-        if "DPL_ROOT" in os.environ:
-            os.environ["DPL_ROOT_HINT"] = os.environ["DPL_ROOT"]
-
-    # TODO: onemkl_interfaces and onemkl_interfaces_dir are deprecated in
-    # dpnp-0.19.0 and should be removed in dpnp-0.20.0.
-    if onemkl_interfaces:
-        warnings.warn(
-            "Using 'onemkl_interfaces' is deprecated. Please use 'onemath' instead.",
-            DeprecationWarning,
-            stacklevel=1,
-        )
-        onemath = True
-    if onemkl_interfaces_dir is not None:
-        warnings.warn(
-            "Using 'onemkl_interfaces_dir' is deprecated. Please use 'onemath_dir' instead.",
-            DeprecationWarning,
-            stacklevel=1,
-        )
-        onemath_dir = onemkl_interfaces_dir
-
-    if target_cuda is not None:
-        if not target_cuda.strip():
-            raise ValueError(
-                "--target-cuda can not be an empty string. "
-                "Use --target-cuda=<arch> or --target-cuda"
-            )
-        cmake_args += [
-            f"-DDPNP_TARGET_CUDA={target_cuda}",
-        ]
-        # Always builds using oneMath for the cuda target
-        onemath = True
-
-    if target_hip is not None:
-        if not target_hip.strip():
-            raise ValueError(
-                "--target-hip requires an architecture (e.g., gfx90a)"
-            )
-        cmake_args += [
-            f"-DHIP_TARGETS={target_hip}",
-        ]
-        # Always builds using oneMath for the hip target
-        onemath = True
-
-    if onemath:
-        cmake_args += [
-            "-DDPNP_USE_ONEMATH=ON",
-        ]
-
-        if onemath_dir:
-            cmake_args += [
-                f"-DDPNP_ONEMATH_DIR={onemath_dir}",
-            ]
-    elif onemath_dir:
-        raise RuntimeError("--onemath-dir option is not supported")
-
-    subprocess.check_call(
-        cmake_args, shell=False, cwd=setup_dir, env=os.environ
+from _build_helper import (
+    build_extension,
+    clean_build_dir,
+    err,
+    get_dpctl_cmake_dir,
+    install_editable,
+    log_cmake_args,
+    make_cmake_args,
+    resolve_compilers,
+    resolve_onemath,
+)
+
+
+def parse_args():
+    p = argparse.ArgumentParser(description="Local dpnp build driver")
+
+    # compiler and oneAPI related options
+    p.add_argument(
+        "--c-compiler",
+        type=str,
+        default=None,
+        help="Path or name of C compiler",
     )
-
-
-if __name__ == "__main__":
-    import argparse
-
-    parser = argparse.ArgumentParser(
-        description="Driver to build dpnp for in-place installation"
+    p.add_argument(
+        "--cxx-compiler",
+        type=str,
+        default=None,
+        help="Path or name of C++ compiler",
     )
-    driver = parser.add_argument_group(title="Coverage driver arguments")
-    driver.add_argument("--c-compiler", help="Name of C compiler", default=None)
-    driver.add_argument(
-        "--cxx-compiler", help="Name of C++ compiler", default=None
+    p.add_argument(
+        "--compiler-root",
+        type=str,
+        default=None,
+        help="Path to compiler installation root",
     )
-    driver.add_argument(
+    p.add_argument(
         "--oneapi",
-        help="Set if using one-API installation",
         dest="oneapi",
         action="store_true",
+        help="Use default oneAPI compiler layout",
     )
-    driver.add_argument(
-        "--debug",
-        default="Release",
-        const="Debug",
-        action="store_const",
-        help="Set the compilation mode to debugging",
+
+    # CMake related options
+    p.add_argument(
+        "--generator", type=str, default="Ninja", help="CMake generator"
     )
-    driver.add_argument(
-        "--compiler-root",
+    p.add_argument(
+        "--cmake-executable",
         type=str,
-        help="Path to compiler home directory",
         default=None,
+        help="Path to CMake executable used by build",
     )
-    driver.add_argument(
-        "--cmake-executable",
+    p.add_argument(
+        "--cmake-opts",
         type=str,
-        help="Path to cmake executable",
-        default=None,
+        default="",
+        help="Additional options to pass directly to CMake",
     )
-    driver.add_argument(
+    p.add_argument(
+        "--debug",
+        dest="build_type",
+        const="Debug",
+        action="store_const",
+        default="Release",
+        help="Set build type to Debug (defaults to Release)",
+    )
+    p.add_argument(
         "--verbose",
-        help="Build using vebose makefile mode",
         dest="verbose",
         action="store_true",
+        help="Enable verbose makefile output",
     )
-    driver.add_argument(
-        "--cmake-opts",
-        help="Channels through additional cmake options",
-        dest="cmake_opts",
-        default="",
-        type=str,
-    )
-    driver.add_argument(
+
+    # platform target related options
+    p.add_argument(
         "--target-cuda",
         nargs="?",
         const="ON",
-        help="Enable CUDA target for build; "
-        "optionally specify architecture (e.g., --target-cuda=sm_80)",
         default=None,
-        type=str,
+        help="Enable CUDA build. Architecture is optional to specify (e.g., --target-cuda=sm_80).",
     )
-    driver.add_argument(
+    p.add_argument(
         "--target-hip",
         required=False,
-        help="Enable HIP target for build. "
-        "Must specify HIP architecture (e.g., --target-hip=gfx90a)",
         type=str,
+        help="Enable HIP backend. Architecture required to be specified  (e.g., --target-hip=gfx90a).",
     )
-    driver.add_argument(
+
+    # oneMath related options
+    p.add_argument(
         "--onemkl_interfaces",
         help="(DEPRECATED) Build using oneMath",
         dest="onemkl_interfaces",
         action="store_true",
     )
-    driver.add_argument(
+    p.add_argument(
         "--onemkl_interfaces_dir",
         help="(DEPRECATED) Local directory with source of oneMath",
         dest="onemkl_interfaces_dir",
         default=None,
         type=str,
     )
-    driver.add_argument(
+    p.add_argument(
         "--onemath",
         help="Build using oneMath",
         dest="onemath",
         action="store_true",
     )
-    driver.add_argument(
+    p.add_argument(
         "--onemath-dir",
         help="Local directory with source of oneMath",
         dest="onemath_dir",
         default=None,
         type=str,
     )
-    args = parser.parse_args()
 
-    args_to_validate = [
-        "c_compiler",
-        "cxx_compiler",
-        "compiler_root",
-    ]
+    # build related options
+    p.add_argument(
+        "--clean",
+        action="store_true",
+        help="Remove build dir before rebuild",
+    )
+    p.add_argument(
+        "--skip-editable",
+        action="store_true",
+        help="Skip pip editable install step",
+    )
 
-    if args.oneapi or (
-        args.c_compiler is None
-        and args.cxx_compiler is None
-        and args.compiler_root is None
-    ):
-        args.c_compiler = "icx"
-        args.cxx_compiler = "icpx" if "linux" in sys.platform else "icx"
-        args.compiler_root = None
-    else:
-        cr = args.compiler_root
-        if isinstance(cr, str) and os.path.exists(cr):
-            if args.c_compiler is None:
-                args.c_compiler = "icx"
-            if args.cxx_compiler is None:
-                args.cxx_compiler = "icpx" if "linux" in sys.platform else "icx"
-        else:
-            raise RuntimeError(
-                "Option 'compiler-root' must be provided when "
-                "using non-default DPC++ layout."
-            )
-        args_to_validate = [
-            "c_compiler",
-            "cxx_compiler",
-        ]
-        for p in args_to_validate:
-            arg = getattr(args, p)
-            assert isinstance(arg, str)
-            if not os.path.exists(arg):
-                arg2 = os.path.join(cr, arg)
-                if os.path.exists(arg2):
-                    arg = arg2
-                    setattr(args, p, arg)
-            if not os.path.exists(arg):
-                opt_name = p.replace("_", "-")
-                raise RuntimeError(f"Option {opt_name} value {arg} must exist.")
+    return p.parse_args()
 
-    run(
-        use_oneapi=args.oneapi,
-        build_type=args.debug,
-        c_compiler=args.c_compiler,
-        cxx_compiler=args.cxx_compiler,
-        compiler_root=args.compiler_root,
-        cmake_executable=args.cmake_executable,
+
+def main():
+    if sys.platform not in ["cygwin", "win32", "linux"]:
+        err(f"{sys.platform} not supported", "build_locally")
+
+    args = parse_args()
+    setup_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+    c_compiler, cxx_compiler = resolve_compilers(
+        args.oneapi, args.c_compiler, args.cxx_compiler, args.compiler_root
+    )
+
+    dpctl_cmake_dir = get_dpctl_cmake_dir()
+    print(f"[build_locally] Found DPCTL CMake dir: {dpctl_cmake_dir}")
+
+    onemath, onemath_dir = resolve_onemath(
+        args.onemath,
+        args.onemath_dir,
+        args.target_cuda,
+        args.target_hip,
+        args.onemkl_interfaces,
+        args.onemkl_interfaces_dir,
+    )
+
+    # clean build dir if --clean set
+    if args.clean:
+        clean_build_dir(setup_dir)
+
+    cmake_args = make_cmake_args(
+        c_compiler=c_compiler,
+        cxx_compiler=cxx_compiler,
+        dpctl_cmake_dir=dpctl_cmake_dir,
+        onemath=onemath,
+        onemath_dir=onemath_dir,
         verbose=args.verbose,
-        cmake_opts=args.cmake_opts,
-        target_cuda=args.target_cuda,
-        target_hip=args.target_hip,
-        onemkl_interfaces=args.onemkl_interfaces,
-        onemkl_interfaces_dir=args.onemkl_interfaces_dir,
-        onemath=args.onemath,
-        onemath_dir=args.onemath_dir,
+        other_opts=args.cmake_opts,
     )
+
+    # reject an empty --target-hip value (an architecture is required)
+    if args.target_hip is not None and not args.target_hip.strip():
+        err("--target-hip requires an explicit architecture", "build_locally")
+
+    # CUDA/HIP targets
+    if args.target_cuda:
+        cmake_args += [f"-DDPNP_TARGET_CUDA={args.target_cuda}"]
+    if args.target_hip:
+        cmake_args += [f"-DDPNP_TARGET_HIP={args.target_hip}"]
+
+    log_cmake_args(cmake_args, "build_locally")
+
+    print("[build_locally] Building extensions in-place...")
+
+    env = os.environ.copy()
+    if args.oneapi and "DPL_ROOT" in env:
+        env["DPL_ROOT_HINT"] = env["DPL_ROOT"]
+
+    build_extension(
+        setup_dir,
+        env,
+        cmake_args,
+        cmake_executable=args.cmake_executable,
+        generator=args.generator,
+        build_type=args.build_type,
+    )
+    if not args.skip_editable:
+        install_editable(setup_dir, env)
+    else:
+        print("[build_locally] Skipping editable install (--skip-editable)")
+
+    print("[build_locally] Build complete")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/gen_coverage.py b/scripts/gen_coverage.py
index a624bd570e2d..588345d91b2e 100644
--- a/scripts/gen_coverage.py
+++ b/scripts/gen_coverage.py
@@ -26,67 +26,185 @@
 # THE POSSIBILITY OF SUCH DAMAGE.
 # *****************************************************************************
 
+import argparse
 import os
 import subprocess
 import sys
 
+from _build_helper import (
+    build_extension,
+    capture_cmd_output,
+    clean_build_dir,
+    err,
+    get_dpctl_cmake_dir,
+    install_editable,
+    log_cmake_args,
+    make_cmake_args,
+    resolve_compilers,
+    run,
+)
 
-def run(
-    use_oneapi=True,
-    c_compiler=None,
-    cxx_compiler=None,
-    compiler_root=None,
-    bin_llvm=None,
-    pytest_opts="",
-    verbose=False,
-):
-    IS_LIN = False
-
-    if "linux" in sys.platform:
-        IS_LIN = True
-    elif sys.platform in ["win32", "cygwin"]:
-        pass
+
+def parse_args():
+    p = argparse.ArgumentParser(description="Build dpnp and generate coverage")
+
+    # compiler and oneAPI related options
+    p.add_argument(
+        "--c-compiler", default=None, help="Path or name of C compiler"
+    )
+    p.add_argument(
+        "--cxx-compiler", default=None, help="Path or name of C++ compiler"
+    )
+    p.add_argument(
+        "--compiler-root",
+        type=str,
+        default=None,
+        help="Path to compiler installation root",
+    )
+    p.add_argument(
+        "--oneapi",
+        dest="oneapi",
+        action="store_true",
+        help="Use default oneAPI compiler layout",
+    )
+    p.add_argument(
+        "--bin-llvm",
+        type=str,
+        default=None,
+        help="Path to folder where llvm-cov/llvm-profdata can be found",
+    )
+
+    # CMake related options
+    p.add_argument(
+        "--generator", type=str, default="Ninja", help="CMake generator"
+    )
+    p.add_argument(
+        "--cmake-executable",
+        type=str,
+        default=None,
+        help="Path to CMake executable used by build",
+    )
+
+    p.add_argument(
+        "--cmake-opts",
+        type=str,
+        default="",
+        help="Additional options to pass directly to CMake",
+    )
+    p.add_argument(
+        "--verbose",
+        dest="verbose",
+        action="store_true",
+        help="Enable verbose makefile output",
+    )
+
+    # test related options
+    p.add_argument(
+        "--skip-pytest",
+        dest="run_pytest",
+        action="store_false",
+        help="Skip running pytest and coverage generation",
+    )
+    p.add_argument(
+        "--pytest-opts",
+        help="Channels through additional pytest options",
+        dest="pytest_opts",
+        default="",
+        type=str,
+    )
+
+    # build related options
+    p.add_argument(
+        "--clean",
+        action="store_true",
+        help="Remove build dir before rebuild (default: False)",
+    )
+
+    return p.parse_args()
+
+
+def find_bin_llvm(compiler):
+    if os.path.isabs(compiler):
+        bin_dir = os.path.dirname(compiler)
     else:
-        raise AssertionError(sys.platform + " not supported")
+        compiler_path = capture_cmd_output(["which", compiler])
+        if not compiler_path:
+            raise RuntimeError(f"Compiler {compiler} not found in PATH")
+        bin_dir = os.path.dirname(compiler_path)
 
-    if not IS_LIN:
+    compiler_dir = os.path.join(bin_dir, "compiler")
+    if os.path.exists(compiler_dir):
+        bin_llvm = compiler_dir
+    else:
+        bin_dir = os.path.dirname(bin_dir)
+        bin_llvm = os.path.join(bin_dir, "bin-llvm")
+
+    if not os.path.exists(bin_llvm):
         raise RuntimeError(
-            "This scripts only supports coverage collection on Linux"
+            f"Path to folder with llvm-cov/llvm-profdata={bin_llvm} "
+            "seems to not exist"
         )
+    return bin_llvm
+
 
+def main():
+    is_linux = "linux" in sys.platform
+    if not is_linux:
+        err(f"{sys.platform} not supported", "gen_coverage")
+
+    args = parse_args()
     setup_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
-    cmake_args = [
-        sys.executable,
-        "setup.py",
-        "develop",
-        "--generator=Ninja",
-        "--",
-        "-DCMAKE_C_COMPILER:PATH=" + c_compiler,
-        "-DCMAKE_CXX_COMPILER:PATH=" + cxx_compiler,
-        "-DDPNP_GENERATE_COVERAGE=ON",
-    ]
-
-    env = {}
-    if bin_llvm:
-        env = {
-            "PATH": ":".join((os.environ.get("PATH", ""), bin_llvm)),
-            "LLVM_TOOLS_HOME": bin_llvm,
-        }
+    c_compiler, cxx_compiler = resolve_compilers(
+        args.oneapi,
+        args.c_compiler,
+        args.cxx_compiler,
+        args.compiler_root,
+    )
 
-    # extend with global environment variables
-    env.update({k: v for k, v in os.environ.items() if k != "PATH"})
+    dpctl_cmake_dir = get_dpctl_cmake_dir()
+    print(f"[gen_coverage] Found DPCTL CMake dir: {dpctl_cmake_dir}")
 
-    if verbose:
-        cmake_args += [
-            "-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON",
-        ]
+    if args.clean:
+        clean_build_dir(setup_dir)
+
+    cmake_args = make_cmake_args(
+        c_compiler=c_compiler,
+        cxx_compiler=cxx_compiler,
+        dpctl_cmake_dir=dpctl_cmake_dir,
+        verbose=args.verbose,
+    )
+    cmake_args.append("-DDPNP_GENERATE_COVERAGE=ON")
+
+    env = os.environ.copy()
+
+    if args.bin_llvm:
+        bin_llvm = args.bin_llvm
+    else:
+        bin_llvm = find_bin_llvm(c_compiler)
+    print(
+        f"[gen_coverage] Path to folder with llvm-cov/llvm-profdata: {bin_llvm}"
+    )
+
+    if bin_llvm:
+        env["PATH"] = ":".join((env.get("PATH", ""), bin_llvm))
+        env["LLVM_TOOLS_HOME"] = bin_llvm
 
-    subprocess.check_call(cmake_args, shell=False, cwd=setup_dir, env=env)
+    log_cmake_args(cmake_args, "gen_coverage")
 
-    env["LLVM_PROFILE_FILE"] = "dpnp_pytest.profraw"
-    subprocess.check_call(
-        [
+    build_extension(
+        setup_dir,
+        env,
+        cmake_args,
+        cmake_executable=args.cmake_executable,
+        generator=args.generator,
+        build_type="Coverage",
+    )
+    install_editable(setup_dir, env)
+
+    if args.run_pytest:
+        env["LLVM_PROFILE_FILE"] = "dpnp_pytest.profraw"
+        pytest_cmd = [
             "pytest",
             "-q",
             "-ra",
@@ -99,133 +217,66 @@ def run(
             "--cov-report=lcov:coverage-python.lcov",
             "--pyargs",
             "dpnp",
-            *pytest_opts.split(),
-        ],
-        cwd=setup_dir,
-        shell=False,
-        env=env,
-    )
-
-    def find_objects():
-        objects = []
-        dpnp_path = os.getcwd()
-        search_path = os.path.join(dpnp_path, "dpnp")
-        for root, _, files in os.walk(search_path):
-            for file in files:
-                if (
-                    file.endswith("_c.so")
-                    or root.find("extensions") != -1
-                    and file.find("_impl.cpython") != -1
-                ):
-                    objects.extend(["-object", os.path.join(root, file)])
-        return objects
-
-    objects = find_objects()
-    instr_profile_fn = "dpnp_pytest.profdata"
-    # generate instrumentation profile data
-    subprocess.check_call(
-        [
-            os.path.join(bin_llvm, "llvm-profdata"),
-            "merge",
-            "-sparse",
-            env["LLVM_PROFILE_FILE"],
-            "-o",
-            instr_profile_fn,
+            *args.pytest_opts.split(),
         ]
-    )
+        run(pytest_cmd, env=env, cwd=setup_dir)
 
-    # export lcov
-    with open("coverage-cpp.lcov", "w") as fh:
-        subprocess.check_call(
+        def find_objects():
+            objects = []
+            dpnp_path = os.getcwd()
+            search_path = os.path.join(dpnp_path, "dpnp")
+            for root, _, files in os.walk(search_path):
+                for file in files:
+                    if (
+                        file.endswith("_c.so")
+                        or root.find("extensions") != -1
+                        and file.find("_impl.cpython") != -1
+                    ):
+                        objects.extend(["-object", os.path.join(root, file)])
+            return objects
+
+        objects = find_objects()
+        instr_profile_fn = "dpnp_pytest.profdata"
+
+        # generate instrumentation profile data
+        run(
             [
-                os.path.join(bin_llvm, "llvm-cov"),
-                "export",
-                "-format=lcov",
-                "-ignore-filename-regex=/tmp/icpx*",
-                r"-ignore-filename-regex=.*/backend/kernels/elementwise_functions/.*\.hpp$",
-                "-instr-profile=" + instr_profile_fn,
+                os.path.join(bin_llvm, "llvm-profdata"),
+                "merge",
+                "-sparse",
+                env["LLVM_PROFILE_FILE"],
+                "-o",
+                instr_profile_fn,
             ]
-            + objects
-            + ["-sources", "dpnp"],
-            stdout=fh,
         )
 
+        # export lcov
+        with open("coverage-cpp.lcov", "w") as fh:
+            subprocess.check_call(
+                [
+                    os.path.join(bin_llvm, "llvm-cov"),
+                    "export",
+                    "-format=lcov",
+                    "-ignore-filename-regex=/tmp/icpx*",
+                    r"-ignore-filename-regex=.*/backend/kernels/elementwise_functions/.*\.hpp$",
+                    "-instr-profile=" + instr_profile_fn,
+                ]
+                + objects
+                + ["-sources", "dpnp"],
+                cwd=setup_dir,
+                env=env,
+                stdout=fh,
+            )
 
-if __name__ == "__main__":
-    import argparse
-
-    parser = argparse.ArgumentParser(
-        description="Driver to build dpnp and generate coverage"
-    )
-    driver = parser.add_argument_group(title="Coverage driver arguments")
-    driver.add_argument("--c-compiler", help="Name of C compiler", default=None)
-    driver.add_argument(
-        "--cxx-compiler", help="Name of C++ compiler", default=None
-    )
-    driver.add_argument(
-        "--not-oneapi",
-        help="Is one-API installation",
-        dest="oneapi",
-        action="store_false",
-    )
-    driver.add_argument(
-        "--compiler-root", type=str, help="Path to compiler home directory"
-    )
-    driver.add_argument(
-        "--bin-llvm", help="Path to folder where llvm-cov can be found"
-    )
-    driver.add_argument(
-        "--pytest-opts",
-        help="Channels through additional pytest options",
-        dest="pytest_opts",
-        default="",
-        type=str,
-    )
-    driver.add_argument(
-        "--verbose",
-        help="Build using vebose makefile mode",
-        dest="verbose",
-        action="store_true",
-    )
-    args = parser.parse_args()
-
-    if args.oneapi:
-        args.c_compiler = "icx"
-        args.cxx_compiler = "icpx"
-        args.compiler_root = None
-        icx_path = subprocess.check_output(["which", "icx"])
-        bin_dir = os.path.dirname(icx_path)
-        compiler_dir = os.path.join(bin_dir.decode("utf-8"), "compiler")
-        if os.path.exists(compiler_dir):
-            args.bin_llvm = os.path.join(bin_dir.decode("utf-8"), "compiler")
-        else:
-            bin_dir = os.path.dirname(bin_dir)
-            args.bin_llvm = os.path.join(bin_dir.decode("utf-8"), "bin-llvm")
-        assert os.path.exists(args.bin_llvm)
+        print("[gen_coverage] Coverage export is completed")
     else:
-        args_to_validate = [
-            "c_compiler",
-            "cxx_compiler",
-            "compiler_root",
-            "bin_llvm",
-        ]
-        for p in args_to_validate:
-            arg = getattr(args, p, None)
-            if not isinstance(arg, str):
-                opt_name = p.replace("_", "-")
-                raise RuntimeError(
-                    f"Option {opt_name} must be provided is "
-                    "using non-default DPC++ layout"
-                )
-            if not os.path.exists(arg):
-                raise RuntimeError(f"Path {arg} must exist")
-
-    run(
-        use_oneapi=args.oneapi,
-        c_compiler=args.c_compiler,
-        cxx_compiler=args.cxx_compiler,
-        compiler_root=args.compiler_root,
-        bin_llvm=args.bin_llvm,
-        pytest_opts=args.pytest_opts,
-        verbose=args.verbose,
-    )
+        print(
+            "[gen_coverage] Skipping pytest and coverage collection "
+            "(--skip-pytest)"
+        )
+
+    print("[gen_coverage] Done")
+
+
+if __name__ == "__main__":
+    main()