From 4d0462d030cf8120e8ebfda7608dc2ad32b217d1 Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Fri, 16 Jan 2026 15:52:34 +0100
Subject: [PATCH 01/16] Improve `linspace` implementation with non-scalar
 inputs (#2712)

The PR closes #2084.

This PR improves the implementation of the `dpnp.linspace` function, aligning
it with the latest changes made in NumPy.
The test coverage is also extended to verify more use cases.
---
 CHANGELOG.md                                  |   1 +
 dpnp/dpnp_algo/dpnp_arraycreation.py          |  42 ++--
 dpnp/dpnp_iface_arraycreation.py              |   2 +
 dpnp/tests/helper.py                          |   3 +
 dpnp/tests/test_arraycreation.py              | 211 +++++++++++-------
 dpnp/tests/test_arraypad.py                   |   5 +-
 .../cupy/creation_tests/test_ranges.py        |   6 +-
 .../cupy/functional_tests/test_piecewise.py   |   2 +
 8 files changed, 167 insertions(+), 105 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9bd28a21d194..7e067b3783b4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -59,6 +59,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum
 
 * Suppressed a potential deprecation warning triggered during import of the `dpctl.tensor` module [#2709](https://github.com/IntelPython/dpnp/pull/2709)
 * Corrected a phonetic spelling issue due to incorrect using of `a nd` in docstrings [#2719](https://github.com/IntelPython/dpnp/pull/2719)
+* Resolved an issue causing `dpnp.linspace` to return an incorrect output shape when inputs were passed as arrays [#2712](https://github.com/IntelPython/dpnp/pull/2712)
 
 ### Security
 
diff --git a/dpnp/dpnp_algo/dpnp_arraycreation.py b/dpnp/dpnp_algo/dpnp_arraycreation.py
index a76736d27bb5..009586906099 100644
--- a/dpnp/dpnp_algo/dpnp_arraycreation.py
+++ b/dpnp/dpnp_algo/dpnp_arraycreation.py
@@ -172,14 +172,9 @@ def dpnp_linspace(
 
     num = operator.index(num)
     if num < 0:
-        raise ValueError("Number of points must be non-negative")
+        raise ValueError(f"Number of samples={num} must be non-negative.")
     step_num = (num - 1) if endpoint else num
 
-    step_nan = False
-    if step_num == 0:
-        step_nan = True
-        step = dpnp.nan
-
     if dpnp.isscalar(start) and dpnp.isscalar(stop):
         # Call linspace() function for scalars.
         usm_res = dpt.linspace(
@@ -191,8 +186,13 @@ def dpnp_linspace(
             sycl_queue=sycl_queue_normalized,
             endpoint=endpoint,
         )
-        if retstep is True and step_nan is False:
-            step = (stop - start) / step_num
+
+        # calculate the used step to return
+        if retstep is True:
+            if step_num > 0:
+                step = (stop - start) / step_num
+            else:
+                step = dpnp.nan
     else:
         usm_start = dpt.asarray(
             start,
@@ -204,6 +204,8 @@ def dpnp_linspace(
             stop, dtype=dt, usm_type=_usm_type, sycl_queue=sycl_queue_normalized
         )
 
+        delta = usm_stop - usm_start
+
         usm_res = dpt.arange(
             0,
             stop=num,
@@ -212,20 +214,30 @@ def dpnp_linspace(
             usm_type=_usm_type,
             sycl_queue=sycl_queue_normalized,
         )
+        usm_res = dpt.reshape(usm_res, (-1,) + (1,) * delta.ndim, copy=False)
+
+        if step_num > 0:
+            step = delta / step_num
+
+            # Needed a special handling for denormal numbers (when step == 0),
+            # see numpy#5437 for more details.
+            # Note, dpt.where() is used to avoid a synchronization branch.
+            usm_res = dpt.where(
+                step == 0, (usm_res / step_num) * delta, usm_res * step
+            )
+        else:
+            step = dpnp.nan
+            usm_res = usm_res * delta
 
-        if step_nan is False:
-            step = (usm_stop - usm_start) / step_num
-            usm_res = dpt.reshape(usm_res, (-1,) + (1,) * step.ndim, copy=False)
-            usm_res = usm_res * step
-            usm_res += usm_start
+        usm_res += usm_start
 
         if endpoint and num > 1:
-            usm_res[-1] = dpt.full(step.shape, usm_stop)
+            usm_res[-1, ...] = usm_stop
 
     if axis != 0:
         usm_res = dpt.moveaxis(usm_res, 0, axis)
 
-    if numpy.issubdtype(dtype, dpnp.integer):
+    if dpnp.issubdtype(dtype, dpnp.integer):
         dpt.floor(usm_res, out=usm_res)
 
     res = dpt.astype(usm_res, dtype, copy=False)
diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py
index 12c65d116a70..50b425003764 100644
--- a/dpnp/dpnp_iface_arraycreation.py
+++ b/dpnp/dpnp_iface_arraycreation.py
@@ -2704,6 +2704,8 @@ def linspace(
         of tuples, tuples of lists, and ndarrays. If `endpoint` is set to
         ``False`` the sequence consists of all but the last of ``num + 1``
         evenly spaced samples, so that `stop` is excluded.
+    num : int
+        Number of samples. Must have a nonnegative value.
     dtype : {None, str, dtype object}, optional
         The desired dtype for the array. If not given, a default dtype will be
         used that can represent the values (by considering Promotion Type Rule
diff --git a/dpnp/tests/helper.py b/dpnp/tests/helper.py
index e5e251231654..7678028d4880 100644
--- a/dpnp/tests/helper.py
+++ b/dpnp/tests/helper.py
@@ -69,6 +69,9 @@ def assert_dtype_allclose(
         x.dtype, dpnp.inexact
     )
 
+    if not hasattr(numpy_arr, "dtype"):
+        numpy_arr = numpy.array(numpy_arr)
+
     if is_inexact(dpnp_arr) or is_inexact(numpy_arr):
         tol_dpnp = (
             dpnp.finfo(dpnp_arr).resolution
diff --git a/dpnp/tests/test_arraycreation.py b/dpnp/tests/test_arraycreation.py
index 28d51e8e6d31..1e20df9425b9 100644
--- a/dpnp/tests/test_arraycreation.py
+++ b/dpnp/tests/test_arraycreation.py
@@ -19,6 +19,8 @@
     assert_dtype_allclose,
     get_all_dtypes,
     get_array,
+    get_float_dtypes,
+    has_support_aspect64,
     is_lts_driver,
     is_tgllp_iris_xe,
     is_win_platform,
@@ -83,6 +85,131 @@ def test_validate_positional_args(self, xp):
         )
 
 
+class TestLinspace:
+    @pytest.mark.parametrize("start", [0, -5, 10, -2.5, 9.7])
+    @pytest.mark.parametrize("stop", [0, 10, -2, 20.5, 120])
+    @pytest.mark.parametrize("num", [0, 1, 5, numpy.array(10)])
+    @pytest.mark.parametrize(
+        "dt", get_all_dtypes(no_bool=True, no_float16=False)
+    )
+    @pytest.mark.parametrize("retstep", [True, False])
+    def test_basic(self, start, stop, num, dt, retstep):
+        if (
+            not has_support_aspect64()
+            and numpy.issubdtype(dt, numpy.integer)
+            and start == -5
+            and stop == 10
+            and num == 10
+        ):
+            pytest.skip("due to dpctl-1056")
+
+        if numpy.issubdtype(dt, numpy.unsignedinteger):
+            start = abs(start)
+            stop = abs(stop)
+
+        res = dpnp.linspace(start, stop, num, dtype=dt, retstep=retstep)
+        exp = numpy.linspace(start, stop, num, dtype=dt, retstep=retstep)
+        if retstep:
+            res, res_step = res
+            exp, exp_step = exp
+            assert_dtype_allclose(res_step, exp_step)
+
+        if numpy.issubdtype(dt, numpy.integer):
+            assert_allclose(res, exp, rtol=1)
+        else:
+            assert_dtype_allclose(res, exp)
+
+    @pytest.mark.parametrize(
+        "start, stop",
+        [
+            (dpnp.array(1), dpnp.array([-4])),
+            (dpnp.array([2.6]), dpnp.array([[2.6], [-4]])),
+            (numpy.array([[-6.7, 3]]), numpy.array(2)),
+            ([1, -4], [[-4.6]]),
+            ((3, 5), (3,)),
+        ],
+    )
+    @pytest.mark.parametrize("num", [0, 1, 5])
+    @pytest.mark.parametrize(
+        "dt", get_all_dtypes(no_bool=True, no_float16=False)
+    )
+    @pytest.mark.parametrize("retstep", [True, False])
+    def test_start_stop_arrays(self, start, stop, num, dt, retstep):
+        res = dpnp.linspace(start, stop, num, dtype=dt, retstep=retstep)
+        exp = numpy.linspace(
+            get_array(numpy, start),
+            get_array(numpy, stop),
+            num,
+            dtype=dt,
+            retstep=retstep,
+        )
+        if retstep:
+            res, res_step = res
+            exp, exp_step = exp
+            assert_dtype_allclose(res_step, exp_step)
+        assert_dtype_allclose(res, exp)
+
+    @pytest.mark.parametrize(
+        "start, stop",
+        [(1 + 2j, 3 + 4j), (1j, 10), ([0, 1], 3 + 2j)],
+    )
+    def test_start_stop_complex(self, start, stop):
+        result = dpnp.linspace(start, stop, num=5)
+        expected = numpy.linspace(start, stop, num=5)
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize("dt", get_float_dtypes())
+    def test_denormal_numbers(self, dt):
+        stop = numpy.nextafter(dt(0), dt(1)) * 5  # denormal number
+
+        result = dpnp.linspace(0, stop, num=10, endpoint=False, dtype=dt)
+        expected = numpy.linspace(0, stop, num=10, endpoint=False, dtype=dt)
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.skipif(not has_support_aspect64(), reason="due to dpctl-1056")
+    def test_equivalent_to_arange(self):
+        result = dpnp.linspace(0, 35, num=36, dtype=int)
+        expected = numpy.linspace(0, 35, num=36, dtype=int)
+        assert_equal(result, expected)
+
+    def test_round_negative(self):
+        result = dpnp.linspace(-1, 3, num=8, dtype=int)
+        expected = numpy.linspace(-1, 3, num=8, dtype=int)
+        assert_array_equal(result, expected)
+
+    def test_step_zero(self):
+        start = numpy.array([0.0, 1.0])
+        stop = numpy.array([2.0, 1.0])
+
+        result = dpnp.linspace(start, stop, num=3)
+        expected = numpy.linspace(start, stop, num=3)
+        assert_array_equal(result, expected)
+
+    @pytest.mark.parametrize("endpoint", [True, False])
+    def test_num_zero(self, endpoint):
+        start, stop = 0, [0, 1, 2, 3, 4]
+        result = dpnp.linspace(start, stop, num=0, endpoint=endpoint)
+        expected = numpy.linspace(start, stop, num=0, endpoint=endpoint)
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize("axis", [0, 1])
+    def test_axis(self, axis):
+        func = lambda xp: xp.linspace([2, 3], [20, 15], num=10, axis=axis)
+        assert_allclose(func(dpnp), func(numpy))
+
+    @pytest.mark.parametrize("xp", [dpnp, numpy])
+    def test_negative_num(self, xp):
+        with pytest.raises(ValueError, match="must be non-negative"):
+            _ = xp.linspace(0, 10, num=-1)
+
+    @pytest.mark.parametrize("xp", [dpnp, numpy])
+    def test_float_num(self, xp):
+        with pytest.raises(
+            TypeError, match="cannot be interpreted as an integer"
+        ):
+            _ = xp.linspace(0, 1, num=2.5)
+
+
 class TestTrace:
     @pytest.mark.parametrize("a_sh", [(3, 4), (2, 2, 2)])
     @pytest.mark.parametrize(
@@ -734,37 +861,6 @@ def test_dpctl_tensor_input(func, args):
         assert_array_equal(X, Y)
 
 
-@pytest.mark.parametrize("start", [0, -5, 10, -2.5, 9.7])
-@pytest.mark.parametrize("stop", [0, 10, -2, 20.5, 120])
-@pytest.mark.parametrize(
-    "num",
-    [1, 5, numpy.array(10), dpnp.array(17), dpt.asarray(100)],
-    ids=["1", "5", "numpy.array(10)", "dpnp.array(17)", "dpt.asarray(100)"],
-)
-@pytest.mark.parametrize(
-    "dtype",
-    get_all_dtypes(no_bool=True, no_float16=False),
-)
-@pytest.mark.parametrize("retstep", [True, False])
-def test_linspace(start, stop, num, dtype, retstep):
-    if numpy.issubdtype(dtype, numpy.unsignedinteger):
-        start = abs(start)
-        stop = abs(stop)
-
-    res_np = numpy.linspace(start, stop, num, dtype=dtype, retstep=retstep)
-    res_dp = dpnp.linspace(start, stop, num, dtype=dtype, retstep=retstep)
-
-    if retstep:
-        [res_np, step_np] = res_np
-        [res_dp, step_dp] = res_dp
-        assert_allclose(step_np, step_dp)
-
-    if numpy.issubdtype(dtype, dpnp.integer):
-        assert_allclose(res_np, res_dp, rtol=1)
-    else:
-        assert_dtype_allclose(res_dp, res_np)
-
-
 @pytest.mark.parametrize("func", ["geomspace", "linspace", "logspace"])
 @pytest.mark.parametrize(
     "start_dtype", [numpy.float64, numpy.float32, numpy.int64, numpy.int32]
@@ -778,57 +874,6 @@ def test_space_numpy_dtype(func, start_dtype, stop_dtype):
     getattr(dpnp, func)(start, stop, 10)
 
 
-@pytest.mark.parametrize(
-    "start",
-    [
-        dpnp.array(1),
-        dpnp.array([2.6]),
-        numpy.array([[-6.7, 3]]),
-        [1, -4],
-        (3, 5),
-    ],
-)
-@pytest.mark.parametrize(
-    "stop",
-    [
-        dpnp.array([-4]),
-        dpnp.array([[2.6], [-4]]),
-        numpy.array(2),
-        [[-4.6]],
-        (3,),
-    ],
-)
-def test_linspace_arrays(start, stop):
-    func = lambda xp: xp.linspace(get_array(xp, start), get_array(xp, stop), 10)
-    assert func(numpy).shape == func(dpnp).shape
-
-
-def test_linspace_complex():
-    func = lambda xp: xp.linspace(0, 3 + 2j, num=1000)
-    assert_allclose(func(dpnp), func(numpy))
-
-
-@pytest.mark.parametrize("axis", [0, 1])
-def test_linspace_axis(axis):
-    func = lambda xp: xp.linspace([2, 3], [20, 15], num=10, axis=axis)
-    assert_allclose(func(dpnp), func(numpy))
-
-
-def test_linspace_step_nan():
-    func = lambda xp: xp.linspace(1, 2, num=0, endpoint=False)
-    assert_allclose(func(dpnp), func(numpy))
-
-
-@pytest.mark.parametrize("start", [1, [1, 1]])
-@pytest.mark.parametrize("stop", [10, [10 + 10]])
-def test_linspace_retstep(start, stop):
-    func = lambda xp: xp.linspace(start, stop, num=10, retstep=True)
-    np_res = func(numpy)
-    dpnp_res = func(dpnp)
-    assert_allclose(dpnp_res[0], np_res[0])
-    assert_allclose(dpnp_res[1], np_res[1])
-
-
 @pytest.mark.parametrize(
     "arrays",
     [[], [[1]], [[1, 2, 3], [4, 5, 6]], [[1, 2], [3, 4], [5, 6]]],
@@ -862,10 +907,8 @@ def test_geomspace_zero_error():
 
 def test_space_num_error():
     with pytest.raises(ValueError):
-        dpnp.linspace(2, 5, -3)
         dpnp.geomspace(2, 5, -3)
         dpnp.logspace(2, 5, -3)
-        dpnp.linspace([2, 3], 5, -3)
         dpnp.geomspace([2, 3], 5, -3)
         dpnp.logspace([2, 3], 5, -3)
 
diff --git a/dpnp/tests/test_arraypad.py b/dpnp/tests/test_arraypad.py
index 9a88dd8bab96..aee93703895b 100644
--- a/dpnp/tests/test_arraypad.py
+++ b/dpnp/tests/test_arraypad.py
@@ -73,11 +73,8 @@ def test_non_contiguous_array(self, mode):
         else:
             assert_array_equal(result, expected)
 
-    # TODO: include "linear_ramp" when dpnp issue gh-2084 is resolved
     @pytest.mark.parametrize("pad_width", [0, (0, 0), ((0, 0), (0, 0))])
-    @pytest.mark.parametrize(
-        "mode", [m for m in _modes if m not in {"linear_ramp"}]
-    )
+    @pytest.mark.parametrize("mode", _modes)
     def test_zero_pad_width(self, pad_width, mode):
         arr = dpnp.arange(30).reshape(6, 5)
         assert_array_equal(arr, dpnp.pad(arr, pad_width, mode=mode))
diff --git a/dpnp/tests/third_party/cupy/creation_tests/test_ranges.py b/dpnp/tests/third_party/cupy/creation_tests/test_ranges.py
index 3790eae96462..69873473e0d7 100644
--- a/dpnp/tests/third_party/cupy/creation_tests/test_ranges.py
+++ b/dpnp/tests/third_party/cupy/creation_tests/test_ranges.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import functools
 import math
 import unittest
@@ -226,8 +228,8 @@ def test_linspace_mixed_start_stop2(self, xp, dtype_range, dtype_out):
         # TODO (ev-br): np 2.0: had to bump the default rtol on Windows
         #               and numpy 1.26+weak promotion from 0 to 5e-6
         if xp.dtype(dtype_range).kind == "u":
-            # to avoid overflow, limit `val` to be smaller
-            # than xp.iinfo(dtype).max
+            # to avoid overflow, limit `val` to be smaller than
+            # xp.iinfo(dtype).max (TODO: check if dpctl-2230 resolves that)
             if dtype_range in [xp.uint8, xp.uint16] or dtype_out in [
                 xp.int8,
                 xp.uint8,
diff --git a/dpnp/tests/third_party/cupy/functional_tests/test_piecewise.py b/dpnp/tests/third_party/cupy/functional_tests/test_piecewise.py
index 5ce72bd806b8..c16a9e91f9dd 100644
--- a/dpnp/tests/third_party/cupy/functional_tests/test_piecewise.py
+++ b/dpnp/tests/third_party/cupy/functional_tests/test_piecewise.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import unittest
 
 import numpy

From 6ff93b98f1536bb07becd8c1ad7c782df9b13b69 Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Sat, 17 Jan 2026 12:43:36 +0100
Subject: [PATCH 02/16] Consider the buffer offset when creating a view on
 ndarray (#2651)

The PR passes the buffer offset to the `usm_ndarray` constructor.

This is a fast and straightforward solution. The complete fix needs to be
implemented in dpctl as part of
[dpctl#2226](https://github.com/IntelPython/dpctl/pull/2226).
---
 CHANGELOG.md                     |  1 +
 dpnp/dpnp_array.py               |  1 +
 dpnp/dpnp_iface_indexing.py      |  8 +++-----
 dpnp/tests/test_arraycreation.py | 10 ++++++++++
 dpnp/tests/test_memory.py        | 11 +++++++++++
 5 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7e067b3783b4..96649123cfad 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -60,6 +60,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum
 * Suppressed a potential deprecation warning triggered during import of the `dpctl.tensor` module [#2709](https://github.com/IntelPython/dpnp/pull/2709)
 * Corrected a phonetic spelling issue due to incorrect using of `a nd` in docstrings [#2719](https://github.com/IntelPython/dpnp/pull/2719)
 * Resolved an issue causing `dpnp.linspace` to return an incorrect output shape when inputs were passed as arrays [#2712](https://github.com/IntelPython/dpnp/pull/2712)
+* Resolved an issue where `dpnp` always returns the base allocation pointer, when the view start is expected [#2651](https://github.com/IntelPython/dpnp/pull/2651)
 
 ### Security
 
diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index c42c9f8a2cb5..6a2b2fd1977f 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -98,6 +98,7 @@ def __init__(
             # or as USM memory allocation
             if isinstance(buffer, dpnp_array):
                 buffer = buffer.get_array()
+                offset += buffer._element_offset
 
             if dtype is None and hasattr(buffer, "dtype"):
                 dtype = buffer.dtype
diff --git a/dpnp/dpnp_iface_indexing.py b/dpnp/dpnp_iface_indexing.py
index 6375e5320e30..6e7ab778299b 100644
--- a/dpnp/dpnp_iface_indexing.py
+++ b/dpnp/dpnp_iface_indexing.py
@@ -721,23 +721,21 @@ def diagonal(a, offset=0, axis1=0, axis2=1):
     a_straides = a.strides
     n, m = a_shape[-2:]
     st_n, st_m = a_straides[-2:]
-    # pylint: disable=W0212
-    a_element_offset = a.get_array()._element_offset
 
     # Compute shape, strides and offset of the resulting diagonal array
     # based on the input offset
     if offset == 0:
         out_shape = a_shape[:-2] + (min(n, m),)
         out_strides = a_straides[:-2] + (st_n + st_m,)
-        out_offset = a_element_offset
+        out_offset = 0
     elif 0 < offset < m:
         out_shape = a_shape[:-2] + (min(n, m - offset),)
         out_strides = a_straides[:-2] + (st_n + st_m,)
-        out_offset = a_element_offset + st_m * offset
+        out_offset = st_m * offset
     else:
         out_shape = a_shape[:-2] + (0,)
         out_strides = a_straides[:-2] + (1,)
-        out_offset = a_element_offset
+        out_offset = 0
 
     return dpnp_array(
         out_shape, buffer=a, strides=out_strides, offset=out_offset
diff --git a/dpnp/tests/test_arraycreation.py b/dpnp/tests/test_arraycreation.py
index 1e20df9425b9..65747488310d 100644
--- a/dpnp/tests/test_arraycreation.py
+++ b/dpnp/tests/test_arraycreation.py
@@ -268,6 +268,16 @@ def test_linalg_trace(self, dtype, offset):
         expected = numpy.linalg.trace(a, offset=offset, dtype=dtype)
         assert_equal(result, expected)
 
+    @pytest.mark.parametrize("offset", [-1, 0, 1])
+    def test_ndarray_offset(self, offset):
+        ia = dpnp.arange(8, dtype=dpnp.uint8).reshape((2, 2, 2))
+        ia = dpnp.ndarray((2, 2), buffer=ia, offset=1)
+        a = ia.asnumpy()
+
+        result = dpnp.linalg.trace(ia, offset=offset)
+        expected = numpy.linalg.trace(a, offset=offset)
+        assert_equal(result, expected)
+
 
 @pytest.mark.parametrize(
     "func, args",
diff --git a/dpnp/tests/test_memory.py b/dpnp/tests/test_memory.py
index ce9c7e60f030..1bc0da8c1535 100644
--- a/dpnp/tests/test_memory.py
+++ b/dpnp/tests/test_memory.py
@@ -31,3 +31,14 @@ def test_ndarray_from_data(self):
         a = dpnp.empty(5)
         b = dpnp.ndarray(a.shape, buffer=a.data)
         assert b.data.ptr == a.data.ptr
+
+    def test_view_non_zero_offset(self):
+        n, m = 2, 8
+        plane = n * m
+
+        a = dpnp.empty(4 * plane)
+        sl = a[plane:]  # non-zero offset view
+
+        pl = dpnp.ndarray((n, m), dtype=a.dtype, buffer=sl)
+        assert pl.data.ptr == sl.data.ptr
+        assert a.data.ptr != sl.data.ptr

From 54f3005e4dc7e75726f97017abb5df4cef875be5 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Sat, 17 Jan 2026 14:34:20 +0100
Subject: [PATCH 03/16] Weekly pre-commit autoupdate (#2724)

This PR updates the `.pre-commit-config.yaml` using `pre-commit
autoupdate`.
---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 83eae28e76b3..6be33c740497 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -118,7 +118,7 @@ repos:
             ]
         files: '^dpnp/(dpnp_iface.*|fft|linalg|scipy|dpnp_array)'
 -   repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
-    rev: v2.15.0
+    rev: v2.16.0
     hooks:
     -   id: pretty-format-toml
         args: [--autofix]
@@ -127,7 +127,7 @@ repos:
     hooks:
     -   id: actionlint
 -   repo: https://github.com/BlankSpruce/gersemi
-    rev: 0.25.0
+    rev: 0.25.1
     hooks:
     -   id: gersemi
         exclude: "dpnp/backend/cmake/Modules/"

From 4dc779e2c115a0d8a61d093ce9bed21958010fa1 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sat, 17 Jan 2026 16:06:34 +0100
Subject: [PATCH 04/16] Bump styfle/cancel-workflow-action from 0.12.1 to
 0.13.0 (#2725)

Bumps
[styfle/cancel-workflow-action](https://github.com/styfle/cancel-workflow-action)
from 0.12.1 to 0.13.0.
---
 .github/workflows/build-sphinx.yml       | 2 +-
 .github/workflows/check-onemath.yaml     | 2 +-
 .github/workflows/conda-package.yml      | 2 +-
 .github/workflows/cron-run-tests.yaml    | 2 +-
 .github/workflows/generate_coverage.yaml | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml
index a80a6669ffdd..926d10859af4 100644
--- a/.github/workflows/build-sphinx.yml
+++ b/.github/workflows/build-sphinx.yml
@@ -47,7 +47,7 @@ jobs:
 
     steps:
       - name: Cancel Previous Runs
-        uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa # 0.12.1
+        uses: styfle/cancel-workflow-action@3155a141048f8f89c06b4cdae32e7853e97536bc # 0.13.0
         with:
           access_token: ${{ github.token }}
 
diff --git a/.github/workflows/check-onemath.yaml b/.github/workflows/check-onemath.yaml
index c6b12a4454f3..80bcccc1b5ab 100644
--- a/.github/workflows/check-onemath.yaml
+++ b/.github/workflows/check-onemath.yaml
@@ -34,7 +34,7 @@ jobs:
 
     steps:
       - name: Cancel Previous Runs
-        uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa # 0.12.1
+        uses: styfle/cancel-workflow-action@3155a141048f8f89c06b4cdae32e7853e97536bc # 0.13.0
         with:
           access_token: ${{ github.token }}
 
diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml
index d195b51ae80b..e56c36d58782 100644
--- a/.github/workflows/conda-package.yml
+++ b/.github/workflows/conda-package.yml
@@ -49,7 +49,7 @@ jobs:
 
     steps:
       - name: Cancel Previous Runs
-        uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa # 0.12.1
+        uses: styfle/cancel-workflow-action@3155a141048f8f89c06b4cdae32e7853e97536bc # 0.13.0
         with:
           access_token: ${{ github.token }}
 
diff --git a/.github/workflows/cron-run-tests.yaml b/.github/workflows/cron-run-tests.yaml
index ff3fe6780700..4db380c73775 100644
--- a/.github/workflows/cron-run-tests.yaml
+++ b/.github/workflows/cron-run-tests.yaml
@@ -43,7 +43,7 @@ jobs:
 
     steps:
       - name: Cancel Previous Runs
-        uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa # 0.12.1
+        uses: styfle/cancel-workflow-action@3155a141048f8f89c06b4cdae32e7853e97536bc # 0.13.0
         with:
           access_token: ${{ github.token }}
 
diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml
index 734f006906f2..8149b63150d7 100644
--- a/.github/workflows/generate_coverage.yaml
+++ b/.github/workflows/generate_coverage.yaml
@@ -33,7 +33,7 @@ jobs:
 
     steps:
       - name: Cancel Previous Runs
-        uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa # 0.12.1
+        uses: styfle/cancel-workflow-action@3155a141048f8f89c06b4cdae32e7853e97536bc # 0.13.0
         with:
           access_token: ${{ github.token }}
 

From 96bb408c72bcf28a28a54e96a5c6a4833ec29fbb Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sat, 17 Jan 2026 17:38:48 +0100
Subject: [PATCH 05/16] Bump github/codeql-action from 4.31.9 to 4.31.10
 (#2726)

Bumps [github/codeql-action](https://github.com/github/codeql-action)
from 4.31.9 to 4.31.10.
---
 .github/workflows/openssf-scorecard.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml
index 62e31b249cbf..7fa391d342d0 100644
--- a/.github/workflows/openssf-scorecard.yml
+++ b/.github/workflows/openssf-scorecard.yml
@@ -72,6 +72,6 @@ jobs:
 
       # Upload the results to GitHub's code scanning dashboard.
       - name: "Upload to code-scanning"
-        uses: github/codeql-action/upload-sarif@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 # v4.31.9
+        uses: github/codeql-action/upload-sarif@cdefb33c0f6224e58673d9004f47f7cb3e328b89 # v4.31.10
         with:
           sarif_file: results.sarif

From 7b73f740e93d032d60f18b9fd0edb21bf72812dd Mon Sep 17 00:00:00 2001
From: vlad-perevezentsev <vladislav.perevezentsev@intel.com>
Date: Mon, 19 Jan 2026 17:23:04 +0100
Subject: [PATCH 06/16] Refactor `dpnp.fix()` to reuse `dpnp.trunc` internally
 (#2722)

This PR rewrites `dpnp.fix` to reuse `dpnp.trunc` internally, removing
the backend implementation of `dpnp.fix`.
---
 CHANGELOG.md                                  |   1 +
 dpnp/backend/extensions/ufunc/CMakeLists.txt  |   1 -
 .../ufunc/elementwise_functions/common.cpp    |   2 -
 .../ufunc/elementwise_functions/fix.cpp       | 131 ------------------
 .../ufunc/elementwise_functions/fix.hpp       |  38 -----
 .../kernels/elementwise_functions/fix.hpp     |  52 -------
 dpnp/dpnp_algo/dpnp_elementwise_common.py     |  50 -------
 dpnp/dpnp_iface_mathematical.py               |  10 +-
 8 files changed, 7 insertions(+), 278 deletions(-)
 delete mode 100644 dpnp/backend/extensions/ufunc/elementwise_functions/fix.cpp
 delete mode 100644 dpnp/backend/extensions/ufunc/elementwise_functions/fix.hpp
 delete mode 100644 dpnp/backend/kernels/elementwise_functions/fix.hpp

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 96649123cfad..e00afb6b1c65 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -42,6 +42,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum
 * Unified `dpnp` public API exports by consolidating function exports in `__init__.py` and removing wildcard imports [#2665](https://github.com/IntelPython/dpnp/pull/2665) [#2666](https://github.com/IntelPython/dpnp/pull/2666)
 * Updated tests to reflect the new scalar conversion rules for non-0D `usm_ndarray` [#2694](https://github.com/IntelPython/dpnp/pull/2694)
 * Compile indexing extension with `-fno-sycl-id-queries-fit-in-int` to support huge arrays [#2721](https://github.com/IntelPython/dpnp/pull/2721)
+* Updated `dpnp.fix` to reuse `dpnp.trunc` internally [#2722](https://github.com/IntelPython/dpnp/pull/2722)
 
 ### Deprecated
 
diff --git a/dpnp/backend/extensions/ufunc/CMakeLists.txt b/dpnp/backend/extensions/ufunc/CMakeLists.txt
index 5609522f58a4..b24d5d131cfe 100644
--- a/dpnp/backend/extensions/ufunc/CMakeLists.txt
+++ b/dpnp/backend/extensions/ufunc/CMakeLists.txt
@@ -34,7 +34,6 @@ set(_elementwise_sources
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/divmod.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/erf_funcs.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fabs.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fix.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/float_power.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fmax.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fmin.cpp
diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp
index df409464a5c2..9254e87e52c4 100644
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp
@@ -33,7 +33,6 @@
 #include "divmod.hpp"
 #include "erf_funcs.hpp"
 #include "fabs.hpp"
-#include "fix.hpp"
 #include "float_power.hpp"
 #include "fmax.hpp"
 #include "fmin.hpp"
@@ -67,7 +66,6 @@ void init_elementwise_functions(py::module_ m)
     init_divmod(m);
     init_erf_funcs(m);
     init_fabs(m);
-    init_fix(m);
     init_float_power(m);
     init_fmax(m);
     init_fmin(m);
diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fix.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fix.cpp
deleted file mode 100644
index 6b21245489fc..000000000000
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/fix.cpp
+++ /dev/null
@@ -1,131 +0,0 @@
-//*****************************************************************************
-// Copyright (c) 2024, Intel Corporation
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-// - Neither the name of the copyright holder nor the names of its contributors
-//   may be used to endorse or promote products derived from this software
-//   without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
-// THE POSSIBILITY OF SUCH DAMAGE.
-//*****************************************************************************
-
-#include <type_traits>
-#include <vector>
-
-#include <sycl/sycl.hpp>
-
-#include "dpctl4pybind11.hpp"
-
-#include "fix.hpp"
-#include "kernels/elementwise_functions/fix.hpp"
-#include "populate.hpp"
-
-// include a local copy of elementwise common header from dpctl tensor:
-// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp
-// TODO: replace by including dpctl header once available
-#include "../../elementwise_functions/elementwise_functions.hpp"
-
-// dpctl tensor headers
-#include "kernels/elementwise_functions/common.hpp"
-#include "utils/type_dispatch.hpp"
-
-namespace dpnp::extensions::ufunc
-{
-namespace py = pybind11;
-namespace py_int = dpnp::extensions::py_internal;
-
-namespace impl
-{
-namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common;
-namespace td_ns = dpctl::tensor::type_dispatch;
-
-/**
- * @brief A factory to define pairs of supported types for which
- * sycl::fix<T> function is available.
- *
- * @tparam T Type of input vector `a` and of result vector `y`.
- */
-template <typename T>
-struct OutputType
-{
-    using value_type =
-        typename std::disjunction<td_ns::TypeMapResultEntry<T, sycl::half>,
-                                  td_ns::TypeMapResultEntry<T, float>,
-                                  td_ns::TypeMapResultEntry<T, double>,
-                                  td_ns::DefaultResultEntry<void>>::result_type;
-};
-
-using dpnp::kernels::fix::FixFunctor;
-
-template <typename argT,
-          typename resT = argT,
-          unsigned int vec_sz = 4,
-          unsigned int n_vecs = 2,
-          bool enable_sg_loadstore = true>
-using ContigFunctor = ew_cmn_ns::UnaryContigFunctor<argT,
-                                                    resT,
-                                                    FixFunctor<argT, resT>,
-                                                    vec_sz,
-                                                    n_vecs,
-                                                    enable_sg_loadstore>;
-
-template <typename argTy, typename resTy, typename IndexerT>
-using StridedFunctor = ew_cmn_ns::
-    UnaryStridedFunctor<argTy, resTy, IndexerT, FixFunctor<argTy, resTy>>;
-
-using ew_cmn_ns::unary_contig_impl_fn_ptr_t;
-using ew_cmn_ns::unary_strided_impl_fn_ptr_t;
-
-static unary_contig_impl_fn_ptr_t fix_contig_dispatch_vector[td_ns::num_types];
-static int fix_output_typeid_vector[td_ns::num_types];
-static unary_strided_impl_fn_ptr_t
-    fix_strided_dispatch_vector[td_ns::num_types];
-
-MACRO_POPULATE_DISPATCH_VECTORS(fix);
-} // namespace impl
-
-void init_fix(py::module_ m)
-{
-    using arrayT = dpctl::tensor::usm_ndarray;
-    using event_vecT = std::vector<sycl::event>;
-    {
-        impl::populate_fix_dispatch_vectors();
-        using impl::fix_contig_dispatch_vector;
-        using impl::fix_output_typeid_vector;
-        using impl::fix_strided_dispatch_vector;
-
-        auto fix_pyapi = [&](const arrayT &src, const arrayT &dst,
-                             sycl::queue &exec_q,
-                             const event_vecT &depends = {}) {
-            return py_int::py_unary_ufunc(
-                src, dst, exec_q, depends, fix_output_typeid_vector,
-                fix_contig_dispatch_vector, fix_strided_dispatch_vector);
-        };
-        m.def("_fix", fix_pyapi, "", py::arg("src"), py::arg("dst"),
-              py::arg("sycl_queue"), py::arg("depends") = py::list());
-
-        auto fix_result_type_pyapi = [&](const py::dtype &dtype) {
-            return py_int::py_unary_ufunc_result_type(dtype,
-                                                      fix_output_typeid_vector);
-        };
-        m.def("_fix_result_type", fix_result_type_pyapi);
-    }
-}
-} // namespace dpnp::extensions::ufunc
diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fix.hpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fix.hpp
deleted file mode 100644
index f9fed62cf818..000000000000
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/fix.hpp
+++ /dev/null
@@ -1,38 +0,0 @@
-//*****************************************************************************
-// Copyright (c) 2024, Intel Corporation
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-// - Neither the name of the copyright holder nor the names of its contributors
-//   may be used to endorse or promote products derived from this software
-//   without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
-// THE POSSIBILITY OF SUCH DAMAGE.
-//*****************************************************************************
-
-#pragma once
-
-#include <pybind11/pybind11.h>
-
-namespace py = pybind11;
-
-namespace dpnp::extensions::ufunc
-{
-void init_fix(py::module_ m);
-} // namespace dpnp::extensions::ufunc
diff --git a/dpnp/backend/kernels/elementwise_functions/fix.hpp b/dpnp/backend/kernels/elementwise_functions/fix.hpp
deleted file mode 100644
index f53bfc17e566..000000000000
--- a/dpnp/backend/kernels/elementwise_functions/fix.hpp
+++ /dev/null
@@ -1,52 +0,0 @@
-//*****************************************************************************
-// Copyright (c) 2024, Intel Corporation
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-// - Neither the name of the copyright holder nor the names of its contributors
-//   may be used to endorse or promote products derived from this software
-//   without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
-// THE POSSIBILITY OF SUCH DAMAGE.
-//*****************************************************************************
-
-#pragma once
-
-#include <sycl/sycl.hpp>
-
-namespace dpnp::kernels::fix
-{
-template <typename argT, typename resT>
-struct FixFunctor
-{
-    // is function constant for given argT
-    using is_constant = typename std::false_type;
-    // constant value, if constant
-    // constexpr resT constant_value = resT{};
-    // is function defined for sycl::vec
-    using supports_vec = typename std::false_type;
-    // do both argT and resT support subgroup store/load operation
-    using supports_sg_loadstore = typename std::true_type;
-
-    resT operator()(const argT &x) const
-    {
-        return (x >= 0.0) ? sycl::floor(x) : sycl::ceil(x);
-    }
-};
-} // namespace dpnp::kernels::fix
diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py
index 60a55acd1f45..c404d71dfbc6 100644
--- a/dpnp/dpnp_algo/dpnp_elementwise_common.py
+++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py
@@ -60,7 +60,6 @@
     "DPNPBinaryFunc",
     "DPNPBinaryFuncOutKw",
     "DPNPBinaryTwoOutputsFunc",
-    "DPNPFix",
     "DPNPImag",
     "DPNPReal",
     "DPNPRound",
@@ -1188,55 +1187,6 @@ def __call__(self, x, /, deg=False, *, out=None, order="K"):
         return res
 
 
-class DPNPFix(DPNPUnaryFunc):
-    """Class that implements dpnp.fix unary element-wise functions."""
-
-    def __init__(
-        self,
-        name,
-        result_type_resolver_fn,
-        unary_dp_impl_fn,
-        docs,
-    ):
-        super().__init__(
-            name,
-            result_type_resolver_fn,
-            unary_dp_impl_fn,
-            docs,
-        )
-
-    def __call__(self, x, /, out=None, *, order="K"):
-        if not dpnp.is_supported_array_type(x):
-            pass  # pass to raise error in main implementation
-        elif dpnp.issubdtype(x.dtype, dpnp.inexact):
-            pass  # for inexact types, pass to calculate in the backend
-        elif not (
-            out is None
-            or isinstance(out, tuple)
-            or dpnp.is_supported_array_type(out)
-        ):
-            pass  # pass to raise error in main implementation
-        elif not (
-            out is None or isinstance(out, tuple) or out.dtype == x.dtype
-        ):
-            # passing will raise an error but with incorrect needed dtype
-            raise ValueError(
-                f"Output array of type {x.dtype} is needed, got {out.dtype}"
-            )
-        else:
-            # for exact types, return the input
-            out = self._unpack_out_kw(out)
-            if out is None:
-                return dpnp.copy(x, order=order)
-
-            if isinstance(out, dpt.usm_ndarray):
-                out = dpnp_array._create_from_usm_ndarray(out)
-            out[...] = x
-            return out
-
-        return super().__call__(x, out=out, order=order)
-
-
 class DPNPI0(DPNPUnaryFunc):
     """Class that implements dpnp.i0 unary element-wise functions."""
 
diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index 3dc5ccb82884..63aee599d9b9 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -66,7 +66,6 @@
     DPNPBinaryFunc,
     DPNPBinaryFuncOutKw,
     DPNPBinaryTwoOutputsFunc,
-    DPNPFix,
     DPNPImag,
     DPNPReal,
     DPNPRound,
@@ -1867,11 +1866,14 @@ def ediff1d(ary, to_end=None, to_begin=None):
 
 """
 
-fix = DPNPFix(
+# reuse trunc backend implementation for fix
+fix = DPNPUnaryFunc(
     "fix",
-    ufi._fix_result_type,
-    ufi._fix,
+    ti._trunc_result_type,
+    ti._trunc,
     _FIX_DOCSTRING,
+    mkl_fn_to_call="_mkl_trunc_to_call",
+    mkl_impl_fn="_trunc",
 )
 
 

From 0ecd18eee5a1b95ff5e28199aa835b751be98baf Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Mon, 19 Jan 2026 19:10:41 +0100
Subject: [PATCH 07/16] Remove use of `python setup.py develop/install` (#2716)

For some time, the dpnp build driver scripts have emitted a warning that
`python setup.py develop` is deprecated and will no longer be supported
as of the end of October 2025. The same warning can be seen in the CI
logs.

This PR instead relies on `pip` to install dpnp in the scripts, and
reworks the scripts so that they keep using `scikit-build`.
In the future, this will also simplify the transition to
`scikit-build-core`.

This PR also introduces the following options:
* `--clean` and `--skip-editable` for the `build_locally` driver
* `--clean` and `--skip-pytest` for the `gen_coverage` driver

It also updates the documentation with the new build instructions.
---
 CHANGELOG.md              |   1 +
 CMakeLists.txt            |  44 ++++-
 doc/0.builddoc.sh         |  11 --
 doc/quick_start_guide.rst |   6 +-
 scripts/_build_helper.py  | 249 ++++++++++++++++++++++++
 scripts/build_locally.py  | 383 +++++++++++++++----------------------
 scripts/gen_coverage.py   | 385 +++++++++++++++++++++-----------------
 7 files changed, 658 insertions(+), 421 deletions(-)
 delete mode 100755 doc/0.builddoc.sh
 create mode 100644 scripts/_build_helper.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e00afb6b1c65..0c9d2c973763 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -43,6 +43,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum
 * Updated tests to reflect the new scalar conversion rules for non-0D `usm_ndarray` [#2694](https://github.com/IntelPython/dpnp/pull/2694)
 * Compile indexing extension with `-fno-sycl-id-queries-fit-in-int` to support huge arrays [#2721](https://github.com/IntelPython/dpnp/pull/2721)
 * Updated `dpnp.fix` to reuse `dpnp.trunc` internally [#2722](https://github.com/IntelPython/dpnp/pull/2722)
+* Changed the build scripts and documentation due to `python setup.py develop` deprecation notice [#2716](https://github.com/IntelPython/dpnp/pull/2716)
 
 ### Deprecated
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 66f5c776b52d..9d676232f08e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -107,7 +107,7 @@ set(DPNP_TARGET_CUDA
 Set to a truthy value (e.g., ON, TRUE) to use default architecture (sm_50), \
 or to a specific architecture like sm_80."
 )
-set(HIP_TARGETS "" CACHE STRING "HIP architecture for target")
+set(DPNP_TARGET_HIP "" CACHE STRING "HIP architecture for target")
 
 set(_dpnp_sycl_targets)
 set(_use_onemath OFF)
@@ -135,18 +135,28 @@ if("x${DPNP_SYCL_TARGETS}" STREQUAL "x")
         set(_use_onemath_cuda ON)
     endif()
 
-    if(HIP_TARGETS)
-        if(HIP_TARGETS MATCHES "^gfx")
+    if(DPNP_TARGET_HIP)
+        if(DPNP_TARGET_HIP MATCHES "^gfx")
             if("x${_dpnp_sycl_targets}" STREQUAL "x")
-                set(_dpnp_sycl_targets "amd_gpu_${HIP_TARGETS},spir64-unknown-unknown")
+                set(_dpnp_sycl_targets
+                    "amd_gpu_${DPNP_TARGET_HIP},spir64-unknown-unknown"
+                )
             else()
-                set(_dpnp_sycl_targets "amd_gpu_${HIP_TARGETS},${_dpnp_sycl_targets}")
+                set(_dpnp_sycl_targets
+                    "amd_gpu_${DPNP_TARGET_HIP},${_dpnp_sycl_targets}"
+                )
             endif()
+
             set(_use_onemath_hip ON)
+            set(HIP_TARGETS
+                ${DPNP_TARGET_HIP}
+                CACHE STRING
+                "HIP GPU targets for oneMath"
+            )
         else()
             message(
                 FATAL_ERROR
-                "Invalid value for HIP_TARGETS: \"${HIP_TARGETS}\". "
+                "Invalid value for DPNP_TARGET_HIP: \"${DPNP_TARGET_HIP}\". "
                 "Expected an architecture name starting with 'gfx', e.g. 'gfx1030'."
             )
         endif()
@@ -159,11 +169,15 @@ else()
     endif()
 
     if("${DPNP_SYCL_TARGETS}" MATCHES "amd_gpu_")
-        set(_use_onemath_hip ON)
-
-        if("x${HIP_TARGETS}" STREQUAL "x")
-            message(FATAL_ERROR "HIP_TARGETS must be specified when using HIP backend")
+        if("x${DPNP_TARGET_HIP}" STREQUAL "x")
+            message(
+                FATAL_ERROR
+                "DPNP_TARGET_HIP must be specified when using HIP backend"
+            )
         endif()
+
+        set(_use_onemath_hip ON)
+        set(HIP_TARGETS ${DPNP_TARGET_HIP} CACHE STRING "HIP GPU targets for oneMath")
     endif()
 
     if("${DPNP_SYCL_TARGETS}" MATCHES "amdgcn-amd-amdhsa")
@@ -295,6 +309,16 @@ else()
     message(FATAL_ERROR "Unsupported system.")
 endif()
 
+# Define flags for CMAKE_BUILD_TYPE=Coverage
+set(CMAKE_C_FLAGS_COVERAGE "${CMAKE_C_FLAGS_DEBUG} -O1 -g1 -DDEBUG")
+set(CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_DEBUG} -O1 -g1 -DDEBUG")
+set(CMAKE_MODULE_LINKER_FLAGS_COVERAGE "${CMAKE_MODULE_LINKER_FLAGS_DEBUG}")
+mark_as_advanced(
+    CMAKE_C_FLAGS_COVERAGE
+    CMAKE_CXX_FLAGS_COVERAGE
+    CMAKE_MODULE_LINKER_FLAGS_COVERAGE
+)
+
 if(DPNP_GENERATE_COVERAGE)
     string(
         CONCAT PROFILE_FLAGS
diff --git a/doc/0.builddoc.sh b/doc/0.builddoc.sh
deleted file mode 100755
index f10b4a5cc22d..000000000000
--- a/doc/0.builddoc.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash
-
-BUILDDOCDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
-ROOTDIR=$BUILDDOCDIR/..
-
-cd "$ROOTDIR" || exit 1
-python setup.py develop
-
-cd "$BUILDDOCDIR" || exit 2
-make clean
-make html
diff --git a/doc/quick_start_guide.rst b/doc/quick_start_guide.rst
index 92c506c0fd81..6226a655c333 100644
--- a/doc/quick_start_guide.rst
+++ b/doc/quick_start_guide.rst
@@ -112,13 +112,15 @@ To build and install the package on Linux OS, run:
 
 .. code-block:: bash
 
-    python setup.py install -- -G Ninja -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icpx
+    python setup.py build_ext --inplace -- -G Ninja -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icpx
+    python -m pip install -e .
 
 To build and install the package on Windows OS, run:
 
 .. code-block:: bash
 
-    python setup.py install -- -G Ninja -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icx
+    python setup.py build_ext --inplace -- -G Ninja -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icx
+    python -m pip install -e .
 
 Alternatively, to develop on Linux OS, you can use the driver script:
 
diff --git a/scripts/_build_helper.py b/scripts/_build_helper.py
new file mode 100644
index 000000000000..f9e303454848
--- /dev/null
+++ b/scripts/_build_helper.py
@@ -0,0 +1,249 @@
+# *****************************************************************************
+# Copyright (c) 2026, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+#   may be used to endorse or promote products derived from this software
+#   without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+import os
+import shutil
+import subprocess
+import sys
+import warnings
+
+
+def get_dpctl_cmake_dir():
+    """
+    If dpctl was built locally using `scripts/build_locally.py`, passing
+    -DDpctl_ROOT=$(python -m dpctl --cmakedir) during the build is required.
+    If dpctl is conda installed, it is optional to pass this parameter.
+
+    """
+
+    process = subprocess.Popen(
+        [sys.executable, "-m", "dpctl", "--cmakedir"],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+    )
+    output, error = process.communicate()
+    if process.returncode == 0:
+        return output.decode("utf-8").strip()
+
+    raise RuntimeError(
+        "Failed to retrieve dpctl cmake directory: "
+        + error.decode("utf-8").strip()
+    )
+
+
+def resolve_compilers(
+    oneapi: bool,
+    c_compiler: str,
+    cxx_compiler: str,
+    compiler_root: str,
+):
+    is_linux = "linux" in sys.platform
+
+    if oneapi or (
+        c_compiler is None and cxx_compiler is None and compiler_root is None
+    ):
+        return "icx", ("icpx" if is_linux else "icx")
+
+    if (
+        (c_compiler is None or not os.path.isabs(c_compiler))
+        and (cxx_compiler is None or not os.path.isabs(cxx_compiler))
+        and (not compiler_root or not os.path.exists(compiler_root))
+    ):
+        raise RuntimeError(
+            "--compiler-root option must be set when using non-default DPC++ "
+            "layout unless absolute paths are provided for both compilers"
+        )
+
+    # default values
+    if c_compiler is None:
+        c_compiler = "icx"
+    if cxx_compiler is None:
+        cxx_compiler = "icpx" if is_linux else "icx"
+
+    compiler_paths = []
+    for name, opt_name in (
+        (c_compiler, "--c-compiler"),
+        (cxx_compiler, "--cxx-compiler"),
+    ):
+        if os.path.isabs(name):
+            path = name
+        else:
+            path = os.path.join(compiler_root, name)
+
+        if not os.path.exists(path):
+            raise RuntimeError(
+                f"{opt_name} value {name} not found and {path} not exist"
+            )
+
+        compiler_paths.append(path)
+    return tuple(compiler_paths)
+
+
+def resolve_onemath(
+    onemath: bool,
+    onemath_dir: str,
+    target_cuda: str = None,
+    target_hip: str = None,
+    onemkl_interfaces: bool = False,
+    onemkl_interfaces_dir: str = None,
+):
+    # always enable build with oneMath i/f when oneMath path is passed
+    if onemath_dir:
+        onemath = True
+
+    # always enable build with oneMath i/f for CUDA or HIP target
+    if target_cuda or target_hip:
+        onemath = True
+
+    # TODO: onemkl_interfaces and onemkl_interfaces_dir are deprecated in
+    # dpnp-0.19.0 and should be removed in dpnp-0.20.0.
+    if onemkl_interfaces:
+        warnings.warn(
+            "Using 'onemkl_interfaces' is deprecated. Please use 'onemath' instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        onemath = True
+    if onemkl_interfaces_dir is not None:
+        warnings.warn(
+            "Using 'onemkl_interfaces_dir' is deprecated. Please use 'onemath_dir' instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        onemath_dir = onemkl_interfaces_dir
+    return onemath, onemath_dir
+
+
+def run(cmd: list[str], env: dict[str, str] = None, cwd: str = None):
+    print("+", " ".join(cmd))
+    subprocess.check_call(
+        cmd, env=env or os.environ.copy(), cwd=cwd or os.getcwd()
+    )
+
+
+def capture_cmd_output(cmd: list[str], cwd: str = None):
+    print("+", " ".join(cmd))
+    return (
+        subprocess.check_output(cmd, cwd=cwd or os.getcwd())
+        .decode("utf-8")
+        .strip("\n")
+    )
+
+
+def err(msg: str, script: str):
+    raise RuntimeError(f"[{script}] error: {msg}")
+
+
+def log_cmake_args(cmake_args: list[str], script: str):
+    print(f"[{script}] Using CMake args:\n{' '.join(cmake_args)}")
+
+
+def make_cmake_args(
+    c_compiler: str = None,
+    cxx_compiler: str = None,
+    dpctl_cmake_dir: str = None,
+    onemath: bool = False,
+    onemath_dir: str = None,
+    verbose: bool = False,
+    other_opts: str = None,
+):
+    args = [
+        f"-DCMAKE_C_COMPILER:PATH={c_compiler}" if c_compiler else "",
+        f"-DCMAKE_CXX_COMPILER:PATH={cxx_compiler}" if cxx_compiler else "",
+        f"-DDpctl_ROOT={dpctl_cmake_dir}" if dpctl_cmake_dir else "",
+    ]
+
+    if onemath:
+        args.append("-DDPNP_USE_ONEMATH=ON")
+        if onemath_dir:
+            args.append(f"-DDPNP_ONEMATH_DIR={onemath_dir}")
+
+    if verbose:
+        args.append("-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON")
+    if other_opts:
+        args.extend(other_opts.split())
+
+    return args
+
+
+def build_extension(
+    setup_dir: str,
+    env: dict[str, str],
+    cmake_args: list[str],
+    cmake_executable: str = None,
+    generator: str = None,
+    build_type: str = None,
+):
+    cmd = [sys.executable, "setup.py", "build_ext", "--inplace"]
+    if cmake_executable:
+        cmd.append(f"--cmake-executable={cmake_executable}")
+    if generator:
+        cmd.append(f"--generator={generator}")
+    if build_type:
+        cmd.append(f"--build-type={build_type}")
+    if cmake_args:
+        cmd.append("--")
+        cmd += cmake_args
+    run(
+        cmd,
+        env=env,
+        cwd=setup_dir,
+    )
+
+
+def install_editable(setup_dir: str, env: dict[str, str]):
+    run(
+        [
+            sys.executable,
+            "-m",
+            "pip",
+            "install",
+            "-e",
+            ".",
+            "--no-build-isolation",
+        ],
+        env=env,
+        cwd=setup_dir,
+    )
+
+
+def clean_build_dir(setup_dir: str):
+    if (
+        not isinstance(setup_dir, str)
+        or not setup_dir
+        or not os.path.isdir(setup_dir)
+    ):
+        raise RuntimeError(f"Invalid setup directory provided: '{setup_dir}'")
+    target = os.path.join(setup_dir, "_skbuild")
+    if os.path.exists(target):
+        print(f"Cleaning build directory: {target}")
+        try:
+            shutil.rmtree(target)
+        except Exception as e:
+            print(f"Failed to remove build directory: '{target}'")
+            raise e
diff --git a/scripts/build_locally.py b/scripts/build_locally.py
index 1197de9d9455..e17fc8e478fe 100644
--- a/scripts/build_locally.py
+++ b/scripts/build_locally.py
@@ -26,290 +26,211 @@
 # THE POSSIBILITY OF SUCH DAMAGE.
 # *****************************************************************************
 
+import argparse
 import os
-import subprocess
 import sys
-import warnings
 
-warnings.simplefilter("default", DeprecationWarning)
-
-
-def run(
-    use_oneapi=True,
-    build_type="Release",
-    c_compiler=None,
-    cxx_compiler=None,
-    compiler_root=None,
-    cmake_executable=None,
-    verbose=False,
-    cmake_opts="",
-    target_cuda=None,
-    target_hip=None,
-    onemkl_interfaces=False,
-    onemkl_interfaces_dir=None,
-    onemath=False,
-    onemath_dir=None,
-):
-    build_system = None
-
-    if "linux" in sys.platform:
-        build_system = "Ninja"
-    elif sys.platform in ["win32", "cygwin"]:
-        build_system = "Ninja"
-    else:
-        raise AssertionError(sys.platform + " not supported")
-
-    setup_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-    cmake_args = [
-        sys.executable,
-        "setup.py",
-        "develop",
-    ]
-    if cmake_executable:
-        cmake_args += [
-            "--cmake-executable=" + cmake_executable,
-        ]
-
-    # if dpctl is locally built using `script/build_locally.py`, it is needed
-    # to pass the -DDpctl_ROOT=$(python -m dpctl --cmakedir)
-    # if dpctl is conda installed, it is optional to pass this parameter
-    process = subprocess.Popen(
-        ["python", "-m", "dpctl", "--cmakedir"],
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-    )
-    output, error = process.communicate()
-    if process.returncode == 0:
-        cmake_dir = output.decode("utf-8").strip()
-    else:
-        raise RuntimeError(
-            "Failed to retrieve dpctl cmake directory: "
-            + error.decode("utf-8").strip()
-        )
-
-    cmake_args += [
-        "--build-type=" + build_type,
-        "--generator=" + build_system,
-        "--",
-        "-DCMAKE_C_COMPILER:PATH=" + c_compiler,
-        "-DCMAKE_CXX_COMPILER:PATH=" + cxx_compiler,
-        "-DDpctl_ROOT=" + cmake_dir,
-    ]
-    if verbose:
-        cmake_args += [
-            "-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON",
-        ]
-    if cmake_opts:
-        cmake_args += cmake_opts.split()
-    if use_oneapi:
-        if "DPL_ROOT" in os.environ:
-            os.environ["DPL_ROOT_HINT"] = os.environ["DPL_ROOT"]
-
-    # TODO: onemkl_interfaces and onemkl_interfaces_dir are deprecated in
-    # dpnp-0.19.0 and should be removed in dpnp-0.20.0.
-    if onemkl_interfaces:
-        warnings.warn(
-            "Using 'onemkl_interfaces' is deprecated. Please use 'onemath' instead.",
-            DeprecationWarning,
-            stacklevel=1,
-        )
-        onemath = True
-    if onemkl_interfaces_dir is not None:
-        warnings.warn(
-            "Using 'onemkl_interfaces_dir' is deprecated. Please use 'onemath_dir' instead.",
-            DeprecationWarning,
-            stacklevel=1,
-        )
-        onemath_dir = onemkl_interfaces_dir
-
-    if target_cuda is not None:
-        if not target_cuda.strip():
-            raise ValueError(
-                "--target-cuda can not be an empty string. "
-                "Use --target-cuda=<arch> or --target-cuda"
-            )
-        cmake_args += [
-            f"-DDPNP_TARGET_CUDA={target_cuda}",
-        ]
-        # Always builds using oneMath for the cuda target
-        onemath = True
-
-    if target_hip is not None:
-        if not target_hip.strip():
-            raise ValueError(
-                "--target-hip requires an architecture (e.g., gfx90a)"
-            )
-        cmake_args += [
-            f"-DHIP_TARGETS={target_hip}",
-        ]
-        # Always builds using oneMath for the hip target
-        onemath = True
-
-    if onemath:
-        cmake_args += [
-            "-DDPNP_USE_ONEMATH=ON",
-        ]
-
-        if onemath_dir:
-            cmake_args += [
-                f"-DDPNP_ONEMATH_DIR={onemath_dir}",
-            ]
-    elif onemath_dir:
-        raise RuntimeError("--onemath-dir option is not supported")
-
-    subprocess.check_call(
-        cmake_args, shell=False, cwd=setup_dir, env=os.environ
+from _build_helper import (
+    build_extension,
+    clean_build_dir,
+    err,
+    get_dpctl_cmake_dir,
+    install_editable,
+    log_cmake_args,
+    make_cmake_args,
+    resolve_compilers,
+    resolve_onemath,
+)
+
+
+def parse_args():
+    p = argparse.ArgumentParser(description="Local dpnp build driver")
+
+    # compiler and oneAPI relating options
+    p.add_argument(
+        "--c-compiler",
+        type=str,
+        default=None,
+        help="Path or name of C compiler",
     )
-
-
-if __name__ == "__main__":
-    import argparse
-
-    parser = argparse.ArgumentParser(
-        description="Driver to build dpnp for in-place installation"
+    p.add_argument(
+        "--cxx-compiler",
+        type=str,
+        default=None,
+        help="Path or name of C++ compiler",
     )
-    driver = parser.add_argument_group(title="Coverage driver arguments")
-    driver.add_argument("--c-compiler", help="Name of C compiler", default=None)
-    driver.add_argument(
-        "--cxx-compiler", help="Name of C++ compiler", default=None
+    p.add_argument(
+        "--compiler-root",
+        type=str,
+        default=None,
+        help="Path to compiler installation root",
     )
-    driver.add_argument(
+    p.add_argument(
         "--oneapi",
-        help="Set if using one-API installation",
         dest="oneapi",
         action="store_true",
+        help="Use default oneAPI compiler layout",
     )
-    driver.add_argument(
-        "--debug",
-        default="Release",
-        const="Debug",
-        action="store_const",
-        help="Set the compilation mode to debugging",
+
+    # CMake relating options
+    p.add_argument(
+        "--generator", type=str, default="Ninja", help="CMake generator"
     )
-    driver.add_argument(
-        "--compiler-root",
+    p.add_argument(
+        "--cmake-executable",
         type=str,
-        help="Path to compiler home directory",
         default=None,
+        help="Path to CMake executable used by build",
     )
-    driver.add_argument(
-        "--cmake-executable",
+    p.add_argument(
+        "--cmake-opts",
         type=str,
-        help="Path to cmake executable",
-        default=None,
+        default="",
+        help="Additional options to pass directly to CMake",
     )
-    driver.add_argument(
+    p.add_argument(
+        "--debug",
+        dest="build_type",
+        const="Debug",
+        action="store_const",
+        default="Release",
+        help="Set build type to Debug (defaults to Release)",
+    )
+    p.add_argument(
         "--verbose",
-        help="Build using vebose makefile mode",
         dest="verbose",
         action="store_true",
+        help="Enable verbose makefile output",
     )
-    driver.add_argument(
-        "--cmake-opts",
-        help="Channels through additional cmake options",
-        dest="cmake_opts",
-        default="",
-        type=str,
-    )
-    driver.add_argument(
+
+    # platform target relating options
+    p.add_argument(
         "--target-cuda",
         nargs="?",
         const="ON",
-        help="Enable CUDA target for build; "
-        "optionally specify architecture (e.g., --target-cuda=sm_80)",
         default=None,
-        type=str,
+        help="Enable CUDA build. Architecture is optional to specify (e.g., --target-cuda=sm_80).",
     )
-    driver.add_argument(
+    p.add_argument(
         "--target-hip",
         required=False,
-        help="Enable HIP target for build. "
-        "Must specify HIP architecture (e.g., --target-hip=gfx90a)",
         type=str,
+        help="Enable HIP backend. Architecture required to be specified  (e.g., --target-hip=gfx90a).",
     )
-    driver.add_argument(
+
+    # oneMath relating options
+    p.add_argument(
         "--onemkl_interfaces",
         help="(DEPRECATED) Build using oneMath",
         dest="onemkl_interfaces",
         action="store_true",
     )
-    driver.add_argument(
+    p.add_argument(
         "--onemkl_interfaces_dir",
         help="(DEPRECATED) Local directory with source of oneMath",
         dest="onemkl_interfaces_dir",
         default=None,
         type=str,
     )
-    driver.add_argument(
+    p.add_argument(
         "--onemath",
         help="Build using oneMath",
         dest="onemath",
         action="store_true",
     )
-    driver.add_argument(
+    p.add_argument(
         "--onemath-dir",
         help="Local directory with source of oneMath",
         dest="onemath_dir",
         default=None,
         type=str,
     )
-    args = parser.parse_args()
 
-    args_to_validate = [
-        "c_compiler",
-        "cxx_compiler",
-        "compiler_root",
-    ]
+    # build relating options
+    p.add_argument(
+        "--clean",
+        action="store_true",
+        help="Remove build dir before rebuild",
+    )
+    p.add_argument(
+        "--skip-editable",
+        action="store_true",
+        help="Skip pip editable install step",
+    )
 
-    if args.oneapi or (
-        args.c_compiler is None
-        and args.cxx_compiler is None
-        and args.compiler_root is None
-    ):
-        args.c_compiler = "icx"
-        args.cxx_compiler = "icpx" if "linux" in sys.platform else "icx"
-        args.compiler_root = None
-    else:
-        cr = args.compiler_root
-        if isinstance(cr, str) and os.path.exists(cr):
-            if args.c_compiler is None:
-                args.c_compiler = "icx"
-            if args.cxx_compiler is None:
-                args.cxx_compiler = "icpx" if "linux" in sys.platform else "icx"
-        else:
-            raise RuntimeError(
-                "Option 'compiler-root' must be provided when "
-                "using non-default DPC++ layout."
-            )
-        args_to_validate = [
-            "c_compiler",
-            "cxx_compiler",
-        ]
-        for p in args_to_validate:
-            arg = getattr(args, p)
-            assert isinstance(arg, str)
-            if not os.path.exists(arg):
-                arg2 = os.path.join(cr, arg)
-                if os.path.exists(arg2):
-                    arg = arg2
-                    setattr(args, p, arg)
-            if not os.path.exists(arg):
-                opt_name = p.replace("_", "-")
-                raise RuntimeError(f"Option {opt_name} value {arg} must exist.")
+    return p.parse_args()
 
-    run(
-        use_oneapi=args.oneapi,
-        build_type=args.debug,
-        c_compiler=args.c_compiler,
-        cxx_compiler=args.cxx_compiler,
-        compiler_root=args.compiler_root,
-        cmake_executable=args.cmake_executable,
+
+def main():
+    if sys.platform not in ["cygwin", "win32", "linux"]:
+        err(f"{sys.platform} not supported", "build_locally")
+
+    args = parse_args()
+    setup_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+    c_compiler, cxx_compiler = resolve_compilers(
+        args.oneapi, args.c_compiler, args.cxx_compiler, args.compiler_root
+    )
+
+    dpctl_cmake_dir = get_dpctl_cmake_dir()
+    print(f"[build_locally] Found DPCTL CMake dir: {dpctl_cmake_dir}")
+
+    onemath, onemath_dir = resolve_onemath(
+        args.onemath,
+        args.onemath_dir,
+        args.target_cuda,
+        args.target_hip,
+        args.onemkl_interfaces,
+        args.onemkl_interfaces_dir,
+    )
+
+    # clean build dir if --clean set
+    if args.clean:
+        clean_build_dir(setup_dir)
+
+    cmake_args = make_cmake_args(
+        c_compiler=c_compiler,
+        cxx_compiler=cxx_compiler,
+        dpctl_cmake_dir=dpctl_cmake_dir,
+        onemath=onemath,
+        onemath_dir=onemath_dir,
         verbose=args.verbose,
-        cmake_opts=args.cmake_opts,
-        target_cuda=args.target_cuda,
-        target_hip=args.target_hip,
-        onemkl_interfaces=args.onemkl_interfaces,
-        onemkl_interfaces_dir=args.onemkl_interfaces_dir,
-        onemath=args.onemath,
-        onemath_dir=args.onemath_dir,
+        other_opts=args.cmake_opts,
     )
+
+    # handle architecture conflicts
+    if args.target_hip is not None and not args.target_hip.strip():
+        err("--target-hip requires an explicit architecture", "build_locally")
+
+    # CUDA/HIP targets
+    if args.target_cuda:
+        cmake_args += [f"-DDPNP_TARGET_CUDA={args.target_cuda}"]
+    if args.target_hip:
+        cmake_args += [f"-DDPNP_TARGET_HIP={args.target_hip}"]
+
+    log_cmake_args(cmake_args, "build_locally")
+
+    print("[build_locally] Building extensions in-place...")
+
+    env = os.environ.copy()
+    if args.oneapi and "DPL_ROOT" in env:
+        env["DPL_ROOT_HINT"] = env["DPL_ROOT"]
+
+    build_extension(
+        setup_dir,
+        env,
+        cmake_args,
+        cmake_executable=args.cmake_executable,
+        generator=args.generator,
+        build_type=args.build_type,
+    )
+    if not args.skip_editable:
+        install_editable(setup_dir, env)
+    else:
+        print("[build_locally] Skipping editable install (--skip-editable)")
+
+    print("[build_locally] Build complete")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/gen_coverage.py b/scripts/gen_coverage.py
index a624bd570e2d..588345d91b2e 100644
--- a/scripts/gen_coverage.py
+++ b/scripts/gen_coverage.py
@@ -26,67 +26,185 @@
 # THE POSSIBILITY OF SUCH DAMAGE.
 # *****************************************************************************
 
+import argparse
 import os
 import subprocess
 import sys
 
+from _build_helper import (
+    build_extension,
+    capture_cmd_output,
+    clean_build_dir,
+    err,
+    get_dpctl_cmake_dir,
+    install_editable,
+    log_cmake_args,
+    make_cmake_args,
+    resolve_compilers,
+    run,
+)
 
-def run(
-    use_oneapi=True,
-    c_compiler=None,
-    cxx_compiler=None,
-    compiler_root=None,
-    bin_llvm=None,
-    pytest_opts="",
-    verbose=False,
-):
-    IS_LIN = False
-
-    if "linux" in sys.platform:
-        IS_LIN = True
-    elif sys.platform in ["win32", "cygwin"]:
-        pass
+
+def parse_args():
+    p = argparse.ArgumentParser(description="Build dpnp and generate coverage")
+
+    # compiler and oneAPI relating options
+    p.add_argument(
+        "--c-compiler", default=None, help="Path or name of C compiler"
+    )
+    p.add_argument(
+        "--cxx-compiler", default=None, help="Path or name of C++ compiler"
+    )
+    p.add_argument(
+        "--compiler-root",
+        type=str,
+        default=None,
+        help="Path to compiler installation root",
+    )
+    p.add_argument(
+        "--oneapi",
+        dest="oneapi",
+        action="store_true",
+        help="Use default oneAPI compiler layout",
+    )
+    p.add_argument(
+        "--bin-llvm",
+        type=str,
+        default=None,
+        help="Path to folder where llvm-cov/llvm-profdata can be found",
+    )
+
+    # CMake relating options
+    p.add_argument(
+        "--generator", type=str, default="Ninja", help="CMake generator"
+    )
+    p.add_argument(
+        "--cmake-executable",
+        type=str,
+        default=None,
+        help="Path to CMake executable used by build",
+    )
+
+    p.add_argument(
+        "--cmake-opts",
+        type=str,
+        default="",
+        help="Additional options to pass directly to CMake",
+    )
+    p.add_argument(
+        "--verbose",
+        dest="verbose",
+        action="store_true",
+        help="Enable verbose makefile output",
+    )
+
+    # test relating options
+    p.add_argument(
+        "--skip-pytest",
+        dest="run_pytest",
+        action="store_false",
+        help="Skip running pytest and coverage generation",
+    )
+    p.add_argument(
+        "--pytest-opts",
+        help="Channels through additional pytest options",
+        dest="pytest_opts",
+        default="",
+        type=str,
+    )
+
+    # build relating options
+    p.add_argument(
+        "--clean",
+        action="store_true",
+        help="Remove build dir before rebuild (default: False)",
+    )
+
+    return p.parse_args()
+
+
+def find_bin_llvm(compiler):
+    if os.path.isabs(compiler):
+        bin_dir = os.path.dirname(compiler)
     else:
-        raise AssertionError(sys.platform + " not supported")
+        compiler_path = capture_cmd_output(["which", compiler])
+        if not compiler_path:
+            raise RuntimeError(f"Compiler {compiler} not found in PATH")
+        bin_dir = os.path.dirname(compiler_path)
 
-    if not IS_LIN:
+    compiler_dir = os.path.join(bin_dir, "compiler")
+    if os.path.exists(compiler_dir):
+        bin_llvm = compiler_dir
+    else:
+        bin_dir = os.path.dirname(bin_dir)
+        bin_llvm = os.path.join(bin_dir, "bin-llvm")
+
+    if not os.path.exists(bin_llvm):
         raise RuntimeError(
-            "This scripts only supports coverage collection on Linux"
+            f"Path to folder with llvm-cov/llvm-profdata={bin_llvm} "
+            "seems to not exist"
         )
+    return bin_llvm
+
 
+def main():
+    is_linux = "linux" in sys.platform
+    if not is_linux:
+        err(f"{sys.platform} not supported", "gen_coverage")
+
+    args = parse_args()
     setup_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
-    cmake_args = [
-        sys.executable,
-        "setup.py",
-        "develop",
-        "--generator=Ninja",
-        "--",
-        "-DCMAKE_C_COMPILER:PATH=" + c_compiler,
-        "-DCMAKE_CXX_COMPILER:PATH=" + cxx_compiler,
-        "-DDPNP_GENERATE_COVERAGE=ON",
-    ]
-
-    env = {}
-    if bin_llvm:
-        env = {
-            "PATH": ":".join((os.environ.get("PATH", ""), bin_llvm)),
-            "LLVM_TOOLS_HOME": bin_llvm,
-        }
+    c_compiler, cxx_compiler = resolve_compilers(
+        args.oneapi,
+        args.c_compiler,
+        args.cxx_compiler,
+        args.compiler_root,
+    )
 
-    # extend with global environment variables
-    env.update({k: v for k, v in os.environ.items() if k != "PATH"})
+    dpctl_cmake_dir = get_dpctl_cmake_dir()
+    print(f"[gen_coverage] Found DPCTL CMake dir: {dpctl_cmake_dir}")
 
-    if verbose:
-        cmake_args += [
-            "-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON",
-        ]
+    if args.clean:
+        clean_build_dir(setup_dir)
+
+    cmake_args = make_cmake_args(
+        c_compiler=c_compiler,
+        cxx_compiler=cxx_compiler,
+        dpctl_cmake_dir=dpctl_cmake_dir,
+        verbose=args.verbose,
+    )
+    cmake_args.append("-DDPNP_GENERATE_COVERAGE=ON")
+
+    env = os.environ.copy()
+
+    if args.bin_llvm:
+        bin_llvm = args.bin_llvm
+    else:
+        bin_llvm = find_bin_llvm(c_compiler)
+    print(
+        f"[gen_coverage] Path to folder with llvm-cov/llvm-profdata: {bin_llvm}"
+    )
+
+    if bin_llvm:
+        env["PATH"] = ":".join((env.get("PATH", ""), bin_llvm))
+        env["LLVM_TOOLS_HOME"] = bin_llvm
 
-    subprocess.check_call(cmake_args, shell=False, cwd=setup_dir, env=env)
+    log_cmake_args(cmake_args, "gen_coverage")
 
-    env["LLVM_PROFILE_FILE"] = "dpnp_pytest.profraw"
-    subprocess.check_call(
-        [
+    build_extension(
+        setup_dir,
+        env,
+        cmake_args,
+        cmake_executable=args.cmake_executable,
+        generator=args.generator,
+        build_type="Coverage",
+    )
+    install_editable(setup_dir, env)
+
+    if args.run_pytest:
+        env["LLVM_PROFILE_FILE"] = "dpnp_pytest.profraw"
+        pytest_cmd = [
             "pytest",
             "-q",
             "-ra",
@@ -99,133 +217,66 @@ def run(
             "--cov-report=lcov:coverage-python.lcov",
             "--pyargs",
             "dpnp",
-            *pytest_opts.split(),
-        ],
-        cwd=setup_dir,
-        shell=False,
-        env=env,
-    )
-
-    def find_objects():
-        objects = []
-        dpnp_path = os.getcwd()
-        search_path = os.path.join(dpnp_path, "dpnp")
-        for root, _, files in os.walk(search_path):
-            for file in files:
-                if (
-                    file.endswith("_c.so")
-                    or root.find("extensions") != -1
-                    and file.find("_impl.cpython") != -1
-                ):
-                    objects.extend(["-object", os.path.join(root, file)])
-        return objects
-
-    objects = find_objects()
-    instr_profile_fn = "dpnp_pytest.profdata"
-    # generate instrumentation profile data
-    subprocess.check_call(
-        [
-            os.path.join(bin_llvm, "llvm-profdata"),
-            "merge",
-            "-sparse",
-            env["LLVM_PROFILE_FILE"],
-            "-o",
-            instr_profile_fn,
+            *args.pytest_opts.split(),
         ]
-    )
+        run(pytest_cmd, env=env, cwd=setup_dir)
 
-    # export lcov
-    with open("coverage-cpp.lcov", "w") as fh:
-        subprocess.check_call(
+        def find_objects():
+            objects = []
+            dpnp_path = os.getcwd()
+            search_path = os.path.join(dpnp_path, "dpnp")
+            for root, _, files in os.walk(search_path):
+                for file in files:
+                    if (
+                        file.endswith("_c.so")
+                        or root.find("extensions") != -1
+                        and file.find("_impl.cpython") != -1
+                    ):
+                        objects.extend(["-object", os.path.join(root, file)])
+            return objects
+
+        objects = find_objects()
+        instr_profile_fn = "dpnp_pytest.profdata"
+
+        # generate instrumentation profile data
+        run(
             [
-                os.path.join(bin_llvm, "llvm-cov"),
-                "export",
-                "-format=lcov",
-                "-ignore-filename-regex=/tmp/icpx*",
-                r"-ignore-filename-regex=.*/backend/kernels/elementwise_functions/.*\.hpp$",
-                "-instr-profile=" + instr_profile_fn,
+                os.path.join(bin_llvm, "llvm-profdata"),
+                "merge",
+                "-sparse",
+                env["LLVM_PROFILE_FILE"],
+                "-o",
+                instr_profile_fn,
             ]
-            + objects
-            + ["-sources", "dpnp"],
-            stdout=fh,
         )
 
+        # export lcov
+        with open("coverage-cpp.lcov", "w") as fh:
+            subprocess.check_call(
+                [
+                    os.path.join(bin_llvm, "llvm-cov"),
+                    "export",
+                    "-format=lcov",
+                    "-ignore-filename-regex=/tmp/icpx*",
+                    r"-ignore-filename-regex=.*/backend/kernels/elementwise_functions/.*\.hpp$",
+                    "-instr-profile=" + instr_profile_fn,
+                ]
+                + objects
+                + ["-sources", "dpnp"],
+                cwd=setup_dir,
+                env=env,
+                stdout=fh,
+            )
 
-if __name__ == "__main__":
-    import argparse
-
-    parser = argparse.ArgumentParser(
-        description="Driver to build dpnp and generate coverage"
-    )
-    driver = parser.add_argument_group(title="Coverage driver arguments")
-    driver.add_argument("--c-compiler", help="Name of C compiler", default=None)
-    driver.add_argument(
-        "--cxx-compiler", help="Name of C++ compiler", default=None
-    )
-    driver.add_argument(
-        "--not-oneapi",
-        help="Is one-API installation",
-        dest="oneapi",
-        action="store_false",
-    )
-    driver.add_argument(
-        "--compiler-root", type=str, help="Path to compiler home directory"
-    )
-    driver.add_argument(
-        "--bin-llvm", help="Path to folder where llvm-cov can be found"
-    )
-    driver.add_argument(
-        "--pytest-opts",
-        help="Channels through additional pytest options",
-        dest="pytest_opts",
-        default="",
-        type=str,
-    )
-    driver.add_argument(
-        "--verbose",
-        help="Build using vebose makefile mode",
-        dest="verbose",
-        action="store_true",
-    )
-    args = parser.parse_args()
-
-    if args.oneapi:
-        args.c_compiler = "icx"
-        args.cxx_compiler = "icpx"
-        args.compiler_root = None
-        icx_path = subprocess.check_output(["which", "icx"])
-        bin_dir = os.path.dirname(icx_path)
-        compiler_dir = os.path.join(bin_dir.decode("utf-8"), "compiler")
-        if os.path.exists(compiler_dir):
-            args.bin_llvm = os.path.join(bin_dir.decode("utf-8"), "compiler")
-        else:
-            bin_dir = os.path.dirname(bin_dir)
-            args.bin_llvm = os.path.join(bin_dir.decode("utf-8"), "bin-llvm")
-        assert os.path.exists(args.bin_llvm)
+        print("[gen_coverage] Coverage export is completed")
     else:
-        args_to_validate = [
-            "c_compiler",
-            "cxx_compiler",
-            "compiler_root",
-            "bin_llvm",
-        ]
-        for p in args_to_validate:
-            arg = getattr(args, p, None)
-            if not isinstance(arg, str):
-                opt_name = p.replace("_", "-")
-                raise RuntimeError(
-                    f"Option {opt_name} must be provided is "
-                    "using non-default DPC++ layout"
-                )
-            if not os.path.exists(arg):
-                raise RuntimeError(f"Path {arg} must exist")
-
-    run(
-        use_oneapi=args.oneapi,
-        c_compiler=args.c_compiler,
-        cxx_compiler=args.cxx_compiler,
-        compiler_root=args.compiler_root,
-        bin_llvm=args.bin_llvm,
-        pytest_opts=args.pytest_opts,
-        verbose=args.verbose,
-    )
+        print(
+            "[gen_coverage] Skipping pytest and coverage collection "
+            "(--skip-pytest)"
+        )
+
+    print("[gen_coverage] Done")
+
+
+if __name__ == "__main__":
+    main()

From f0a397c35d0fe573f6968a87e437b8cfa98f596b Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Tue, 20 Jan 2026 12:29:25 +0100
Subject: [PATCH 08/16] Mute tests with `dpnp.cumlogsumexp` (#2728)

The PR proposes to temporarily disable the tests with `dpnp.cumlogsumexp`
when `axis is None` and the tests are running on a PTL GPU device.
The tests must be re-enabled once the issue in the internal CI is
resolved.
---
 dpnp/tests/test_mathematical.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py
index 77c65991e9cd..1418a5a7fed1 100644
--- a/dpnp/tests/test_mathematical.py
+++ b/dpnp/tests/test_mathematical.py
@@ -33,6 +33,7 @@
     has_support_aspect16,
     has_support_aspect64,
     is_intel_numpy,
+    is_ptl,
     numpy_version,
 )
 from .third_party.cupy import testing
@@ -217,6 +218,9 @@ def _get_exp_array(self, a, axis, dtype):
     @pytest.mark.parametrize("axis", [None, 2, -1])
     @pytest.mark.parametrize("include_initial", [True, False])
     def test_basic(self, dtype, axis, include_initial):
+        if axis is None and is_ptl():
+            pytest.skip("due to SAT-8336")
+
         a = dpnp.ones((3, 4, 5, 6, 7), dtype=dtype)
         res = dpnp.cumlogsumexp(a, axis=axis, include_initial=include_initial)
 
@@ -234,6 +238,9 @@ def test_basic(self, dtype, axis, include_initial):
     @pytest.mark.parametrize("axis", [None, 2, -1])
     @pytest.mark.parametrize("include_initial", [True, False])
     def test_include_initial(self, dtype, axis, include_initial):
+        if axis is None and is_ptl():
+            pytest.skip("due to SAT-8336")
+
         a = dpnp.ones((3, 4, 5, 6, 7), dtype=dtype)
 
         if dpnp.issubdtype(a, dpnp.float32):

From a97953e89dc93912c605f708767128904d0b7f2c Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Tue, 20 Jan 2026 15:25:49 +0100
Subject: [PATCH 09/16] Remove unused configuration from `pyproject.toml`
 (#2729)

The PR removes the pytest configuration from `pyproject.toml`, since it
was never used: the section was named `tool.pytest.ini.options`, which
has never been supported by pytest.

The latest pytest warns about that explicitly:
> PytestConfigWarning: Unknown config option: ini

since the correct name is `tool.pytest.ini_options`.

The configuration may be restored in `pyproject.toml` if it is really
needed.
---
 CHANGELOG.md   |  1 +
 pyproject.toml | 20 --------------------
 2 files changed, 1 insertion(+), 20 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0c9d2c973763..3507b51efc6f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -56,6 +56,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum
 * Dropped support for Python 3.9 [#2626](https://github.com/IntelPython/dpnp/pull/2626)
 * Removed the obsolete interface from DPNP to Numba JIT [#2647](https://github.com/IntelPython/dpnp/pull/2647)
 * Removed the `newshape` parameter from `dpnp.reshape`, which has been deprecated since dpnp 0.17.0. Pass it positionally or use `shape=` on newer versions [#2670](https://github.com/IntelPython/dpnp/pull/2670)
+* Removed unused `pytest` configuration from `pyproject.toml` [#2729](https://github.com/IntelPython/dpnp/pull/2729)
 
 ### Fixed
 
diff --git a/pyproject.toml b/pyproject.toml
index 6fbc23ac1b88..42e142f266e9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -163,26 +163,6 @@ allow-wildcard-with-all = true
 [tool.pylint.miscellaneous]
 notes = ["FIXME", "XXX"]
 
-[tool.pytest.ini.options]
-addopts = [
-  "--junitxml=junit.xml",
-  "--ignore setup.py",
-  "--ignore run_test.py",
-  "--cov-report term-missing",
-  "--tb native",
-  "--strict",
-  "--durations=20",
-  "-q -ra"
-]
-minversion = "6.0"
-norecursedirs = [
-  ".*",
-  "*.egg*",
-  "build",
-  "dist",
-  "conda-recipe"
-]
-
 [tool.versioneer]
 VCS = "git"
 parentdir_prefix = "dpnp-"

From a3ff897833eb5d61d845102fff10356942bdd815 Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Tue, 20 Jan 2026 22:03:50 +0100
Subject: [PATCH 10/16] Extend third party tests (#2727)

The PR includes
* migration of some tests from unittest to pytest
* updates in FFT tests
* new tests for `random.choice`

This PR is intended to align the tests with the recent changes in the
upstream repository of the third-party tests.
---
 .../cupy/binary_tests/test_elementwise.py     |   4 +-
 .../cupy/binary_tests/test_packing.py         |   4 +-
 .../cupy/core_tests/test_array_function.py    |   4 +-
 .../third_party/cupy/core_tests/test_core.py  |   9 +-
 .../cupy/core_tests/test_internal.py          | 179 ++--
 .../third_party/cupy/fft_tests/test_cache.py  |  74 +-
 .../cupy/fft_tests/test_callback.py           | 965 +++++++++++++++---
 .../third_party/cupy/fft_tests/test_fft.py    | 143 ++-
 .../cupy/random_tests/test_generator.py       | 160 +++
 9 files changed, 1183 insertions(+), 359 deletions(-)

diff --git a/dpnp/tests/third_party/cupy/binary_tests/test_elementwise.py b/dpnp/tests/third_party/cupy/binary_tests/test_elementwise.py
index 431f1f27d8c7..95ec1d4374c8 100644
--- a/dpnp/tests/third_party/cupy/binary_tests/test_elementwise.py
+++ b/dpnp/tests/third_party/cupy/binary_tests/test_elementwise.py
@@ -1,11 +1,9 @@
 from __future__ import annotations
 
-import unittest
-
 from dpnp.tests.third_party.cupy import testing
 
 
-class TestElementwise(unittest.TestCase):
+class TestElementwise:
 
     @testing.for_int_dtypes()
     @testing.numpy_cupy_array_equal()
diff --git a/dpnp/tests/third_party/cupy/binary_tests/test_packing.py b/dpnp/tests/third_party/cupy/binary_tests/test_packing.py
index a72a8a558b08..518e74d98868 100644
--- a/dpnp/tests/third_party/cupy/binary_tests/test_packing.py
+++ b/dpnp/tests/third_party/cupy/binary_tests/test_packing.py
@@ -1,4 +1,4 @@
-import unittest
+from __future__ import annotations
 
 import numpy
 import pytest
@@ -11,7 +11,7 @@
 )
 
 
-class TestPacking(unittest.TestCase):
+class TestPacking:
 
     @testing.for_int_dtypes()
     @testing.numpy_cupy_array_equal()
diff --git a/dpnp/tests/third_party/cupy/core_tests/test_array_function.py b/dpnp/tests/third_party/cupy/core_tests/test_array_function.py
index 7878a5a0aaff..f0c45900bcda 100644
--- a/dpnp/tests/third_party/cupy/core_tests/test_array_function.py
+++ b/dpnp/tests/third_party/cupy/core_tests/test_array_function.py
@@ -1,4 +1,4 @@
-import unittest
+from __future__ import annotations
 
 import numpy
 import pytest
@@ -11,7 +11,7 @@
 )
 
 
-class TestArrayFunction(unittest.TestCase):
+class TestArrayFunction:
 
     @testing.with_requires("numpy>=1.17.0")
     def test_array_function(self):
diff --git a/dpnp/tests/third_party/cupy/core_tests/test_core.py b/dpnp/tests/third_party/cupy/core_tests/test_core.py
index c959b4f24954..d9f8196db562 100644
--- a/dpnp/tests/third_party/cupy/core_tests/test_core.py
+++ b/dpnp/tests/third_party/cupy/core_tests/test_core.py
@@ -1,20 +1,23 @@
 from __future__ import annotations
 
 import sys
-import unittest
 
 import numpy
 import pytest
 
 import dpnp as cupy
+
+# from cupy._core import core
 from dpnp.tests.third_party.cupy import testing
 from dpnp.tests.third_party.cupy.testing._protocol_helpers import (
     DummyObjectWithCudaArrayInterface,
     DummyObjectWithCuPyGetNDArray,
 )
 
+# from cupy_tests.core_tests import test_raw
+
 
-class TestSize(unittest.TestCase):
+class TestSize:
 
     # def tearDown(self):
     #     # Free huge memory for slow test
@@ -58,7 +61,7 @@ def test_size_huge(self, xp):
 
 
 @pytest.mark.skip("no cupy._core submodule")
-class TestOrder(unittest.TestCase):
+class TestOrder:
 
     @testing.for_orders(_orders.keys())
     def test_ndarray(self, order):
diff --git a/dpnp/tests/third_party/cupy/core_tests/test_internal.py b/dpnp/tests/third_party/cupy/core_tests/test_internal.py
index 205661e80d75..2f4239a2e220 100644
--- a/dpnp/tests/third_party/cupy/core_tests/test_internal.py
+++ b/dpnp/tests/third_party/cupy/core_tests/test_internal.py
@@ -1,18 +1,16 @@
-import math
-import unittest
+from __future__ import annotations
 
 import numpy
 import pytest
 
 # from cupy._core import internal
-from dpnp.tests.third_party.cupy import testing
 
 pytest.skip(
     "CuPy internal functions are not supported", allow_module_level=True
 )
 
 
-class TestProd(unittest.TestCase):
+class TestProd:
 
     def test_empty(self):
         assert internal.prod([]) == 1
@@ -24,7 +22,7 @@ def test_two(self):
         assert internal.prod([2, 3]) == 6
 
 
-class TestProdSequence(unittest.TestCase):
+class TestProdSequence:
 
     def test_empty(self):
         assert internal.prod_sequence(()) == 1
@@ -74,7 +72,7 @@ def test_float(self):
         assert internal.get_size(1.0) == (1.0,)
 
 
-class TestVectorEqual(unittest.TestCase):
+class TestVectorEqual:
 
     def test_empty(self):
         assert internal.vector_equal([], []) is True
@@ -89,7 +87,7 @@ def test_different_size(self):
         assert internal.vector_equal([1, 2, 3], [1, 2]) is False
 
 
-class TestGetCContiguity(unittest.TestCase):
+class TestGetCContiguity:
 
     def test_zero_in_shape(self):
         assert internal.get_c_contiguity((1, 0, 1), (1, 1, 1), 3)
@@ -122,134 +120,101 @@ def test_no_contiguous3(self):
         assert not internal.get_c_contiguity((3, 1, 3), (6, 6, 4), 2)
 
 
-class TestInferUnknownDimension(unittest.TestCase):
+class TestInferUnknownDimension:
 
     def test_known_all(self):
         assert internal.infer_unknown_dimension((1, 2, 3), 6) == [1, 2, 3]
 
     def test_multiple_unknown(self):
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             internal.infer_unknown_dimension((-1, 1, -1), 10)
 
     def test_infer(self):
         assert internal.infer_unknown_dimension((-1, 2, 3), 12) == [2, 2, 3]
 
 
-@testing.parameterize(
-    {"slice": (2, 8, 1), "expect": (2, 8, 1)},
-    {"slice": (2, None, 1), "expect": (2, 10, 1)},
-    {"slice": (2, 1, 1), "expect": (2, 2, 1)},
-    {"slice": (2, -1, 1), "expect": (2, 9, 1)},
-    {"slice": (None, 8, 1), "expect": (0, 8, 1)},
-    {"slice": (-3, 8, 1), "expect": (7, 8, 1)},
-    {"slice": (11, 8, 1), "expect": (10, 10, 1)},
-    {"slice": (11, 11, 1), "expect": (10, 10, 1)},
-    {"slice": (-11, 8, 1), "expect": (0, 8, 1)},
-    {"slice": (-11, -11, 1), "expect": (0, 0, 1)},
-    {"slice": (8, 2, -1), "expect": (8, 2, -1)},
-    {"slice": (8, None, -1), "expect": (8, -1, -1)},
-    {"slice": (8, 9, -1), "expect": (8, 8, -1)},
-    {"slice": (8, -3, -1), "expect": (8, 7, -1)},
-    {"slice": (None, 8, -1), "expect": (9, 8, -1)},
-    {"slice": (-3, 6, -1), "expect": (7, 6, -1)},
-    {"slice": (10, 10, -1), "expect": (9, 9, -1)},
-    {"slice": (10, 8, -1), "expect": (9, 8, -1)},
-    {"slice": (9, 10, -1), "expect": (9, 9, -1)},
-    {"slice": (9, 9, -1), "expect": (9, 9, -1)},
-    {"slice": (9, 8, -1), "expect": (9, 8, -1)},
-    {"slice": (8, 8, -1), "expect": (8, 8, -1)},
-    {"slice": (-9, -8, -1), "expect": (1, 1, -1)},
-    {"slice": (-9, -9, -1), "expect": (1, 1, -1)},
-    {"slice": (-9, -10, -1), "expect": (1, 0, -1)},
-    {"slice": (-9, -11, -1), "expect": (1, -1, -1)},
-    {"slice": (-9, -12, -1), "expect": (1, -1, -1)},
-    {"slice": (-10, -9, -1), "expect": (0, 0, -1)},
-    {"slice": (-10, -10, -1), "expect": (0, 0, -1)},
-    {"slice": (-10, -11, -1), "expect": (0, -1, -1)},
-    {"slice": (-10, -12, -1), "expect": (0, -1, -1)},
-    {"slice": (-11, 8, -1), "expect": (-1, -1, -1)},
-    {"slice": (-11, -9, -1), "expect": (-1, -1, -1)},
-    {"slice": (-11, -10, -1), "expect": (-1, -1, -1)},
-    {"slice": (-11, -11, -1), "expect": (-1, -1, -1)},
-    {"slice": (-11, -12, -1), "expect": (-1, -1, -1)},
+@pytest.mark.parametrize(
+    ("slice_", "expect"),
+    [
+        ((2, 8, 1), (2, 8, 1)),
+        ((2, None, 1), (2, 10, 1)),
+        ((2, 1, 1), (2, 2, 1)),
+        ((2, -1, 1), (2, 9, 1)),
+        ((None, 8, 1), (0, 8, 1)),
+        ((-3, 8, 1), (7, 8, 1)),
+        ((11, 8, 1), (10, 10, 1)),
+        ((11, 11, 1), (10, 10, 1)),
+        ((-11, 8, 1), (0, 8, 1)),
+        ((-11, -11, 1), (0, 0, 1)),
+        ((8, 2, -1), (8, 2, -1)),
+        ((8, None, -1), (8, -1, -1)),
+        ((8, 9, -1), (8, 8, -1)),
+        ((8, -3, -1), (8, 7, -1)),
+        ((None, 8, -1), (9, 8, -1)),
+        ((-3, 6, -1), (7, 6, -1)),
+        ((10, 10, -1), (9, 9, -1)),
+        ((10, 8, -1), (9, 8, -1)),
+        ((9, 10, -1), (9, 9, -1)),
+        ((9, 9, -1), (9, 9, -1)),
+        ((9, 8, -1), (9, 8, -1)),
+        ((8, 8, -1), (8, 8, -1)),
+        ((-9, -8, -1), (1, 1, -1)),
+        ((-9, -9, -1), (1, 1, -1)),
+        ((-9, -10, -1), (1, 0, -1)),
+        ((-9, -11, -1), (1, -1, -1)),
+        ((-9, -12, -1), (1, -1, -1)),
+        ((-10, -9, -1), (0, 0, -1)),
+        ((-10, -10, -1), (0, 0, -1)),
+        ((-10, -11, -1), (0, -1, -1)),
+        ((-10, -12, -1), (0, -1, -1)),
+        ((-11, 8, -1), (-1, -1, -1)),
+        ((-11, -9, -1), (-1, -1, -1)),
+        ((-11, -10, -1), (-1, -1, -1)),
+        ((-11, -11, -1), (-1, -1, -1)),
+        ((-11, -12, -1), (-1, -1, -1)),
+    ],
 )
-class TestCompleteSlice(unittest.TestCase):
+def test_complete_slice(slice_, expect):
+    assert internal.complete_slice(slice(*slice_), 10) == slice(*expect)
 
-    def test_complete_slice(self):
-        assert internal.complete_slice(slice(*self.slice), 10) == slice(
-            *self.expect
-        )
 
-
-class TestCompleteSliceError(unittest.TestCase):
+class TestCompleteSliceError:
 
     def test_invalid_step_value(self):
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             internal.complete_slice(slice(1, 1, 0), 1)
 
     def test_invalid_step_type(self):
-        with self.assertRaises(TypeError):
+        with pytest.raises(TypeError):
             internal.complete_slice(slice(1, 1, (1, 2)), 1)
 
     def test_invalid_start_type(self):
-        with self.assertRaises(TypeError):
+        with pytest.raises(TypeError):
             internal.complete_slice(slice((1, 2), 1, 1), 1)
-        with self.assertRaises(TypeError):
+        with pytest.raises(TypeError):
             internal.complete_slice(slice((1, 2), 1, -1), 1)
 
     def test_invalid_stop_type(self):
-        with self.assertRaises(TypeError):
+        with pytest.raises(TypeError):
             internal.complete_slice(slice((1, 2), 1, 1), 1)
-        with self.assertRaises(TypeError):
+        with pytest.raises(TypeError):
             internal.complete_slice(slice((1, 2), 1, -1), 1)
 
 
-@testing.parameterize(
-    {"x": 0, "expect": 0},
-    {"x": 1, "expect": 1},
-    {"x": 2, "expect": 2},
-    {"x": 3, "expect": 4},
-    {"x": 2**10, "expect": 2**10},
-    {"x": 2**10 - 1, "expect": 2**10},
-    {"x": 2**10 + 1, "expect": 2**11},
-    {"x": 2**40, "expect": 2**40},
-    {"x": 2**40 - 1, "expect": 2**40},
-    {"x": 2**40 + 1, "expect": 2**41},
+@pytest.mark.parametrize(
+    ("x", "expect"),
+    [
+        (0, 0),
+        (1, 1),
+        (2, 2),
+        (3, 4),
+        (2**10, 2**10),
+        (2**10 - 1, 2**10),
+        (2**10 + 1, 2**11),
+        (2**40, 2**40),
+        (2**40 - 1, 2**40),
+        (2**40 + 1, 2**41),
+    ],
 )
-class TestClp2(unittest.TestCase):
-
-    def test_clp2(self):
-        assert internal.clp2(self.x) == self.expect
-
-
-@testing.parameterize(
-    *testing.product(
-        {
-            "value": [
-                0.0,
-                1.0,
-                -1.0,
-                0.25,
-                -0.25,
-                11.0,
-                -11.0,
-                2**-15,
-                -(2**-15),  # Denormalized Number
-                float("inf"),
-                float("-inf"),
-            ],
-        }
-    )
-)
-class TestConvertFloat16(unittest.TestCase):
-
-    def test_conversion(self):
-        half = internal.to_float16(self.value)
-        assert internal.from_float16(half) == self.value
-
-
-class TestConvertFloat16Nan(unittest.TestCase):
-
-    def test_conversion(self):
-        half = internal.to_float16(float("nan"))
-        assert math.isnan(internal.from_float16(half))
+def test_clp2(x, expect):
+    assert internal.clp2(x) == expect
diff --git a/dpnp/tests/third_party/cupy/fft_tests/test_cache.py b/dpnp/tests/third_party/cupy/fft_tests/test_cache.py
index e32f8e8305a0..29e0a7724edf 100644
--- a/dpnp/tests/third_party/cupy/fft_tests/test_cache.py
+++ b/dpnp/tests/third_party/cupy/fft_tests/test_cache.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import contextlib
 import io
 import queue
@@ -14,7 +16,7 @@
 # from cupy.cuda import runtime
 # from cupy.fft import config
 
-# from .test_fft import (multi_gpu_config, _skip_multi_gpu_bug)
+# from .test_fft import multi_gpu_config
 
 pytest.skip("FFT cache functions are not supported", allow_module_level=True)
 
@@ -30,26 +32,29 @@ def intercept_stdout(func):
 
 
 class TestPlanCache(unittest.TestCase):
-    def setUp(self):
-        self.caches = []
-        self.old_sizes = []
+    @contextlib.contextmanager
+    @staticmethod
+    def prepare_and_restore_caches():
+        old_sizes = []
         for i in range(n_devices):
             with device.Device(i):
                 cache = config.get_plan_cache()
-                self.old_sizes.append(cache.get_size())
+                old_sizes.append(cache.get_size())
                 cache.clear()
                 cache.set_memsize(-1)
                 cache.set_size(2)
-            self.caches.append(cache)
 
-    def tearDown(self):
-        for i in range(n_devices):
-            with device.Device(i):
-                cache = config.get_plan_cache()
-                cache.clear()
-                cache.set_size(self.old_sizes[i])
-                cache.set_memsize(-1)
+        try:
+            yield
+        finally:
+            for i in range(n_devices):
+                with device.Device(i):
+                    cache = config.get_plan_cache()
+                    cache.clear()
+                    cache.set_size(old_sizes[i])
+                    cache.set_memsize(-1)
 
+    @prepare_and_restore_caches()
     def test_LRU_cache1(self):
         # test if insertion and clean-up works
         cache = config.get_plan_cache()
@@ -62,6 +67,7 @@ def test_LRU_cache1(self):
         cache.clear()
         assert cache.get_curr_size() == 0 <= cache.get_size()
 
+    @prepare_and_restore_caches()
     def test_LRU_cache2(self):
         # test if plan is reused
         cache = config.get_plan_cache()
@@ -83,6 +89,7 @@ def test_LRU_cache2(self):
         # we should get the same plan
         assert plan0 is plan1
 
+    @prepare_and_restore_caches()
     def test_LRU_cache3(self):
         # test if cache size is limited
         cache = config.get_plan_cache()
@@ -108,6 +115,7 @@ def test_LRU_cache3(self):
         for _, node in cache:
             assert plan is not node.plan
 
+    @prepare_and_restore_caches()
     def test_LRU_cache4(self):
         # test if fetching the plan will reorder it to the top
         cache = config.get_plan_cache()
@@ -149,6 +157,8 @@ def test_LRU_cache4(self):
             cache[next(iterator)[0]]
 
     @testing.multi_gpu(2)
+    @prepare_and_restore_caches()
+    @pytest.mark.thread_unsafe(reason="intercepts stdout")
     def test_LRU_cache5(self):
         # test if the LRU cache is thread-local
 
@@ -210,10 +220,13 @@ def thread_init_caches(gpus, queue):
         assert stdout.count("uninitialized") == n_devices - 2
 
     @testing.multi_gpu(2)
-    def test_LRU_cache6(self):
+    @prepare_and_restore_caches()
+    def test_LRU_cache6(self, gpus=None):
         # test if each device has a separate cache
-        cache0 = self.caches[0]
-        cache1 = self.caches[1]
+        with device.Device(0):
+            cache0 = config.get_plan_cache()
+        with device.Device(1):
+            cache1 = config.get_plan_cache()
 
         # ensure a fresh state
         assert cache0.get_curr_size() == 0 <= cache0.get_size()
@@ -247,10 +260,13 @@ def test_LRU_cache6(self):
     @pytest.mark.skipif(
         runtime.is_hip, reason="hipFFT doesn't support multi-GPU"
     )
-    def test_LRU_cache7(self):
+    @prepare_and_restore_caches()
+    def test_LRU_cache7(self, gpus=None):
         # test accessing a multi-GPU plan
-        cache0 = self.caches[0]
-        cache1 = self.caches[1]
+        with device.Device(0):
+            cache0 = config.get_plan_cache()
+        with device.Device(1):
+            cache1 = config.get_plan_cache()
 
         # ensure a fresh state
         assert cache0.get_curr_size() == 0 <= cache0.get_size()
@@ -319,6 +335,7 @@ def test_LRU_cache7(self):
         assert cache0.get_curr_size() == 1 <= cache0.get_size()
         assert cache1.get_curr_size() == 2 <= cache1.get_size()
 
+    @prepare_and_restore_caches()
     def test_LRU_cache8(self):
         # test if Plan1d and PlanNd can coexist in the same cache
         cache = config.get_plan_cache()
@@ -340,6 +357,7 @@ def test_LRU_cache8(self):
         assert isinstance(next(iterator)[1].plan, cufft.PlanNd)
         assert isinstance(next(iterator)[1].plan, cufft.Plan1d)
 
+    @prepare_and_restore_caches()
     def test_LRU_cache9(self):
         # test if memsizes in the cache adds up
         cache = config.get_plan_cache()
@@ -358,6 +376,8 @@ def test_LRU_cache9(self):
 
         assert memsize == cache.get_curr_memsize()
 
+    @prepare_and_restore_caches()
+    @pytest.mark.thread_unsafe(reason="intercepts stdout")
     def test_LRU_cache10(self):
         # test if deletion works and if show_info() is consistent with data
         cache = config.get_plan_cache()
@@ -406,11 +426,13 @@ def test_LRU_cache10(self):
     @pytest.mark.skipif(
         runtime.is_hip, reason="hipFFT doesn't support multi-GPU"
     )
+    @prepare_and_restore_caches()
     def test_LRU_cache11(self):
         # test if collectively deleting a multi-GPU plan works
-        _skip_multi_gpu_bug((128,), self.gpus)
-        cache0 = self.caches[0]
-        cache1 = self.caches[1]
+        with device.Device(0):
+            cache0 = config.get_plan_cache()
+        with device.Device(1):
+            cache1 = config.get_plan_cache()
 
         # ensure a fresh state
         assert cache0.get_curr_size() == 0 <= cache0.get_size()
@@ -441,11 +463,14 @@ def test_LRU_cache11(self):
     @pytest.mark.skipif(
         runtime.is_hip, reason="hipFFT doesn't support multi-GPU"
     )
+    @prepare_and_restore_caches()
     def test_LRU_cache12(self):
         # test if an error is raise when one of the caches is unable
         # to fit it a multi-GPU plan
-        cache0 = self.caches[0]
-        cache1 = self.caches[1]
+        with device.Device(0):
+            cache0 = config.get_plan_cache()
+        with device.Device(1):
+            cache1 = config.get_plan_cache()
 
         # ensure a fresh state
         assert cache0.get_curr_size() == 0 <= cache0.get_size()
@@ -467,6 +492,7 @@ def test_LRU_cache12(self):
         runtime.runtimeGetVersion() >= 11080,
         "CUDA 11.8 has different plan size",
     )
+    @prepare_and_restore_caches()
     def test_LRU_cache13(self):
         # test if plan insertion respect the memory size limit
         cache = config.get_plan_cache()
diff --git a/dpnp/tests/third_party/cupy/fft_tests/test_callback.py b/dpnp/tests/third_party/cupy/fft_tests/test_callback.py
index 12bfd4aa73df..e2682d0aaa6e 100644
--- a/dpnp/tests/third_party/cupy/fft_tests/test_callback.py
+++ b/dpnp/tests/third_party/cupy/fft_tests/test_callback.py
@@ -1,18 +1,62 @@
+from __future__ import annotations
+
 import contextlib
+import os
 import string
 import sys
 import tempfile
 from unittest import mock
 
+try:
+    import Cython
+except ImportError:
+    Cython = None
+else:
+    if Cython.__version__ < "0.29.0":
+        Cython = None
 import numpy as np
 import pytest
 
 import dpnp as cupy
 from dpnp.tests.third_party.cupy import testing
 
+# from cupy.cuda import cufft
+# from cupy.cuda.device import get_compute_capability
+
 pytest.skip("FFT callbacks are not supported", allow_module_level=True)
 
 
+def cuda_version():
+    return cupy.cuda.runtime.runtimeGetVersion()
+
+
+cb_ver_for_test = ("legacy", "jit")
+
+
+def check_should_skip_legacy_test():
+    if not sys.platform.startswith("linux"):
+        pytest.skip("legacy callbacks are only supported on Linux")
+    if Cython is None:
+        pytest.skip("no working Cython")
+    if "LD_PRELOAD" in os.environ:
+        pytest.skip(
+            "legacy callback does not work if libcufft.so " "is preloaded"
+        )
+    if cufft.getVersion() >= 12000 and get_compute_capability() == "75":
+        pytest.skip(
+            "cuFFT legacy callbacks in CUDA 13.0+ do not support " "cc 7.5"
+        )
+    if cufft.getVersion() == 11303 and get_compute_capability() == "120":
+        pytest.skip(
+            "cuFFT legacy callbacks in CUDA 12.8.0 do not support " "cc 12.0"
+        )
+
+
+def check_should_skip_jit_test():
+    if cufft.getVersion() < 11303:
+        pytest.skip("JIT callbacks require cuFFT from CUDA 12.8+")
+
+
 @contextlib.contextmanager
 def use_temporary_cache_dir():
     target = "cupy.fft._callback.get_cache_dir"
@@ -21,45 +65,50 @@ def use_temporary_cache_dir():
             yield path
 
 
+suppress_legacy_warning = pytest.mark.filterwarnings(
+    "ignore:.*legacy callback.*:DeprecationWarning"
+)
+
+
 _load_callback = r"""
-__device__ ${data_type} CB_ConvertInput(
-    void* dataIn, size_t offset, void* callerInfo, void* sharedPtr)
+__device__ ${data_type} ${cb_name}(
+    void* dataIn, ${offset_type} offset, void* callerInfo, void* sharedPtr)
 {
     ${data_type} x = ((${data_type}*)dataIn)[offset];
     ${element} *= 2.5;
     return x;
 }
 
-__device__ ${load_type} d_loadCallbackPtr = CB_ConvertInput;
+__device__ ${load_type} d_loadCallbackPtr = ${cb_name};
 """
 
 _load_callback_with_aux = r"""
-__device__ ${data_type} CB_ConvertInput(
-    void* dataIn, size_t offset, void* callerInfo, void* sharedPtr)
+__device__ ${data_type} ${cb_name}(
+    void* dataIn, ${offset_type} offset, void* callerInfo, void* sharedPtr)
 {
     ${data_type} x = ((${data_type}*)dataIn)[offset];
     ${element} *= *((${aux_type}*)callerInfo);
     return x;
 }
 
-__device__ ${load_type} d_loadCallbackPtr = CB_ConvertInput;
+__device__ ${load_type} d_loadCallbackPtr = ${cb_name};
 """
 
 _load_callback_with_aux2 = r"""
-__device__ ${data_type} CB_ConvertInput(
-    void* dataIn, size_t offset, void* callerInfo, void* sharedPtr)
+__device__ ${data_type} ${cb_name}(
+    void* dataIn, ${offset_type} offset, void* callerInfo, void* sharedPtr)
 {
     ${data_type} x = ((${data_type}*)dataIn)[offset];
     ${element} *= ((${aux_type}*)callerInfo)[offset];
     return x;
 }
 
-__device__ ${load_type} d_loadCallbackPtr = CB_ConvertInput;
+__device__ ${load_type} d_loadCallbackPtr = ${cb_name};
 """
 
 _store_callback = r"""
-__device__ void CB_ConvertOutput(
-    void *dataOut, size_t offset, ${data_type} element,
+__device__ void ${cb_name}(
+    void *dataOut, ${offset_type} offset, ${data_type} element,
     void *callerInfo, void *sharedPointer)
 {
     ${data_type} x = element;
@@ -67,12 +116,12 @@ def use_temporary_cache_dir():
     ((${data_type}*)dataOut)[offset] = x;
 }
 
-__device__ ${store_type} d_storeCallbackPtr = CB_ConvertOutput;
+__device__ ${store_type} d_storeCallbackPtr = ${cb_name};
 """
 
 _store_callback_with_aux = r"""
-__device__ void CB_ConvertOutput(
-    void *dataOut, size_t offset, ${data_type} element,
+__device__ void ${cb_name}(
+    void *dataOut, ${offset_type} offset, ${data_type} element,
     void *callerInfo, void *sharedPointer)
 {
     ${data_type} x = element;
@@ -80,26 +129,209 @@ def use_temporary_cache_dir():
     ((${data_type}*)dataOut)[offset] = x;
 }
 
-__device__ ${store_type} d_storeCallbackPtr = CB_ConvertOutput;
+__device__ ${store_type} d_storeCallbackPtr = ${cb_name};
 """
 
 
-def _set_load_cb(code, element, data_type, callback_type, aux_type=None):
-    return string.Template(code).substitute(
+def _set_load_cb(
+    code,
+    element,
+    data_type,
+    callback_type,
+    callback_name,
+    aux_type=None,
+    cb_ver="",
+):
+    if cb_ver == "jit":
+        callback_type = callback_type.replace(
+            "cufftCallback", "cufftJITCallback"
+        )
+    callback = string.Template(code).substitute(
         data_type=data_type,
         aux_type=aux_type,
         load_type=callback_type,
+        cb_name=callback_name,
         element=element,
+        offset_type=("size_t" if cb_ver == "legacy" else "unsigned long long"),
     )
-
-
-def _set_store_cb(code, element, data_type, callback_type, aux_type=None):
-    return string.Template(code).substitute(
+    if cb_ver == "jit":
+        callback = "#include <cufftXt.h>\n\n" + callback
+    return callback
+
+
+def _set_store_cb(
+    code,
+    element,
+    data_type,
+    callback_type,
+    callback_name,
+    aux_type=None,
+    cb_ver="",
+):
+    if cb_ver == "jit":
+        callback_type = callback_type.replace(
+            "cufftCallback", "cufftJITCallback"
+        )
+    callback = string.Template(code).substitute(
         data_type=data_type,
         aux_type=aux_type,
         store_type=callback_type,
+        cb_name=callback_name,
         element=element,
+        offset_type=("size_t" if cb_ver == "legacy" else "unsigned long long"),
     )
+    if cb_ver == "jit":
+        callback = "#include <cufftXt.h>\n\n" + callback
+    return callback
+
+
+# Note: this class is placed here instead of at the end of this file, because
+# pytest does not reset warnings internally, and other tests would suppress
+# the warnings such that at the end we have no warnings to capture, but we want
+# to ensure warnings are raised.
+@pytest.mark.skipif(
+    cupy.cuda.runtime.is_hip, reason="hipFFT does not support callbacks"
+)
+class TestInputValidationWith1dCallbacks:
+
+    shape = (10,)
+    norm = "ortho"
+    dtype = np.complex64
+
+    @pytest.fixture(autouse=True, scope="class")
+    def _temporary_cache_dir(self):
+        """Use a temporary cache dir for all tests (also if threaded)."""
+        with use_temporary_cache_dir():
+            yield
+
+    def test_fft_load_legacy(self):
+        check_should_skip_legacy_test()
+
+        fft = cupy.fft.fft
+        code = _load_callback
+        types = (
+            "x.x",
+            "cufftComplex",
+            "cufftCallbackLoadC",
+            "cufftJITCallbackLoadComplex",
+        )
+        cb_load = _set_load_cb(code, *types, cb_ver="legacy")
+
+        a = testing.shaped_random(self.shape, cupy, self.dtype)
+        with pytest.deprecated_call(
+            match="legacy callback is considered deprecated"
+        ):
+            with cupy.fft.config.set_cufft_callbacks(
+                cb_load=cb_load, cb_ver="legacy"
+            ):
+                fft(a, norm=self.norm)
+
+    def test_fft_load_jit_no_name(self):
+        check_should_skip_jit_test()
+
+        fft = cupy.fft.fft
+        code = _load_callback
+        types = (
+            "x.x",
+            "cufftComplex",
+            "cufftCallbackLoadC",
+            "cufftJITCallbackLoadComplex",
+        )
+        cb_load = _set_load_cb(code, *types, cb_ver="jit")
+
+        a = testing.shaped_random(self.shape, cupy, self.dtype)
+        # We omit passing cb_load_name. The test infra setup would check
+        # if we can infer it correctly.
+        with cupy.fft.config.set_cufft_callbacks(cb_load=cb_load, cb_ver="jit"):
+            fft(a, norm=self.norm)
+
+    def test_fft_store_legacy(self):
+        check_should_skip_legacy_test()
+
+        fft = cupy.fft.fft
+        code = _store_callback
+        types = (
+            "x.y",
+            "cufftComplex",
+            "cufftCallbackStoreC",
+            "cufftJITCallbackStoreComplex",
+        )
+        cb_store = _set_store_cb(code, *types, cb_ver="legacy")
+
+        a = testing.shaped_random(self.shape, cupy, self.dtype)
+        with pytest.deprecated_call(
+            match="legacy callback is considered deprecated"
+        ):
+            with cupy.fft.config.set_cufft_callbacks(
+                cb_store=cb_store, cb_ver="legacy"
+            ):
+                fft(a, norm=self.norm)
+
+    def test_fft_store_jit_no_name(self):
+        check_should_skip_jit_test()
+
+        fft = cupy.fft.fft
+        code = _store_callback
+        types = (
+            "x.y",
+            "cufftComplex",
+            "cufftCallbackStoreC",
+            "cufftJITCallbackStoreComplex",
+        )
+        cb_store = _set_store_cb(code, *types, cb_ver="jit")
+
+        a = testing.shaped_random(self.shape, cupy, self.dtype)
+        # We omit passing cb_store_name. The test infra setup would check
+        # if we can infer it correctly.
+        with cupy.fft.config.set_cufft_callbacks(
+            cb_store=cb_store, cb_ver="jit"
+        ):
+            fft(a, norm=self.norm)
+
+    def test_fft_load_store_legacy_aux(self):
+        check_should_skip_legacy_test()
+
+        fft = cupy.fft.fft
+        dtype = self.dtype
+        load_code = _load_callback_with_aux
+        store_code = _store_callback_with_aux
+        load_aux = cupy.asarray(2.5, dtype=cupy.dtype(dtype).char.lower())
+        store_aux = cupy.asarray(3.8, dtype=cupy.dtype(dtype).char.lower())
+
+        load_types = (
+            "x.x",
+            "cufftComplex",
+            "cufftCallbackLoadC",
+            "cufftJITCallbackLoadComplex",
+            "float",
+        )
+        store_types = (
+            "x.y",
+            "cufftComplex",
+            "cufftCallbackStoreC",
+            "cufftJITCallbackStoreComplex",
+            "float",
+        )
+        cb_load = _set_load_cb(load_code, *load_types, cb_ver="legacy")
+        cb_store = _set_store_cb(store_code, *store_types, cb_ver="legacy")
+
+        a = testing.shaped_random(self.shape, cupy, self.dtype)
+        with (
+            pytest.deprecated_call(
+                match="cb_load_aux_arr or cb_store_aux_arr is deprecated"
+            ),
+            pytest.deprecated_call(
+                match="legacy callback is considered deprecated"
+            ),
+        ):
+            with cupy.fft.config.set_cufft_callbacks(
+                cb_load=cb_load,
+                cb_store=cb_store,
+                cb_load_aux_arr=load_aux,
+                cb_store_aux_arr=store_aux,
+                cb_ver="legacy",
+            ):
+                fft(a, norm=self.norm)
 
 
 @testing.parameterize(
@@ -108,31 +340,60 @@ def _set_store_cb(code, element, data_type, callback_type, aux_type=None):
             "n": [None, 5, 10, 15],
             "shape": [(10, 7), (10,), (10, 10)],
             "norm": [None, "ortho"],
+            "cb_ver": cb_ver_for_test,
         }
     )
 )
-@testing.with_requires("cython>=0.29.0")
-@pytest.mark.skipif(
-    not sys.platform.startswith("linux"),
-    reason="callbacks are only supported on Linux",
-)
 @pytest.mark.skipif(
     cupy.cuda.runtime.is_hip, reason="hipFFT does not support callbacks"
 )
 class Test1dCallbacks:
 
+    @pytest.fixture(autouse=True, scope="class")
+    def _temporary_cache_dir(self):
+        """Use a temporary cache dir for all tests (also if threaded)."""
+        with use_temporary_cache_dir():
+            yield
+
     def _test_load_helper(self, xp, dtype, fft_func):
+        if self.cb_ver == "legacy":
+            check_should_skip_legacy_test()
+        else:
+            check_should_skip_jit_test()
+
+        # for simplicity we use the JIT callback names for both legacy/jit
         fft = getattr(xp.fft, fft_func)
         code = _load_callback
         if dtype == np.complex64:
-            types = ("x.x", "cufftComplex", "cufftCallbackLoadC")
+            types = (
+                "x.x",
+                "cufftComplex",
+                "cufftCallbackLoadC",
+                "cufftJITCallbackLoadComplex",
+            )
         elif dtype == np.complex128:
-            types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ")
+            types = (
+                "x.x",
+                "cufftDoubleComplex",
+                "cufftCallbackLoadZ",
+                "cufftJITCallbackLoadDoubleComplex",
+            )
         elif dtype == np.float32:
-            types = ("x", "cufftReal", "cufftCallbackLoadR")
-        else:
-            types = ("x", "cufftDoubleReal", "cufftCallbackLoadD")
-        cb_load = _set_load_cb(code, *types)
+            types = (
+                "x",
+                "cufftReal",
+                "cufftCallbackLoadR",
+                "cufftJITCallbackLoadReal",
+            )
+        else:  # float64
+            types = (
+                "x",
+                "cufftDoubleReal",
+                "cufftCallbackLoadD",
+                "cufftJITCallbackLoadDoubleReal",
+            )
+        cb_load = _set_load_cb(code, *types, cb_ver=self.cb_ver)
+        cb_load_name = types[-1] if self.cb_ver == "jit" else None
 
         a = testing.shaped_random(self.shape, xp, dtype)
         if xp is np:
@@ -144,50 +405,93 @@ def _test_load_helper(self, xp, dtype, fft_func):
                 else:
                     out = out.astype(np.float32)
         else:
-            with use_temporary_cache_dir():
-                with xp.fft.config.set_cufft_callbacks(cb_load=cb_load):
-                    out = fft(a, n=self.n, norm=self.norm)
+            with xp.fft.config.set_cufft_callbacks(
+                cb_load=cb_load, cb_load_name=cb_load_name, cb_ver=self.cb_ver
+            ):
+                out = fft(a, n=self.n, norm=self.norm)
 
         return out
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_fft_load(self, xp, dtype):
         return self._test_load_helper(xp, dtype, "fft")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_ifft_load(self, xp, dtype):
         return self._test_load_helper(xp, dtype, "ifft")
 
+    @suppress_legacy_warning
     @testing.for_float_dtypes(no_float16=True)
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_rfft_load(self, xp, dtype):
         return self._test_load_helper(xp, dtype, "rfft")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_irfft_load(self, xp, dtype):
         return self._test_load_helper(xp, dtype, "irfft")
 
     def _test_store_helper(self, xp, dtype, fft_func):
+        if self.cb_ver == "legacy":
+            check_should_skip_legacy_test()
+        else:
+            check_should_skip_jit_test()
+
         fft = getattr(xp.fft, fft_func)
         code = _store_callback
+
+        # for simplicity we use the JIT callback names for both legacy/jit
         if dtype == np.complex64:
             if fft_func != "irfft":
-                types = ("x.y", "cufftComplex", "cufftCallbackStoreC")
-            else:
-                types = ("x", "cufftReal", "cufftCallbackStoreR")
+                types = (
+                    "x.y",
+                    "cufftComplex",
+                    "cufftCallbackStoreC",
+                    "cufftJITCallbackStoreComplex",
+                )
+            else:  # float32 for irfft
+                types = (
+                    "x",
+                    "cufftReal",
+                    "cufftCallbackStoreR",
+                    "cufftJITCallbackStoreReal",
+                )
         elif dtype == np.complex128:
             if fft_func != "irfft":
-                types = ("x.y", "cufftDoubleComplex", "cufftCallbackStoreZ")
-            else:
-                types = ("x", "cufftDoubleReal", "cufftCallbackStoreD")
+                types = (
+                    "x.y",
+                    "cufftDoubleComplex",
+                    "cufftCallbackStoreZ",
+                    "cufftJITCallbackStoreDoubleComplex",
+                )
+            else:  # float64 for irfft
+                types = (
+                    "x",
+                    "cufftDoubleReal",
+                    "cufftCallbackStoreD",
+                    "cufftJITCallbackStoreDoubleReal",
+                )
         elif dtype == np.float32:
-            types = ("x.y", "cufftComplex", "cufftCallbackStoreC")
+            types = (
+                "x.y",
+                "cufftComplex",
+                "cufftCallbackStoreC",
+                "cufftJITCallbackStoreComplex",
+            )
         elif dtype == np.float64:
-            types = ("x.y", "cufftDoubleComplex", "cufftCallbackStoreZ")
-        cb_store = _set_store_cb(code, *types)
+            types = (
+                "x.y",
+                "cufftDoubleComplex",
+                "cufftCallbackStoreZ",
+                "cufftJITCallbackStoreDoubleComplex",
+            )
+        cb_store = _set_store_cb(code, *types, cb_ver=self.cb_ver)
+        cb_store_name = types[-1] if self.cb_ver == "jit" else None
 
         a = testing.shaped_random(self.shape, xp, dtype)
         if xp is np:
@@ -201,67 +505,134 @@ def _test_store_helper(self, xp, dtype, fft_func):
                 if dtype in (np.float32, np.complex64):
                     out = out.astype(np.float32)
         else:
-            with use_temporary_cache_dir():
-                with xp.fft.config.set_cufft_callbacks(cb_store=cb_store):
-                    out = fft(a, n=self.n, norm=self.norm)
+            with xp.fft.config.set_cufft_callbacks(
+                cb_store=cb_store,
+                cb_store_name=cb_store_name,
+                cb_ver=self.cb_ver,
+            ):
+                out = fft(a, n=self.n, norm=self.norm)
 
         return out
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_fft_store(self, xp, dtype):
         return self._test_store_helper(xp, dtype, "fft")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_ifft_store(self, xp, dtype):
         return self._test_store_helper(xp, dtype, "ifft")
 
+    @suppress_legacy_warning
     @testing.for_float_dtypes(no_float16=True)
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_rfft_store(self, xp, dtype):
         return self._test_store_helper(xp, dtype, "rfft")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_irfft_store(self, xp, dtype):
         return self._test_store_helper(xp, dtype, "irfft")
 
     def _test_load_store_helper(self, xp, dtype, fft_func):
+        if self.cb_ver == "legacy":
+            check_should_skip_legacy_test()
+        else:
+            check_should_skip_jit_test()
+
+        # for simplicity we use the JIT callback names for both legacy/jit
         fft = getattr(xp.fft, fft_func)
         load_code = _load_callback
         store_code = _store_callback
         if fft_func in ("fft", "ifft"):
             if dtype == np.complex64:
-                load_types = ("x.x", "cufftComplex", "cufftCallbackLoadC")
-                store_types = ("x.y", "cufftComplex", "cufftCallbackStoreC")
-            else:
-                load_types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ")
+                load_types = (
+                    "x.x",
+                    "cufftComplex",
+                    "cufftCallbackLoadC",
+                    "cufftJITCallbackLoadComplex",
+                )
+                store_types = (
+                    "x.y",
+                    "cufftComplex",
+                    "cufftCallbackStoreC",
+                    "cufftJITCallbackStoreComplex",
+                )
+            else:  # complex128
+                load_types = (
+                    "x.x",
+                    "cufftDoubleComplex",
+                    "cufftCallbackLoadZ",
+                    "cufftJITCallbackLoadDoubleComplex",
+                )
                 store_types = (
                     "x.y",
                     "cufftDoubleComplex",
                     "cufftCallbackStoreZ",
+                    "cufftJITCallbackStoreDoubleComplex",
                 )
         elif fft_func == "rfft":
             if dtype == np.float32:
-                load_types = ("x", "cufftReal", "cufftCallbackLoadR")
-                store_types = ("x.y", "cufftComplex", "cufftCallbackStoreC")
-            else:
-                load_types = ("x", "cufftDoubleReal", "cufftCallbackLoadD")
+                load_types = (
+                    "x",
+                    "cufftReal",
+                    "cufftCallbackLoadR",
+                    "cufftJITCallbackLoadReal",
+                )
+                store_types = (
+                    "x.y",
+                    "cufftComplex",
+                    "cufftCallbackStoreC",
+                    "cufftJITCallbackStoreComplex",
+                )
+            else:  # float64
+                load_types = (
+                    "x",
+                    "cufftDoubleReal",
+                    "cufftCallbackLoadD",
+                    "cufftJITCallbackLoadDoubleReal",
+                )
                 store_types = (
                     "x.y",
                     "cufftDoubleComplex",
                     "cufftCallbackStoreZ",
+                    "cufftJITCallbackStoreDoubleComplex",
                 )
         else:  # irfft
             if dtype == np.complex64:
-                load_types = ("x.x", "cufftComplex", "cufftCallbackLoadC")
-                store_types = ("x", "cufftReal", "cufftCallbackStoreR")
-            else:
-                load_types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ")
-                store_types = ("x", "cufftDoubleReal", "cufftCallbackStoreD")
-        cb_load = _set_load_cb(load_code, *load_types)
-        cb_store = _set_store_cb(store_code, *store_types)
+                load_types = (
+                    "x.x",
+                    "cufftComplex",
+                    "cufftCallbackLoadC",
+                    "cufftJITCallbackLoadComplex",
+                )
+                store_types = (
+                    "x",
+                    "cufftReal",
+                    "cufftCallbackStoreR",
+                    "cufftJITCallbackStoreReal",
+                )
+            else:  # complex128
+                load_types = (
+                    "x.x",
+                    "cufftDoubleComplex",
+                    "cufftCallbackLoadZ",
+                    "cufftJITCallbackLoadDoubleComplex",
+                )
+                store_types = (
+                    "x",
+                    "cufftDoubleReal",
+                    "cufftCallbackStoreD",
+                    "cufftJITCallbackStoreDoubleReal",
+                )
+        cb_load = _set_load_cb(load_code, *load_types, cb_ver=self.cb_ver)
+        cb_load_name = load_types[-1] if self.cb_ver == "jit" else None
+        cb_store = _set_store_cb(store_code, *store_types, cb_ver=self.cb_ver)
+        cb_store_name = store_types[-1] if self.cb_ver == "jit" else None
 
         a = testing.shaped_random(self.shape, xp, dtype)
         if xp is np:
@@ -276,47 +647,71 @@ def _test_load_store_helper(self, xp, dtype, fft_func):
                 if dtype in (np.float32, np.complex64):
                     out = out.astype(np.float32)
         else:
-            with use_temporary_cache_dir():
-                with xp.fft.config.set_cufft_callbacks(
-                    cb_load=cb_load, cb_store=cb_store
-                ):
-                    out = fft(a, n=self.n, norm=self.norm)
+            with xp.fft.config.set_cufft_callbacks(
+                cb_load=cb_load,
+                cb_load_name=cb_load_name,
+                cb_store=cb_store,
+                cb_store_name=cb_store_name,
+                cb_ver=self.cb_ver,
+            ):
+                out = fft(a, n=self.n, norm=self.norm)
 
         return out
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_fft_load_store(self, xp, dtype):
         return self._test_load_store_helper(xp, dtype, "fft")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_ifft_load_store(self, xp, dtype):
         return self._test_load_store_helper(xp, dtype, "ifft")
 
+    @suppress_legacy_warning
     @testing.for_float_dtypes(no_float16=True)
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_rfft_load_store(self, xp, dtype):
         return self._test_load_store_helper(xp, dtype, "rfft")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_irfft_load_store(self, xp, dtype):
         return self._test_load_store_helper(xp, dtype, "irfft")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_fft_load_aux(self, xp, dtype):
+        if self.cb_ver == "legacy":
+            check_should_skip_legacy_test()
+        else:
+            check_should_skip_jit_test()
+
         fft = xp.fft.fft
         c = _load_callback_with_aux2
+        # for simplicity we use the JIT callback names for both legacy/jit
         if dtype == np.complex64:
-            cb_load = _set_load_cb(
-                c, "x.x", "cufftComplex", "cufftCallbackLoadC", "float"
+            types = (
+                "x.x",
+                "cufftComplex",
+                "cufftCallbackLoadC",
+                "cufftJITCallbackLoadComplex",
+                "float",
             )
-        else:
-            cb_load = _set_load_cb(
-                c, "x.x", "cufftDoubleComplex", "cufftCallbackLoadZ", "double"
+        else:  # complex128
+            types = (
+                "x.x",
+                "cufftDoubleComplex",
+                "cufftCallbackLoadZ",
+                "cufftJITCallbackLoadDoubleComplex",
+                "double",
             )
+        cb_load = _set_load_cb(c, *types, cb_ver=self.cb_ver)
+        cb_load_name = types[3] if self.cb_ver == "jit" else None
 
         a = testing.shaped_random(self.shape, xp, dtype)
         out_last = self.n if self.n is not None else self.shape[-1]
@@ -333,15 +728,22 @@ def test_fft_load_aux(self, xp, dtype):
             if dtype in (np.float32, np.complex64):
                 out = out.astype(np.complex64)
         else:
-            with use_temporary_cache_dir():
-                with xp.fft.config.set_cufft_callbacks(
-                    cb_load=cb_load, cb_load_aux_arr=b
-                ):
-                    out = fft(a, n=self.n, norm=self.norm)
+            with xp.fft.config.set_cufft_callbacks(
+                cb_load=cb_load,
+                cb_load_name=cb_load_name,
+                cb_load_data=b.data,
+                cb_ver=self.cb_ver,
+            ):
+                out = fft(a, n=self.n, norm=self.norm)
 
         return out
 
     def _test_load_store_aux_helper(self, xp, dtype, fft_func):
+        if self.cb_ver == "legacy":
+            check_should_skip_legacy_test()
+        else:
+            check_should_skip_jit_test()
+
         fft = getattr(xp.fft, fft_func)
         load_code = _load_callback_with_aux
         store_code = _store_callback_with_aux
@@ -349,53 +751,67 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func):
             load_aux = xp.asarray(2.5, dtype=xp.dtype(dtype).char.lower())
             store_aux = xp.asarray(3.8, dtype=xp.dtype(dtype).char.lower())
 
+        # for simplicity we use the JIT callback names for both legacy/jit
         if fft_func in ("fft", "ifft"):
             if dtype == np.complex64:
                 load_types = (
                     "x.x",
                     "cufftComplex",
                     "cufftCallbackLoadC",
+                    "cufftJITCallbackLoadComplex",
                     "float",
                 )
                 store_types = (
                     "x.y",
                     "cufftComplex",
                     "cufftCallbackStoreC",
+                    "cufftJITCallbackStoreComplex",
                     "float",
                 )
-            else:
+            else:  # complex128
                 load_types = (
                     "x.x",
                     "cufftDoubleComplex",
                     "cufftCallbackLoadZ",
+                    "cufftJITCallbackLoadDoubleComplex",
                     "double",
                 )
                 store_types = (
                     "x.y",
                     "cufftDoubleComplex",
                     "cufftCallbackStoreZ",
+                    "cufftJITCallbackStoreDoubleComplex",
                     "double",
                 )
         elif fft_func == "rfft":
             if dtype == np.float32:
-                load_types = ("x", "cufftReal", "cufftCallbackLoadR", "float")
+                load_types = (
+                    "x",
+                    "cufftReal",
+                    "cufftCallbackLoadR",
+                    "cufftJITCallbackLoadReal",
+                    "float",
+                )
                 store_types = (
                     "x.y",
                     "cufftComplex",
                     "cufftCallbackStoreC",
+                    "cufftJITCallbackStoreComplex",
                     "float",
                 )
-            else:
+            else:  # float64
                 load_types = (
                     "x",
                     "cufftDoubleReal",
                     "cufftCallbackLoadD",
+                    "cufftJITCallbackLoadDoubleReal",
                     "double",
                 )
                 store_types = (
                     "x.y",
                     "cufftDoubleComplex",
                     "cufftCallbackStoreZ",
+                    "cufftJITCallbackStoreDoubleComplex",
                     "double",
                 )
         else:  # irfft
@@ -404,24 +820,35 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func):
                     "x.x",
                     "cufftComplex",
                     "cufftCallbackLoadC",
+                    "cufftJITCallbackLoadComplex",
                     "float",
                 )
-                store_types = ("x", "cufftReal", "cufftCallbackStoreR", "float")
-            else:
+                store_types = (
+                    "x",
+                    "cufftReal",
+                    "cufftCallbackStoreR",
+                    "cufftJITCallbackStoreReal",
+                    "float",
+                )
+            else:  # complex128
                 load_types = (
                     "x.x",
                     "cufftDoubleComplex",
                     "cufftCallbackLoadZ",
+                    "cufftJITCallbackLoadDoubleComplex",
                     "double",
                 )
                 store_types = (
                     "x",
                     "cufftDoubleReal",
                     "cufftCallbackStoreD",
+                    "cufftJITCallbackStoreDoubleReal",
                     "double",
                 )
-        cb_load = _set_load_cb(load_code, *load_types)
-        cb_store = _set_store_cb(store_code, *store_types)
+        cb_load = _set_load_cb(load_code, *load_types, cb_ver=self.cb_ver)
+        cb_load_name = load_types[3] if self.cb_ver == "jit" else None
+        cb_store = _set_store_cb(store_code, *store_types, cb_ver=self.cb_ver)
+        cb_store_name = store_types[3] if self.cb_ver == "jit" else None
 
         a = testing.shaped_random(self.shape, xp, dtype)
         if xp is np:
@@ -436,32 +863,38 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func):
                 if dtype in (np.float32, np.complex64):
                     out = out.astype(np.float32)
         else:
-            with use_temporary_cache_dir():
-                with xp.fft.config.set_cufft_callbacks(
-                    cb_load=cb_load,
-                    cb_store=cb_store,
-                    cb_load_aux_arr=load_aux,
-                    cb_store_aux_arr=store_aux,
-                ):
-                    out = fft(a, n=self.n, norm=self.norm)
+            with xp.fft.config.set_cufft_callbacks(
+                cb_load=cb_load,
+                cb_load_name=cb_load_name,
+                cb_store=cb_store,
+                cb_store_name=cb_store_name,
+                cb_load_data=load_aux.data,
+                cb_store_data=store_aux.data,
+                cb_ver=self.cb_ver,
+            ):
+                out = fft(a, n=self.n, norm=self.norm)
 
         return out
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_fft_load_store_aux(self, xp, dtype):
         return self._test_load_store_aux_helper(xp, dtype, "fft")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_ifft_load_store_aux(self, xp, dtype):
         return self._test_load_store_aux_helper(xp, dtype, "ifft")
 
+    @suppress_legacy_warning
     @testing.for_float_dtypes(no_float16=True)
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_rfft_load_store_aux(self, xp, dtype):
         return self._test_load_store_aux_helper(xp, dtype, "rfft")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(rtol=1e-4, atol=1e-7, contiguous_check=False)
     def test_irfft_load_store_aux(self, xp, dtype):
@@ -469,38 +902,92 @@ def test_irfft_load_store_aux(self, xp, dtype):
 
 
 @testing.parameterize(
-    {"shape": (3, 4), "s": None, "axes": None, "norm": None},
-    {"shape": (3, 4), "s": (1, 5), "axes": (-2, -1), "norm": None},
-    {"shape": (3, 4), "s": None, "axes": (-2, -1), "norm": None},
-    {"shape": (3, 4), "s": None, "axes": None, "norm": "ortho"},
-    {"shape": (2, 3, 4), "s": None, "axes": None, "norm": None},
-    {"shape": (2, 3, 4), "s": (1, 4, 10), "axes": (-3, -2, -1), "norm": None},
-    {"shape": (2, 3, 4), "s": None, "axes": (-3, -2, -1), "norm": None},
-    {"shape": (2, 3, 4), "s": None, "axes": None, "norm": "ortho"},
-    {"shape": (2, 3, 4), "s": (2, 3), "axes": (0, 1, 2), "norm": "ortho"},
-)
-@testing.with_requires("cython>=0.29.0")
-@pytest.mark.skipif(
-    not sys.platform.startswith("linux"),
-    reason="callbacks are only supported on Linux",
+    *(
+        testing.product_dict(
+            [
+                {"shape": (3, 4), "s": None, "axes": None, "norm": None},
+                {"shape": (3, 4), "s": (1, 5), "axes": (-2, -1), "norm": None},
+                {"shape": (3, 4), "s": None, "axes": (-2, -1), "norm": None},
+                {"shape": (3, 4), "s": None, "axes": None, "norm": "ortho"},
+                {"shape": (2, 3, 4), "s": None, "axes": None, "norm": None},
+                {
+                    "shape": (2, 3, 4),
+                    "s": (1, 4, 10),
+                    "axes": (-3, -2, -1),
+                    "norm": None,
+                },
+                {
+                    "shape": (2, 3, 4),
+                    "s": None,
+                    "axes": (-3, -2, -1),
+                    "norm": None,
+                },
+                {"shape": (2, 3, 4), "s": None, "axes": None, "norm": "ortho"},
+                {
+                    "shape": (2, 3, 4),
+                    "s": (2, 3),
+                    "axes": (0, 1, 2),
+                    "norm": "ortho",
+                },
+            ],
+            testing.product(
+                {
+                    "cb_ver": cb_ver_for_test,
+                },
+            ),
+        )
+    )
 )
 @pytest.mark.skipif(
     cupy.cuda.runtime.is_hip, reason="hipFFT does not support callbacks"
 )
 class TestNdCallbacks:
 
+    @pytest.fixture(scope="class", autouse=True)
+    def _temporary_cache_dir(self):
+        # One temporary cache dir per class; an xunit setup_class cannot yield
+        with use_temporary_cache_dir():
+            yield
+
     def _test_load_helper(self, xp, dtype, fft_func):
+        if self.cb_ver == "legacy":
+            check_should_skip_legacy_test()
+        else:
+            check_should_skip_jit_test()
+
+        # for simplicity we use the JIT callback names for both legacy/jit
         fft = getattr(xp.fft, fft_func)
         load_code = _load_callback
         if dtype == np.complex64:
-            types = ("x.x", "cufftComplex", "cufftCallbackLoadC")
+            types = (
+                "x.x",
+                "cufftComplex",
+                "cufftCallbackLoadC",
+                "cufftJITCallbackLoadComplex",
+            )
         elif dtype == np.complex128:
-            types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ")
+            types = (
+                "x.x",
+                "cufftDoubleComplex",
+                "cufftCallbackLoadZ",
+                "cufftJITCallbackLoadDoubleComplex",
+            )
         elif dtype == np.float32:
-            types = ("x", "cufftReal", "cufftCallbackLoadR")
-        else:
-            types = ("x", "cufftDoubleReal", "cufftCallbackLoadD")
-        cb_load = _set_load_cb(load_code, *types)
+            types = (
+                "x",
+                "cufftReal",
+                "cufftCallbackLoadR",
+                "cufftJITCallbackLoadReal",
+            )
+        else:  # float64
+            types = (
+                "x",
+                "cufftDoubleReal",
+                "cufftCallbackLoadD",
+                "cufftJITCallbackLoadDoubleReal",
+            )
+        cb_load = _set_load_cb(load_code, *types, cb_ver=self.cb_ver)
+        cb_load_name = types[3] if self.cb_ver == "jit" else None
 
         a = testing.shaped_random(self.shape, xp, dtype)
         if xp is np:
@@ -512,12 +999,14 @@ def _test_load_helper(self, xp, dtype, fft_func):
                 else:
                     out = out.astype(np.float32)
         else:
-            with use_temporary_cache_dir():
-                with xp.fft.config.set_cufft_callbacks(cb_load=cb_load):
-                    out = fft(a, s=self.s, axes=self.axes, norm=self.norm)
+            with xp.fft.config.set_cufft_callbacks(
+                cb_load=cb_load, cb_load_name=cb_load_name, cb_ver=self.cb_ver
+            ):
+                out = fft(a, s=self.s, axes=self.axes, norm=self.norm)
 
         return out
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -525,6 +1014,7 @@ def _test_load_helper(self, xp, dtype, fft_func):
     def test_fftn_load(self, xp, dtype):
         return self._test_load_helper(xp, dtype, "fftn")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -532,6 +1022,7 @@ def test_fftn_load(self, xp, dtype):
     def test_ifftn_load(self, xp, dtype):
         return self._test_load_helper(xp, dtype, "ifftn")
 
+    @suppress_legacy_warning
     @testing.for_float_dtypes(no_float16=True)
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -539,6 +1030,7 @@ def test_ifftn_load(self, xp, dtype):
     def test_rfftn_load(self, xp, dtype):
         return self._test_load_helper(xp, dtype, "rfftn")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -547,23 +1039,61 @@ def test_irfftn_load(self, xp, dtype):
         return self._test_load_helper(xp, dtype, "irfftn")
 
     def _test_store_helper(self, xp, dtype, fft_func):
+        if self.cb_ver == "legacy":
+            check_should_skip_legacy_test()
+        else:
+            check_should_skip_jit_test()
+
         fft = getattr(xp.fft, fft_func)
         store_code = _store_callback
+
+        # for simplicity we use the JIT callback names for both legacy/jit
         if dtype == np.complex64:
             if fft_func != "irfftn":
-                types = ("x.y", "cufftComplex", "cufftCallbackStoreC")
-            else:
-                types = ("x", "cufftReal", "cufftCallbackStoreR")
+                types = (
+                    "x.y",
+                    "cufftComplex",
+                    "cufftCallbackStoreC",
+                    "cufftJITCallbackStoreComplex",
+                )
+            else:  # float32 for irfftn
+                types = (
+                    "x",
+                    "cufftReal",
+                    "cufftCallbackStoreR",
+                    "cufftJITCallbackStoreReal",
+                )
         elif dtype == np.complex128:
             if fft_func != "irfftn":
-                types = ("x.y", "cufftDoubleComplex", "cufftCallbackStoreZ")
-            else:
-                types = ("x", "cufftDoubleReal", "cufftCallbackStoreD")
+                types = (
+                    "x.y",
+                    "cufftDoubleComplex",
+                    "cufftCallbackStoreZ",
+                    "cufftJITCallbackStoreDoubleComplex",
+                )
+            else:  # float64 for irfftn
+                types = (
+                    "x",
+                    "cufftDoubleReal",
+                    "cufftCallbackStoreD",
+                    "cufftJITCallbackStoreDoubleReal",
+                )
         elif dtype == np.float32:
-            types = ("x.y", "cufftComplex", "cufftCallbackStoreC")
+            types = (
+                "x.y",
+                "cufftComplex",
+                "cufftCallbackStoreC",
+                "cufftJITCallbackStoreComplex",
+            )
         elif dtype == np.float64:
-            types = ("x.y", "cufftDoubleComplex", "cufftCallbackStoreZ")
-        cb_store = _set_store_cb(store_code, *types)
+            types = (
+                "x.y",
+                "cufftDoubleComplex",
+                "cufftCallbackStoreZ",
+                "cufftJITCallbackStoreDoubleComplex",
+            )
+        cb_store = _set_store_cb(store_code, *types, cb_ver=self.cb_ver)
+        cb_store_name = types[3] if self.cb_ver == "jit" else None
 
         a = testing.shaped_random(self.shape, xp, dtype)
         if xp is np:
@@ -577,12 +1107,16 @@ def _test_store_helper(self, xp, dtype, fft_func):
                 if dtype in (np.float32, np.complex64):
                     out = out.astype(np.float32)
         else:
-            with use_temporary_cache_dir():
-                with xp.fft.config.set_cufft_callbacks(cb_store=cb_store):
-                    out = fft(a, s=self.s, axes=self.axes, norm=self.norm)
+            with xp.fft.config.set_cufft_callbacks(
+                cb_store=cb_store,
+                cb_store_name=cb_store_name,
+                cb_ver=self.cb_ver,
+            ):
+                out = fft(a, s=self.s, axes=self.axes, norm=self.norm)
 
         return out
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -590,6 +1124,7 @@ def _test_store_helper(self, xp, dtype, fft_func):
     def test_fftn_store(self, xp, dtype):
         return self._test_store_helper(xp, dtype, "fftn")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -597,6 +1132,7 @@ def test_fftn_store(self, xp, dtype):
     def test_ifftn_store(self, xp, dtype):
         return self._test_store_helper(xp, dtype, "ifftn")
 
+    @suppress_legacy_warning
     @testing.for_float_dtypes(no_float16=True)
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -604,6 +1140,7 @@ def test_ifftn_store(self, xp, dtype):
     def test_rfftn_store(self, xp, dtype):
         return self._test_store_helper(xp, dtype, "rfftn")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -612,40 +1149,101 @@ def test_irfftn_store(self, xp, dtype):
         return self._test_store_helper(xp, dtype, "irfftn")
 
     def _test_load_store_helper(self, xp, dtype, fft_func):
+        if self.cb_ver == "legacy":
+            check_should_skip_legacy_test()
+        else:
+            check_should_skip_jit_test()
+
         fft = getattr(xp.fft, fft_func)
         load_code = _load_callback
         store_code = _store_callback
+
+        # for simplicity we use the JIT callback names for both legacy/jit
         if fft_func in ("fftn", "ifftn"):
             if dtype == np.complex64:
-                load_types = ("x.x", "cufftComplex", "cufftCallbackLoadC")
-                store_types = ("x.y", "cufftComplex", "cufftCallbackStoreC")
-            else:
-                load_types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ")
+                load_types = (
+                    "x.x",
+                    "cufftComplex",
+                    "cufftCallbackLoadC",
+                    "cufftJITCallbackLoadComplex",
+                )
+                store_types = (
+                    "x.y",
+                    "cufftComplex",
+                    "cufftCallbackStoreC",
+                    "cufftJITCallbackStoreComplex",
+                )
+            else:  # complex128
+                load_types = (
+                    "x.x",
+                    "cufftDoubleComplex",
+                    "cufftCallbackLoadZ",
+                    "cufftJITCallbackLoadDoubleComplex",
+                )
                 store_types = (
                     "x.y",
                     "cufftDoubleComplex",
                     "cufftCallbackStoreZ",
+                    "cufftJITCallbackStoreDoubleComplex",
                 )
         elif fft_func == "rfftn":
             if dtype == np.float32:
-                load_types = ("x", "cufftReal", "cufftCallbackLoadR")
-                store_types = ("x.y", "cufftComplex", "cufftCallbackStoreC")
-            else:
-                load_types = ("x", "cufftDoubleReal", "cufftCallbackLoadD")
+                load_types = (
+                    "x",
+                    "cufftReal",
+                    "cufftCallbackLoadR",
+                    "cufftJITCallbackLoadReal",
+                )
+                store_types = (
+                    "x.y",
+                    "cufftComplex",
+                    "cufftCallbackStoreC",
+                    "cufftJITCallbackStoreComplex",
+                )
+            else:  # float64
+                load_types = (
+                    "x",
+                    "cufftDoubleReal",
+                    "cufftCallbackLoadD",
+                    "cufftJITCallbackLoadDoubleReal",
+                )
                 store_types = (
                     "x.y",
                     "cufftDoubleComplex",
                     "cufftCallbackStoreZ",
+                    "cufftJITCallbackStoreDoubleComplex",
                 )
         else:  # irfft
             if dtype == np.complex64:
-                load_types = ("x.x", "cufftComplex", "cufftCallbackLoadC")
-                store_types = ("x", "cufftReal", "cufftCallbackStoreR")
-            else:
-                load_types = ("x.x", "cufftDoubleComplex", "cufftCallbackLoadZ")
-                store_types = ("x", "cufftDoubleReal", "cufftCallbackStoreD")
-        cb_load = _set_load_cb(load_code, *load_types)
-        cb_store = _set_store_cb(store_code, *store_types)
+                load_types = (
+                    "x.x",
+                    "cufftComplex",
+                    "cufftCallbackLoadC",
+                    "cufftJITCallbackLoadComplex",
+                )
+                store_types = (
+                    "x",
+                    "cufftReal",
+                    "cufftCallbackStoreR",
+                    "cufftJITCallbackStoreReal",
+                )
+            else:  # complex128
+                load_types = (
+                    "x.x",
+                    "cufftDoubleComplex",
+                    "cufftCallbackLoadZ",
+                    "cufftJITCallbackLoadDoubleComplex",
+                )
+                store_types = (
+                    "x",
+                    "cufftDoubleReal",
+                    "cufftCallbackStoreD",
+                    "cufftJITCallbackStoreDoubleReal",
+                )
+        cb_load = _set_load_cb(load_code, *load_types, cb_ver=self.cb_ver)
+        cb_load_name = load_types[3] if self.cb_ver == "jit" else None
+        cb_store = _set_store_cb(store_code, *store_types, cb_ver=self.cb_ver)
+        cb_store_name = store_types[3] if self.cb_ver == "jit" else None
 
         a = testing.shaped_random(self.shape, xp, dtype)
         if xp is np:
@@ -660,14 +1258,18 @@ def _test_load_store_helper(self, xp, dtype, fft_func):
                 if dtype in (np.float32, np.complex64):
                     out = out.astype(np.float32)
         else:
-            with use_temporary_cache_dir():
-                with xp.fft.config.set_cufft_callbacks(
-                    cb_load=cb_load, cb_store=cb_store
-                ):
-                    out = fft(a, s=self.s, axes=self.axes, norm=self.norm)
+            with xp.fft.config.set_cufft_callbacks(
+                cb_load=cb_load,
+                cb_load_name=cb_load_name,
+                cb_store=cb_store,
+                cb_store_name=cb_store_name,
+                cb_ver=self.cb_ver,
+            ):
+                out = fft(a, s=self.s, axes=self.axes, norm=self.norm)
 
         return out
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -675,6 +1277,7 @@ def _test_load_store_helper(self, xp, dtype, fft_func):
     def test_fftn_load_store(self, xp, dtype):
         return self._test_load_store_helper(xp, dtype, "fftn")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -682,6 +1285,7 @@ def test_fftn_load_store(self, xp, dtype):
     def test_ifftn_load_store(self, xp, dtype):
         return self._test_load_store_helper(xp, dtype, "ifftn")
 
+    @suppress_legacy_warning
     @testing.for_float_dtypes(no_float16=True)
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -689,6 +1293,7 @@ def test_ifftn_load_store(self, xp, dtype):
     def test_rfftn_load_store(self, xp, dtype):
         return self._test_load_store_helper(xp, dtype, "rfftn")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -697,6 +1302,11 @@ def test_irfftn_load_store(self, xp, dtype):
         return self._test_load_store_helper(xp, dtype, "irfftn")
 
     def _test_load_store_aux_helper(self, xp, dtype, fft_func):
+        if self.cb_ver == "legacy":
+            check_should_skip_legacy_test()
+        else:
+            check_should_skip_jit_test()
+
         fft = getattr(xp.fft, fft_func)
         load_code = _load_callback_with_aux
         store_code = _store_callback_with_aux
@@ -704,53 +1314,67 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func):
             load_aux = xp.asarray(2.5, dtype=xp.dtype(dtype).char.lower())
             store_aux = xp.asarray(3.8, dtype=xp.dtype(dtype).char.lower())
 
+        # for simplicity we use the JIT callback names for both legacy/jit
         if fft_func in ("fftn", "ifftn"):
             if dtype == np.complex64:
                 load_types = (
                     "x.x",
                     "cufftComplex",
                     "cufftCallbackLoadC",
+                    "cufftJITCallbackLoadComplex",
                     "float",
                 )
                 store_types = (
                     "x.y",
                     "cufftComplex",
                     "cufftCallbackStoreC",
+                    "cufftJITCallbackStoreComplex",
                     "float",
                 )
-            else:
+            else:  # complex128
                 load_types = (
                     "x.x",
                     "cufftDoubleComplex",
                     "cufftCallbackLoadZ",
+                    "cufftJITCallbackLoadDoubleComplex",
                     "double",
                 )
                 store_types = (
                     "x.y",
                     "cufftDoubleComplex",
                     "cufftCallbackStoreZ",
+                    "cufftJITCallbackStoreDoubleComplex",
                     "double",
                 )
         elif fft_func == "rfftn":
             if dtype == np.float32:
-                load_types = ("x", "cufftReal", "cufftCallbackLoadR", "float")
+                load_types = (
+                    "x",
+                    "cufftReal",
+                    "cufftCallbackLoadR",
+                    "cufftJITCallbackLoadReal",
+                    "float",
+                )
                 store_types = (
                     "x.y",
                     "cufftComplex",
                     "cufftCallbackStoreC",
+                    "cufftJITCallbackStoreComplex",
                     "float",
                 )
-            else:
+            else:  # float64
                 load_types = (
                     "x",
                     "cufftDoubleReal",
                     "cufftCallbackLoadD",
+                    "cufftJITCallbackLoadDoubleReal",
                     "double",
                 )
                 store_types = (
                     "x.y",
                     "cufftDoubleComplex",
                     "cufftCallbackStoreZ",
+                    "cufftJITCallbackStoreDoubleComplex",
                     "double",
                 )
         else:  # irfftn
@@ -759,24 +1383,35 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func):
                     "x.x",
                     "cufftComplex",
                     "cufftCallbackLoadC",
+                    "cufftJITCallbackLoadComplex",
                     "float",
                 )
-                store_types = ("x", "cufftReal", "cufftCallbackStoreR", "float")
-            else:
+                store_types = (
+                    "x",
+                    "cufftReal",
+                    "cufftCallbackStoreR",
+                    "cufftJITCallbackStoreReal",
+                    "float",
+                )
+            else:  # complex128
                 load_types = (
                     "x.x",
                     "cufftDoubleComplex",
                     "cufftCallbackLoadZ",
+                    "cufftJITCallbackLoadDoubleComplex",
                     "double",
                 )
                 store_types = (
                     "x",
                     "cufftDoubleReal",
                     "cufftCallbackStoreD",
+                    "cufftJITCallbackStoreDoubleReal",
                     "double",
                 )
-        cb_load = _set_load_cb(load_code, *load_types)
-        cb_store = _set_store_cb(store_code, *store_types)
+        cb_load = _set_load_cb(load_code, *load_types, cb_ver=self.cb_ver)
+        cb_load_name = load_types[3] if self.cb_ver == "jit" else None
+        cb_store = _set_store_cb(store_code, *store_types, cb_ver=self.cb_ver)
+        cb_store_name = store_types[3] if self.cb_ver == "jit" else None
 
         a = testing.shaped_random(self.shape, xp, dtype)
         if xp is np:
@@ -791,17 +1426,20 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func):
                 if dtype in (np.float32, np.complex64):
                     out = out.astype(np.float32)
         else:
-            with use_temporary_cache_dir():
-                with xp.fft.config.set_cufft_callbacks(
-                    cb_load=cb_load,
-                    cb_store=cb_store,
-                    cb_load_aux_arr=load_aux,
-                    cb_store_aux_arr=store_aux,
-                ):
-                    out = fft(a, s=self.s, axes=self.axes, norm=self.norm)
+            with xp.fft.config.set_cufft_callbacks(
+                cb_load=cb_load,
+                cb_load_name=cb_load_name,
+                cb_store=cb_store,
+                cb_store_name=cb_store_name,
+                cb_load_data=load_aux.data,
+                cb_store_data=store_aux.data,
+                cb_ver=self.cb_ver,
+            ):
+                out = fft(a, s=self.s, axes=self.axes, norm=self.norm)
 
         return out
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -809,6 +1447,7 @@ def _test_load_store_aux_helper(self, xp, dtype, fft_func):
     def test_fftn_load_store_aux(self, xp, dtype):
         return self._test_load_store_aux_helper(xp, dtype, "fftn")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -816,6 +1455,7 @@ def test_fftn_load_store_aux(self, xp, dtype):
     def test_ifftn_load_store_aux(self, xp, dtype):
         return self._test_load_store_aux_helper(xp, dtype, "ifftn")
 
+    @suppress_legacy_warning
     @testing.for_float_dtypes(no_float16=True)
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
@@ -823,6 +1463,7 @@ def test_ifftn_load_store_aux(self, xp, dtype):
     def test_rfftn_load_store_aux(self, xp, dtype):
         return self._test_load_store_aux_helper(xp, dtype, "rfftn")
 
+    @suppress_legacy_warning
     @testing.for_complex_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-4, atol=1e-7, accept_error=ValueError, contiguous_check=False
diff --git a/dpnp/tests/third_party/cupy/fft_tests/test_fft.py b/dpnp/tests/third_party/cupy/fft_tests/test_fft.py
index 534b474363f1..369409ba001c 100644
--- a/dpnp/tests/third_party/cupy/fft_tests/test_fft.py
+++ b/dpnp/tests/third_party/cupy/fft_tests/test_fft.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import functools
 import warnings
 
@@ -6,6 +8,14 @@
 
 import dpnp as cupy
 from dpnp.tests.helper import has_support_aspect64
+
+# from cupy.fft import config
+# from cupy.fft._fft import (
+#     _default_fft_func,
+#     _fft,
+#     _fftn,
+#     _size_last_transform_axis,
+# )
 from dpnp.tests.third_party.cupy import testing
 from dpnp.tests.third_party.cupy.testing._loops import _wraps_partial
 
@@ -36,12 +46,16 @@ def decorator(impl):
         @_wraps_partial(impl, name)
         def test_func(self, *args, **kw):
             # get original global planning state
-            # planning_state = config.enable_nd_planning
+            # with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+            #     planning_state = config.enable_nd_planning
             try:
                 for nd_planning in states:
                     try:
                         # enable or disable nd planning
-                        # config.enable_nd_planning = nd_planning
+                        # with pytest.warns(
+                        #     DeprecationWarning, match="enable_nd_planning"
+                        # ):
+                        #     config.enable_nd_planning = nd_planning
 
                         kw[name] = nd_planning
                         impl(self, *args, **kw)
@@ -50,7 +64,10 @@ def test_func(self, *args, **kw):
                         raise
             finally:
                 # restore original global planning state
-                # config.enable_nd_planning = planning_state
+                # with pytest.warns(
+                #     DeprecationWarning, match="enable_nd_planning"
+                # ):
+                #     config.enable_nd_planning = planning_state
                 pass
 
         return test_func
@@ -71,8 +88,8 @@ def multi_gpu_config(gpu_configs=None):
     def decorator(impl):
         @functools.wraps(impl)
         def test_func(self, *args, **kw):
-            use_multi_gpus = config.use_multi_gpus
-            _devices = config._devices
+            use_multi_gpus = config._use_multi_gpus.get()
+            _devices = config._devices.get()
 
             try:
                 for gpus in gpu_configs:
@@ -81,23 +98,21 @@ def test_func(self, *args, **kw):
                         assert nGPUs >= 2, "Must use at least two gpus"
                         config.use_multi_gpus = True
                         config.set_cufft_gpus(gpus)
-                        self.gpus = gpus
 
                         impl(self, *args, **kw)
                     except Exception:
                         print("GPU config is:", gpus)
                         raise
             finally:
-                config.use_multi_gpus = use_multi_gpus
-                config._devices = _devices
-                del self.gpus
+                config._use_multi_gpus.set(use_multi_gpus)
+                config._devices.set(_devices)
 
         return test_func
 
     return decorator
 
 
-# @testing.with_requires("numpy>=2.0")
+@testing.with_requires("numpy>=2.0")
 @pytest.mark.usefixtures("skip_forward_backward")
 @testing.parameterize(
     *testing.product(
@@ -138,7 +153,7 @@ def test_ifft(self, xp, dtype):
         return xp.fft.ifft(a, n=self.n, norm=self.norm)
 
 
-# @testing.with_requires("numpy>=2.0")
+@testing.with_requires("numpy>=2.0")
 @testing.parameterize(
     *testing.product(
         {
@@ -179,19 +194,6 @@ def test_ifft(self, xp, dtype):
         return xp.fft.ifft(a, axis=self.axis)
 
 
-# See #3757 and NVIDIA internal ticket 3093094
-def _skip_multi_gpu_bug(shape, gpus):
-    # avoid CUDA 11.0 (will be fixed by CUDA 11.2) bug triggered by
-    # - batch = 1
-    # - gpus = [1, 0]
-    if (
-        11000 <= cupy.cuda.runtime.runtimeGetVersion() < 11020
-        and len(shape) == 1
-        and gpus == [1, 0]
-    ):
-        pytest.skip("avoid CUDA 11 bug")
-
-
 # Almost identical to the TestFft class, except that
 # 1. multi-GPU cuFFT is used
 # 2. the tested parameter combinations are adjusted to meet the requirements
@@ -208,6 +210,9 @@ def _skip_multi_gpu_bug(shape, gpus):
 )
 @pytest.mark.skip("multi GPU is not supported")
 @testing.multi_gpu(2)
+# @pytest.mark.skipif(
+#     cupy.cuda.runtime.is_hip, reason="hipFFT does not support multi-GPU FFT"
+# )
 class TestMultiGpuFft:
 
     @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]])
@@ -216,8 +221,6 @@ class TestMultiGpuFft:
         rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False
     )
     def test_fft(self, xp, dtype):
-        _skip_multi_gpu_bug(self.shape, self.gpus)
-
         a = testing.shaped_random(self.shape, xp, dtype)
         return xp.fft.fft(a, n=self.n, norm=self.norm)
 
@@ -230,8 +233,6 @@ def test_fft(self, xp, dtype):
     @testing.with_requires("numpy!=1.17.0")
     @testing.with_requires("numpy!=1.17.1")
     def test_ifft(self, xp, dtype):
-        _skip_multi_gpu_bug(self.shape, self.gpus)
-
         a = testing.shaped_random(self.shape, xp, dtype)
         return xp.fft.ifft(a, n=self.n, norm=self.norm)
 
@@ -251,6 +252,9 @@ def test_ifft(self, xp, dtype):
 )
 @pytest.mark.skip("multi GPU is not supported")
 @testing.multi_gpu(2)
+# @pytest.mark.skipif(
+#     cupy.cuda.runtime.is_hip, reason="hipFFT does not support multi-GPU FFT"
+# )
 class TestMultiGpuFftOrder:
     @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]])
     @testing.for_complex_dtypes()
@@ -258,8 +262,6 @@ class TestMultiGpuFftOrder:
         rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False
     )
     def test_fft(self, xp, dtype):
-        _skip_multi_gpu_bug(self.shape, self.gpus)
-
         a = testing.shaped_random(self.shape, xp, dtype)
         if self.data_order == "F":
             a = xp.asfortranarray(a)
@@ -271,8 +273,6 @@ def test_fft(self, xp, dtype):
         rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False
     )
     def test_ifft(self, xp, dtype):
-        _skip_multi_gpu_bug(self.shape, self.gpus)
-
         a = testing.shaped_random(self.shape, xp, dtype)
         if self.data_order == "F":
             a = xp.asfortranarray(a)
@@ -348,9 +348,13 @@ def test_default_fft_func(self, enable_nd):
 
 @pytest.mark.skip("memory management is not supported")
 @testing.with_requires("numpy>=2.0")
+# @pytest.mark.skipif(
+#     10010 <= cupy.cuda.runtime.runtimeGetVersion() <= 11010,
+#     reason="avoid a cuFFT bug (cupy/cupy#3777)",
+# )
 @testing.slow
 class TestFftAllocate:
-
+    # @pytest.mark.thread_unsafe(reason="does large allocations")
     def test_fft_allocate(self):
         # Check CuFFTError is not raised when the GPU memory is enough.
         # See https://github.com/cupy/cupy/issues/1063
@@ -368,7 +372,7 @@ def test_fft_allocate(self):
         cupy.fft.config.clear_plan_cache()
 
 
-# @testing.with_requires("numpy>=2.0")
+@testing.with_requires("numpy>=2.0")
 @pytest.mark.usefixtures("skip_forward_backward")
 @testing.parameterize(
     *(
@@ -413,7 +417,8 @@ class TestFft2:
         type_check=has_support_aspect64(),
     )
     def test_fft2(self, xp, dtype, order, enable_nd):
-        # assert config.enable_nd_planning == enable_nd
+        # with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+        #     assert config.enable_nd_planning == enable_nd
         a = testing.shaped_random(self.shape, xp, dtype)
         if order == "F":
             a = xp.asfortranarray(a)
@@ -440,7 +445,8 @@ def test_fft2(self, xp, dtype, order, enable_nd):
         type_check=has_support_aspect64(),
     )
     def test_ifft2(self, xp, dtype, order, enable_nd):
-        # assert config.enable_nd_planning == enable_nd
+        # with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+        #     assert config.enable_nd_planning == enable_nd
         a = testing.shaped_random(self.shape, xp, dtype)
         if order == "F":
             a = xp.asfortranarray(a)
@@ -457,7 +463,7 @@ def test_ifft2(self, xp, dtype, order, enable_nd):
         return out
 
 
-# @testing.with_requires("numpy>=2.0")
+@testing.with_requires("numpy>=2.0")
 @pytest.mark.usefixtures("skip_forward_backward")
 @testing.parameterize(
     *(
@@ -503,7 +509,8 @@ class TestFftn:
         type_check=has_support_aspect64(),
     )
     def test_fftn(self, xp, dtype, order, enable_nd):
-        # assert config.enable_nd_planning == enable_nd
+        # with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+        #     assert config.enable_nd_planning == enable_nd
         a = testing.shaped_random(self.shape, xp, dtype)
         if order == "F":
             a = xp.asfortranarray(a)
@@ -530,7 +537,8 @@ def test_fftn(self, xp, dtype, order, enable_nd):
         type_check=has_support_aspect64(),
     )
     def test_ifftn(self, xp, dtype, order, enable_nd):
-        # assert config.enable_nd_planning == enable_nd
+        # with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+        #     assert config.enable_nd_planning == enable_nd
         a = testing.shaped_random(self.shape, xp, dtype)
         if order == "F":
             a = xp.asfortranarray(a)
@@ -595,7 +603,8 @@ def skip_buggy(self):
         rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False
     )
     def test_fftn(self, xp, dtype, enable_nd):
-        assert config.enable_nd_planning == enable_nd
+        with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+            assert config.enable_nd_planning == enable_nd
         a = testing.shaped_random(self.shape, xp, dtype)
 
         if xp is np:
@@ -613,7 +622,8 @@ def test_fftn(self, xp, dtype, enable_nd):
         rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False
     )
     def test_ifftn(self, xp, dtype, enable_nd):
-        assert config.enable_nd_planning == enable_nd
+        with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+            assert config.enable_nd_planning == enable_nd
         a = testing.shaped_random(self.shape, xp, dtype)
 
         if xp is np:
@@ -635,7 +645,8 @@ def test_fftn_error_on_wrong_plan(self, dtype, enable_nd):
         from cupy.fft import fftn
         from cupyx.scipy.fftpack import get_fft_plan
 
-        assert config.enable_nd_planning == enable_nd
+        with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+            assert config.enable_nd_planning == enable_nd
 
         # can't get a plan, so skip
         if self.axes is not None:
@@ -747,6 +758,9 @@ def test_fft_error_on_wrong_plan(self, dtype):
 )
 @pytest.mark.skip("get_fft_plan() is not supported")
 @testing.multi_gpu(2)
+# @pytest.mark.skipif(
+#     cupy.cuda.runtime.is_hip, reason="hipFFT does not support multi-GPU FFT"
+# )
 class TestMultiGpuPlanCtxManagerFft:
 
     @multi_gpu_config(gpu_configs=[[0, 1], [1, 0]])
@@ -755,8 +769,6 @@ class TestMultiGpuPlanCtxManagerFft:
         rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False
     )
     def test_fft(self, xp, dtype):
-        _skip_multi_gpu_bug(self.shape, self.gpus)
-
         a = testing.shaped_random(self.shape, xp, dtype)
 
         if xp is np:
@@ -776,8 +788,6 @@ def test_fft(self, xp, dtype):
         rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False
     )
     def test_ifft(self, xp, dtype):
-        _skip_multi_gpu_bug(self.shape, self.gpus)
-
         a = testing.shaped_random(self.shape, xp, dtype)
 
         if xp is np:
@@ -1029,7 +1039,8 @@ class TestRfft2:
         type_check=has_support_aspect64(),
     )
     def test_rfft2(self, xp, dtype, order, enable_nd):
-        # assert config.enable_nd_planning == enable_nd
+        # with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+        #     assert config.enable_nd_planning == enable_nd
         a = testing.shaped_random(self.shape, xp, dtype)
         if order == "F":
             a = xp.asfortranarray(a)
@@ -1046,14 +1057,21 @@ def test_rfft2(self, xp, dtype, order, enable_nd):
         type_check=has_support_aspect64(),
     )
     def test_irfft2(self, xp, dtype, order, enable_nd):
-        # assert config.enable_nd_planning == enable_nd
+        # with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+        #     assert config.enable_nd_planning == enable_nd
+        # if (
+        #     10020 >= cupy.cuda.runtime.runtimeGetVersion() >= 10010
+        #     and int(cupy.cuda.device.get_compute_capability()) < 70
+        #     and _size_last_transform_axis(self.shape, self.s, self.axes) == 2
+        # ):
+        #     pytest.skip("work-around for cuFFT issue")
         a = testing.shaped_random(self.shape, xp, dtype)
         if order == "F":
             a = xp.asfortranarray(a)
         return xp.fft.irfft2(a, s=self.s, axes=self.axes, norm=self.norm)
 
 
-# @testing.with_requires("numpy>=2.0")
+@testing.with_requires("numpy>=2.0")
 @testing.parameterize(
     {"shape": (3, 4), "s": None, "axes": (), "norm": None},
     {"shape": (2, 3, 4), "s": None, "axes": (), "norm": None},
@@ -1115,7 +1133,8 @@ class TestRfftn:
         type_check=has_support_aspect64(),
     )
     def test_rfftn(self, xp, dtype, order, enable_nd):
-        # assert config.enable_nd_planning == enable_nd
+        # with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+        #     assert config.enable_nd_planning == enable_nd
         a = testing.shaped_random(self.shape, xp, dtype)
         if order == "F":
             a = xp.asfortranarray(a)
@@ -1132,7 +1151,14 @@ def test_rfftn(self, xp, dtype, order, enable_nd):
         type_check=has_support_aspect64(),
     )
     def test_irfftn(self, xp, dtype, order, enable_nd):
-        # assert config.enable_nd_planning == enable_nd
+        # with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+        #     assert config.enable_nd_planning == enable_nd
+        # if (
+        #     10020 >= cupy.cuda.runtime.runtimeGetVersion() >= 10010
+        #     and int(cupy.cuda.device.get_compute_capability()) < 70
+        #     and _size_last_transform_axis(self.shape, self.s, self.axes) == 2
+        # ):
+        #     pytest.skip("work-around for cuFFT issue")
         a = testing.shaped_random(self.shape, xp, dtype)
         if order == "F":
             a = xp.asfortranarray(a)
@@ -1182,7 +1208,8 @@ def skip_buggy(self):
         rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False
     )
     def test_rfftn(self, xp, dtype, enable_nd):
-        assert config.enable_nd_planning == enable_nd
+        with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+            assert config.enable_nd_planning == enable_nd
         a = testing.shaped_random(self.shape, xp, dtype)
 
         if xp is np:
@@ -1194,13 +1221,17 @@ def test_rfftn(self, xp, dtype, enable_nd):
         with plan:
             return xp.fft.rfftn(a, s=self.s, axes=self.axes, norm=self.norm)
 
+    # @pytest.mark.skipif(
+    #     cupy.cuda.runtime.is_hip, reason="hipFFT's PlanNd for C2R is buggy"
+    # )
     @nd_planning_states()
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose(
         rtol=1e-3, atol=1e-7, accept_error=ValueError, contiguous_check=False
     )
     def test_irfftn(self, xp, dtype, enable_nd):
-        assert config.enable_nd_planning == enable_nd
+        with pytest.warns(DeprecationWarning, match="enable_nd_planning"):
+            assert config.enable_nd_planning == enable_nd
         a = testing.shaped_random(self.shape, xp, dtype)
         if xp is np:
             return xp.fft.irfftn(a, s=self.s, axes=self.axes, norm=self.norm)
@@ -1282,7 +1313,7 @@ def test_ifftn_orders(self, dtype, enable_nd):
                 pass
 
 
-# @testing.with_requires("numpy>=2.0")
+@testing.with_requires("numpy>=2.0")
 @testing.parameterize(
     {"shape": (3, 4), "s": None, "axes": (), "norm": None},
     {"shape": (2, 3, 4), "s": None, "axes": (), "norm": None},
@@ -1343,7 +1374,7 @@ def test_ihfft(self, xp, dtype):
         return xp.fft.ihfft(a, n=self.n, norm=self.norm)
 
 
-# @testing.with_requires("numpy>=2.0")
+@testing.with_requires("numpy>=2.0")
 @testing.parameterize(
     {"n": 1, "d": 1},
     {"n": 10, "d": 0.5},
@@ -1372,7 +1403,7 @@ def test_rfftfreq(self, xp, dtype):
         return xp.fft.rfftfreq(self.n, self.d)
 
 
-# @testing.with_requires("numpy>=2.0")
+@testing.with_requires("numpy>=2.0")
 @testing.parameterize(
     {"shape": (5,), "axes": None},
     {"shape": (5,), "axes": 0},
diff --git a/dpnp/tests/third_party/cupy/random_tests/test_generator.py b/dpnp/tests/third_party/cupy/random_tests/test_generator.py
index 9d1a3d233473..abb58df07af9 100644
--- a/dpnp/tests/third_party/cupy/random_tests/test_generator.py
+++ b/dpnp/tests/third_party/cupy/random_tests/test_generator.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import functools
 import os
 import threading
@@ -850,6 +852,7 @@ def test_goodness_of_fit(self):
         assert _hypothesis.chi_square_test(counts, expected)
 
     @_condition.repeat(3, 10)
+    # @pytest.mark.xfail(runtime.is_hip, reason="ROCm/HIP may have a bug")
     def test_goodness_of_fit_2(self):
         vals = self.generate(3, (5, 20), True, [0.3, 0.3, 0.4]).get()
         counts = numpy.histogram(vals, bins=numpy.arange(4))[0]
@@ -929,6 +932,163 @@ def test_bound(self):
         assert numpy.unique(val).size == val.size
 
 
+@testing.parameterize(
+    # Edge cases with small domain sizes
+    {"a": 0, "size": 0},
+    {"a": 1, "size": 1},
+    {"a": 2, "size": 1},
+    {"a": 256, "size": 100},  # Minimum cipher bits threshold
+    {"a": 257, "size": 100},
+    # Large-scale uniqueness
+    {"a": 100, "size": 50},
+    {"a": 1000, "size": 500},
+    {"a": 10000, "size": 5000},
+    {"a": 100000, "size": 50000},
+    # Full input permutation
+    {"a": 10, "size": 10},
+    {"a": 100, "size": 100},
+    {"a": 1000, "size": 1000},
+    # Power of 2
+    {"a": 2**8, "size": 100},
+    {"a": 2**10, "size": 500},
+    {"a": 2**16, "size": 1000},
+    {"a": 2**20, "size": 5000},
+    {"a": 2**24, "size": 10000},
+    # Just below power of 2
+    {"a": 2**8 - 1, "size": 100},
+    {"a": 2**16 - 1, "size": 1000},
+    {"a": 2**20 - 1, "size": 5000},
+    # Just above power of 2
+    {"a": 2**8 + 1, "size": 100},
+    {"a": 2**16 + 1, "size": 1000},
+    {"a": 2**20 + 1, "size": 5000},
+    # Test multi-dimensional shapes.
+    {"a": 6, "size": (2, 3)},
+    {"a": 32, "size": (4, 5)},
+    {"a": 120, "size": (5, 4, 5)},
+)
+@testing.fix_random()
+class TestChoiceReplaceFalseLargeScale(RandomGeneratorTestCase):
+    """Test large-scale uniqueness for Feistel bijection implementation."""
+
+    target_method = "choice"
+
+    def test_uniqueness_and_bounds(self):
+        """Test that samples have no duplicates and correct bounds."""
+        val = self.generate(a=self.a, size=self.size, replace=False).get()
+        size = self.size if isinstance(self.size, tuple) else (self.size,)
+
+        # Check shape
+        assert val.shape == size
+
+        # Check bounds
+        assert (0 <= val).all()
+        assert (val < self.a).all()
+
+        # Check uniqueness
+        val_flat = numpy.asarray(val).flatten()
+        assert (
+            numpy.unique(val_flat).size == val_flat.size
+        ), "Found duplicate values in replace=False sample"
+
+
+@testing.fix_random()
+class TestChoiceReplaceFalseStatistical(RandomGeneratorTestCase):
+    """Statistical tests for uniformity of Feistel bijection."""
+
+    target_method = "choice"
+
+    @_condition.repeat(3)
+    def test_small_domain_uniformity(self):
+        """Chi-square test for uniform sampling in small domain."""
+        # Sample from domain of size 10, taking 5 elements
+        # Repeat many times and check each index appears uniformly
+        n = 10
+        sample_size = 5
+        n_trials = 1000
+
+        counts = cupy.zeros(n, dtype=int)
+        vals = self.generate_many(
+            n, size=sample_size, replace=False, _count=n_trials
+        )
+        for val in vals:
+            counts[val] += 1
+        counts = counts.get()
+
+        # Each index should appear ~500 times (5/10 * 1000)
+        expected = numpy.ones(n, dtype=int) * (sample_size * n_trials // n)
+        assert _hypothesis.chi_square_test(counts, expected)
+
+    @_condition.repeat(3, 10)
+    def test_permutation_variability(self):
+        """Test that repeated full permutations are different."""
+        n = 20
+        n_trials = 10
+
+        vals = self.generate_many(n, size=n, replace=False, _count=n_trials)
+        perms = cupy.vstack(vals)
+
+        # Should have multiple unique permutations
+        unique_perms = cupy.unique(perms, axis=0)
+        assert (
+            len(unique_perms) == n_trials
+        ), "Permutations should vary across multiple calls"
+
+
+@testing.slow
+@testing.fix_random()
+class TestChoiceReplaceFalseVeryLargeDomain(unittest.TestCase):
+    """Test memory efficiency with very large domains."""
+
+    def setUp(self):
+        self.rs = _generator.RandomState(seed=testing.generate_seed())
+
+    def test_large_domain_memory_efficiency(self):
+        """Test that very large domains don't allocate full arrays."""
+        # This should NOT allocate a 2^30 element array
+        # If it did, it would require ~8GB of memory
+        a = 2**30
+        size = 1000
+
+        val = self.rs.choice(a=a, size=size, replace=False).get()
+
+        # Check bounds
+        assert (0 <= val).all()
+        assert (val < a).all()
+
+        # Check uniqueness
+        assert numpy.unique(val).size == size
+
+    def test_near_32bit_limit(self):
+        """Test at the 32-bit boundary."""
+        # Current implementation supports up to 2^32
+        a = 2**31
+        size = 500
+
+        val = self.rs.choice(a=a, size=size, replace=False).get()
+
+        # Check bounds
+        assert (0 <= val).all()
+        assert (val < a).all()
+
+        # Check uniqueness
+        assert numpy.unique(val).size == size
+
+
+@testing.fix_random()
+class TestChoiceReplaceFalseDtypeConsistency(RandomGeneratorTestCase):
+    """Test output dtype consistency."""
+
+    target_method = "choice"
+
+    def test_integer_input_dtype(self):
+        """Integer input should produce int64/long dtype."""
+        val = self.generate(a=100, size=50, replace=False)
+
+        # Should be 'l' (long) dtype, which is int64 on most platforms
+        assert val.dtype == numpy.dtype("l") or val.dtype == numpy.int64
+
+
 @testing.fix_random()
 class TestGumbel(RandomGeneratorTestCase):
 

From e9a79f474ca92b1ca608f354fe9cb5892f70e71e Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Wed, 21 Jan 2026 11:28:09 +0100
Subject: [PATCH 11/16] Bump required dpctl version up to `0.22.0*` (#2731)

The PR bumps dependency on dpctl package.
---
 conda-recipe/meta.yaml          |  2 +-
 dpnp/tests/helper.py            | 10 ++++++++--
 dpnp/tests/test_mathematical.py |  7 ++++---
 environments/dpctl_pkg.txt      |  2 +-
 environments/dpctl_pkg.yml      |  2 +-
 pyproject.toml                  |  2 +-
 6 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
index 2fd55e07eb4b..661f44b50ed9 100644
--- a/conda-recipe/meta.yaml
+++ b/conda-recipe/meta.yaml
@@ -1,6 +1,6 @@
 {% set max_compiler_and_mkl_version = environ.get("MAX_BUILD_CMPL_MKL_VERSION", "2026.0a0") %}
 {% set required_compiler_and_mkl_version = "2025.0" %}
-{% set required_dpctl_version = "0.21.0" %}
+{% set required_dpctl_version = "0.22.0*" %}
 
 {% set pyproject = load_file_data('pyproject.toml') %}
 {% set py_build_deps = pyproject.get('build-system', {}).get('requires', []) %}
diff --git a/dpnp/tests/helper.py b/dpnp/tests/helper.py
index 7678028d4880..65917ee6c340 100644
--- a/dpnp/tests/helper.py
+++ b/dpnp/tests/helper.py
@@ -1,4 +1,5 @@
 import importlib.util
+from enum import Enum
 from sys import platform
 
 import dpctl
@@ -11,6 +12,11 @@
 from . import config
 
 
+class LTS_VERSION(Enum):
+    V1_3 = "1.3"
+    V1_6 = "1.6"
+
+
 def _assert_dtype(a_dt, b_dt, check_only_type_kind=False):
     if check_only_type_kind:
         assert a_dt.kind == b_dt.kind, f"{a_dt.kind} != {b_dt.kind}"
@@ -475,13 +481,13 @@ def is_lnl(device=None):
     return _get_dev_mask(device) == 0x6400
 
 
-def is_lts_driver(device=None):
+def is_lts_driver(version=LTS_VERSION.V1_3, device=None):
     """
     Return True if a test is running on a GPU device with LTS driver version,
     False otherwise.
     """
     dev = dpctl.select_default_device() if device is None else device
-    return dev.has_aspect_gpu and "1.3" in dev.driver_version
+    return dev.has_aspect_gpu and version.value in dev.driver_version
 
 
 def is_ptl(device=None):
diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py
index 1418a5a7fed1..e38cb149c303 100644
--- a/dpnp/tests/test_mathematical.py
+++ b/dpnp/tests/test_mathematical.py
@@ -20,6 +20,7 @@
 from dpnp.dpnp_utils import map_dtype_to_device
 
 from .helper import (
+    LTS_VERSION,
     assert_dtype_allclose,
     generate_random_numpy_array,
     get_abs_array,
@@ -33,7 +34,7 @@
     has_support_aspect16,
     has_support_aspect64,
     is_intel_numpy,
-    is_ptl,
+    is_lts_driver,
     numpy_version,
 )
 from .third_party.cupy import testing
@@ -218,7 +219,7 @@ def _get_exp_array(self, a, axis, dtype):
     @pytest.mark.parametrize("axis", [None, 2, -1])
     @pytest.mark.parametrize("include_initial", [True, False])
     def test_basic(self, dtype, axis, include_initial):
-        if axis is None and is_ptl():
+        if axis is None and not is_lts_driver(version=LTS_VERSION.V1_6):
             pytest.skip("due to SAT-8336")
 
         a = dpnp.ones((3, 4, 5, 6, 7), dtype=dtype)
@@ -238,7 +239,7 @@ def test_basic(self, dtype, axis, include_initial):
     @pytest.mark.parametrize("axis", [None, 2, -1])
     @pytest.mark.parametrize("include_initial", [True, False])
     def test_include_initial(self, dtype, axis, include_initial):
-        if axis is None and is_ptl():
+        if axis is None and not is_lts_driver(version=LTS_VERSION.V1_6):
             pytest.skip("due to SAT-8336")
 
         a = dpnp.ones((3, 4, 5, 6, 7), dtype=dtype)
diff --git a/environments/dpctl_pkg.txt b/environments/dpctl_pkg.txt
index 29fdceb21e82..9d585f1ec230 100644
--- a/environments/dpctl_pkg.txt
+++ b/environments/dpctl_pkg.txt
@@ -1,2 +1,2 @@
 --index-url https://pypi.anaconda.org/dppy/label/dev/simple
-dpctl>=0.21.0dev0
+dpctl>=0.22.0dev0
diff --git a/environments/dpctl_pkg.yml b/environments/dpctl_pkg.yml
index 6cea30d0e65a..16d295cdc09c 100644
--- a/environments/dpctl_pkg.yml
+++ b/environments/dpctl_pkg.yml
@@ -2,4 +2,4 @@ name: Install dpctl package
 channels:
   - dppy/label/dev
 dependencies:
-  - dpctl>=0.21.0dev0
+  - dpctl>=0.22.0dev0
diff --git a/pyproject.toml b/pyproject.toml
index 42e142f266e9..d659428877fc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -50,7 +50,7 @@ dependencies = [
   # "dpcpp-cpp-rt>=0.59.0",
   # "intel-cmplr-lib-rt>=0.59.0"
   # WARNING: use the latest dpctl dev version, otherwise stable w/f will fail
-  "dpctl>=0.21.0dev0",
+  "dpctl>=0.22.0dev0",
   "numpy>=1.26.0"
 ]
 description = "Data Parallel Extension for NumPy"

From 7d4d2a0a4dbb3c4681e43e80f975e2f291c268e0 Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Wed, 21 Jan 2026 16:51:36 +0100
Subject: [PATCH 12/16] Clarify `axes` keyword in `tensordot` function (#2733)

The PR extends the docstrings to clarify the behavior when repeated values
are passed in `axes` to the `tensordot` functions.
An explicit test was also added to validate the exception raised on
repeated axes.
---
 CHANGELOG.md                     |  1 +
 dpnp/dpnp_iface_linearalgebra.py | 14 ++++++++++++--
 dpnp/linalg/dpnp_iface_linalg.py | 12 ++++++++++--
 dpnp/tests/test_product.py       |  7 +++++++
 4 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3507b51efc6f..6e098c365d72 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -44,6 +44,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum
 * Compile indexing extension with `-fno-sycl-id-queries-fit-in-int` to support huge arrays [#2721](https://github.com/IntelPython/dpnp/pull/2721)
 * Updated `dpnp.fix` to reuse `dpnp.trunc` internally [#2722](https://github.com/IntelPython/dpnp/pull/2722)
 * Changed the build scripts and documentation due to `python setup.py develop` deprecation notice [#2716](https://github.com/IntelPython/dpnp/pull/2716)
+* Clarified behavior on repeated `axes` in `dpnp.tensordot` and `dpnp.linalg.tensordot` functions [#2733](https://github.com/IntelPython/dpnp/pull/2733)
 
 ### Deprecated
 
diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py
index a0068a3597e5..acb123473482 100644
--- a/dpnp/dpnp_iface_linearalgebra.py
+++ b/dpnp/dpnp_iface_linearalgebra.py
@@ -1121,7 +1121,7 @@ def outer(a, b, out=None):
     return result
 
 
-def tensordot(a, b, axes=2):
+def tensordot(a, b, /, *, axes=2):
     r"""
     Compute tensor dot product along specified axes.
 
@@ -1148,7 +1148,10 @@ def tensordot(a, b, axes=2):
           axes must match.
         * (2,) array_like: A list of axes to be summed over, first sequence
           applying to `a`, second to `b`. Both elements array_like must be of
-          the same length.
+          the same length. Each axis may appear at most once; repeated axes are
+          not allowed.
+
+        Default: ``2``.
 
     Returns
     -------
@@ -1178,6 +1181,13 @@ def tensordot(a, b, axes=2):
     two sequences of the same length, with the first axis to sum over given
     first in both sequences, the second axis second, and so forth.
 
+    For example, if ``a.shape == (2, 3, 4)`` and ``b.shape == (3, 4, 5)``, then
+    ``axes=([1, 2], [0, 1])`` sums over the ``(3, 4)`` dimensions of both
+    arrays and produces an output of shape ``(2, 5)``.
+
+    Each summation axis corresponds to a distinct contraction index; repeating
+    an axis (for example ``axes=([1, 1], [0, 0])``) is invalid.
+
     The shape of the result consists of the non-contracted axes of the
     first tensor, followed by the non-contracted axes of the second.
 
diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py
index 95bb849afa3c..6959565ecf17 100644
--- a/dpnp/linalg/dpnp_iface_linalg.py
+++ b/dpnp/linalg/dpnp_iface_linalg.py
@@ -1975,9 +1975,10 @@ def tensordot(a, b, /, *, axes=2):
           axes must match.
         * (2,) array_like: A list of axes to be summed over, first sequence
           applying to `a`, second to `b`. Both elements array_like must be of
-          the same length.
+          the same length. Each axis may appear at most once; repeated axes are
+          not allowed.
 
-          Default: ``2``.
+        Default: ``2``.
 
     Returns
     -------
@@ -2007,6 +2008,13 @@ def tensordot(a, b, /, *, axes=2):
     two sequences of the same length, with the first axis to sum over given
     first in both sequences, the second axis second, and so forth.
 
+    For example, if ``a.shape == (2, 3, 4)`` and ``b.shape == (3, 4, 5)``, then
+    ``axes=([1, 2], [0, 1])`` sums over the ``(3, 4)`` dimensions of both
+    arrays and produces an output of shape ``(2, 5)``.
+
+    Each summation axis corresponds to a distinct contraction index; repeating
+    an axis (for example ``axes=([1, 1], [0, 0])``) is invalid.
+
     The shape of the result consists of the non-contracted axes of the
     first tensor, followed by the non-contracted axes of the second.
 
diff --git a/dpnp/tests/test_product.py b/dpnp/tests/test_product.py
index 763049e8791e..afe767a5e5d9 100644
--- a/dpnp/tests/test_product.py
+++ b/dpnp/tests/test_product.py
@@ -1842,6 +1842,13 @@ def test_error(self):
         with pytest.raises(ValueError):
             dpnp.tensordot(dpnp.arange(4), dpnp.array(5), axes=-1)
 
+    @pytest.mark.parametrize("xp", [numpy, dpnp])
+    def test_repeated_axes(self, xp):
+        a = xp.ones((2, 3, 3))
+        b = xp.ones((3, 3, 4))
+        with pytest.raises(ValueError):
+            xp.tensordot(a, b, axes=([1, 1], [0, 0]))
+
 
 class TestVdot:
     @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True))

From 626bd3badf3969c06dcc026877b9c2a94c9d4dd6 Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Thu, 22 Jan 2026 11:28:06 +0100
Subject: [PATCH 13/16] Resolve compute follows data issue in logspace and
 geomspace functions (#2723)

The PR closes #2352 and closes #2353.

This PR improves the implementation of the `dpnp.logspace` and
`dpnp.geomspace` functions based on the latest changes added to NumPy.
It also updates how input array_like arguments are handled, so that they
follow the compute-follows-data approach.
If neither the `device` nor the `sycl_queue` keyword is passed, an array
creation function should allocate the result based on the input arguments
(like `start` and `stop`).
Otherwise, all input arrays need to be moved to the specified queue and
the resulting array must be allocated there.
---
 CHANGELOG.md                         |   1 +
 dpnp/dpnp_algo/dpnp_arraycreation.py | 155 +++++++++---------
 dpnp/tests/test_arraycreation.py     | 225 +++++++++++++--------------
 dpnp/tests/test_sycl_queue.py        |  11 +-
 4 files changed, 197 insertions(+), 195 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6e098c365d72..61c82dedba5a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -65,6 +65,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum
 * Corrected a phonetic spelling issue due to incorrect using of `a nd` in docstrings [#2719](https://github.com/IntelPython/dpnp/pull/2719)
 * Resolved an issue causing `dpnp.linspace` to return an incorrect output shape when inputs were passed as arrays [#2712](https://github.com/IntelPython/dpnp/pull/2712)
 * Resolved an issue where `dpnp` always returns the base allocation pointer, when the view start is expected [#2651](https://github.com/IntelPython/dpnp/pull/2651)
+* Fixed an issue causing an exception in `dpnp.geomspace` and `dpnp.logspace` when called with explicit `device` keyword but any input array is allocated on another device [#2723](https://github.com/IntelPython/dpnp/pull/2723)
 
 ### Security
 
diff --git a/dpnp/dpnp_algo/dpnp_arraycreation.py b/dpnp/dpnp_algo/dpnp_arraycreation.py
index 009586906099..d94a031801f3 100644
--- a/dpnp/dpnp_algo/dpnp_arraycreation.py
+++ b/dpnp/dpnp_algo/dpnp_arraycreation.py
@@ -46,11 +46,46 @@
 
 
 def _as_usm_ndarray(a, usm_type, sycl_queue):
+    """Converts input object to `dpctl.tensor.usm_ndarray`"""
+
     if isinstance(a, dpnp_array):
-        return a.get_array()
+        a = a.get_array()
     return dpt.asarray(a, usm_type=usm_type, sycl_queue=sycl_queue)
 
 
+def _check_has_zero_val(a):
+    """Check if any element in input object is equal to zero"""
+
+    if dpnp.isscalar(a):
+        if a == 0:
+            return True
+    elif hasattr(a, "any"):
+        if (a == 0).any():
+            return True
+    elif (numpy.array(a) == 0).any():
+        return True
+    return False
+
+
+def _get_usm_allocations(objs, device=None, usm_type=None, sycl_queue=None):
+    """
+    Get common USM allocations based on a list of input objects and an explicit
+    device, a SYCL queue, or a USM type if specified.
+
+    """
+
+    alloc_usm_type, alloc_sycl_queue = get_usm_allocations(objs)
+
+    if sycl_queue is None and device is None:
+        sycl_queue = alloc_sycl_queue
+
+    if usm_type is None:
+        usm_type = alloc_usm_type or "device"
+    return usm_type, dpnp.get_normalized_queue_device(
+        sycl_queue=sycl_queue, device=device
+    )
+
+
 def dpnp_geomspace(
     start,
     stop,
@@ -62,76 +97,57 @@ def dpnp_geomspace(
     endpoint=True,
     axis=0,
 ):
-    usm_type_alloc, sycl_queue_alloc = get_usm_allocations([start, stop])
-
-    if sycl_queue is None and device is None:
-        sycl_queue = sycl_queue_alloc
-    sycl_queue_normalized = dpnp.get_normalized_queue_device(
-        sycl_queue=sycl_queue, device=device
+    usm_type, sycl_queue = _get_usm_allocations(
+        [start, stop], device=device, usm_type=usm_type, sycl_queue=sycl_queue
     )
 
-    if usm_type is None:
-        _usm_type = "device" if usm_type_alloc is None else usm_type_alloc
-    else:
-        _usm_type = usm_type
+    if _check_has_zero_val(start) or _check_has_zero_val(stop):
+        raise ValueError("Geometric sequence cannot include zero")
 
-    start = _as_usm_ndarray(start, _usm_type, sycl_queue_normalized)
-    stop = _as_usm_ndarray(stop, _usm_type, sycl_queue_normalized)
+    start = dpnp.array(start, usm_type=usm_type, sycl_queue=sycl_queue)
+    stop = dpnp.array(stop, usm_type=usm_type, sycl_queue=sycl_queue)
 
     dt = numpy.result_type(start, stop, float(num))
-    dt = map_dtype_to_device(dt, sycl_queue_normalized.sycl_device)
+    dt = map_dtype_to_device(dt, sycl_queue.sycl_device)
     if dtype is None:
         dtype = dt
 
-    if dpnp.any(start == 0) or dpnp.any(stop == 0):
-        raise ValueError("Geometric sequence cannot include zero")
+    # promote both arguments to the same dtype
+    start = start.astype(dt, copy=False)
+    stop = stop.astype(dt, copy=False)
 
-    out_sign = dpt.ones(
-        dpt.broadcast_arrays(start, stop)[0].shape,
-        dtype=dt,
-        usm_type=_usm_type,
-        sycl_queue=sycl_queue_normalized,
-    )
-    # Avoid negligible real or imaginary parts in output by rotating to
-    # positive real, calculating, then undoing rotation
-    if dpnp.issubdtype(dt, dpnp.complexfloating):
-        all_imag = (start.real == 0.0) & (stop.real == 0.0)
-        if dpnp.any(all_imag):
-            start[all_imag] = start[all_imag].imag
-            stop[all_imag] = stop[all_imag].imag
-            out_sign[all_imag] = 1j
-
-    both_negative = (dpt.sign(start) == -1) & (dpt.sign(stop) == -1)
-    if dpnp.any(both_negative):
-        dpt.negative(start[both_negative], out=start[both_negative])
-        dpt.negative(stop[both_negative], out=stop[both_negative])
-        dpt.negative(out_sign[both_negative], out=out_sign[both_negative])
-
-    log_start = dpt.log10(start)
-    log_stop = dpt.log10(stop)
+    # Allow negative real values and ensure a consistent result for complex
+    # (including avoiding negligible real or imaginary parts in output) by
+    # rotating start to positive real, calculating, then undoing rotation.
+    out_sign = dpnp.sign(start)
+    start = start / out_sign
+    stop = stop / out_sign
+
+    log_start = dpnp.log10(start)
+    log_stop = dpnp.log10(stop)
     res = dpnp_logspace(
         log_start,
         log_stop,
         num=num,
         endpoint=endpoint,
         base=10.0,
-        dtype=dtype,
-        usm_type=_usm_type,
-        sycl_queue=sycl_queue_normalized,
-    ).get_array()
+        dtype=dt,
+        usm_type=usm_type,
+        sycl_queue=sycl_queue,
+    )
 
+    # Make sure the endpoints match the start and stop arguments. This is
+    # necessary because np.exp(np.log(x)) is not necessarily equal to x.
     if num > 0:
         res[0] = start
         if num > 1 and endpoint:
             res[-1] = stop
 
-    res = out_sign * res
+    res *= out_sign
 
     if axis != 0:
-        res = dpt.moveaxis(res, 0, axis)
-
-    res = dpt.astype(res, dtype, copy=False)
-    return dpnp_array._create_from_usm_ndarray(res)
+        res = dpnp.moveaxis(res, 0, axis)
+    return res.astype(dtype, copy=False)
 
 
 def dpnp_linspace(
@@ -264,45 +280,36 @@ def dpnp_logspace(
     dtype=None,
     axis=0,
 ):
-    if not dpnp.isscalar(base):
-        usm_type_alloc, sycl_queue_alloc = get_usm_allocations(
-            [start, stop, base]
-        )
-
-        if sycl_queue is None and device is None:
-            sycl_queue = sycl_queue_alloc
-        sycl_queue = dpnp.get_normalized_queue_device(
-            sycl_queue=sycl_queue, device=device
-        )
-
-        if usm_type is None:
-            usm_type = "device" if usm_type_alloc is None else usm_type_alloc
-        else:
-            usm_type = usm_type
+    usm_type, sycl_queue = _get_usm_allocations(
+        [start, stop, base],
+        device=device,
+        usm_type=usm_type,
+        sycl_queue=sycl_queue,
+    )
 
-        start = _as_usm_ndarray(start, usm_type, sycl_queue)
-        stop = _as_usm_ndarray(stop, usm_type, sycl_queue)
-        base = _as_usm_ndarray(base, usm_type, sycl_queue)
+    if not dpnp.isscalar(base):
+        base = dpnp.array(base, usm_type=usm_type, sycl_queue=sycl_queue)
+        start = dpnp.array(start, usm_type=usm_type, sycl_queue=sycl_queue)
+        stop = dpnp.array(stop, usm_type=usm_type, sycl_queue=sycl_queue)
 
-        [start, stop, base] = dpt.broadcast_arrays(start, stop, base)
-        base = dpt.expand_dims(base, axis=axis)
+        start, stop, base = dpnp.broadcast_arrays(start, stop, base)
+        base = dpnp.expand_dims(base, axis=axis)
 
-    # assume res as not a tuple, because retstep is False
+    # assume `res` as not a tuple, because retstep is False
     res = dpnp_linspace(
         start,
         stop,
         num=num,
-        device=device,
         usm_type=usm_type,
         sycl_queue=sycl_queue,
         endpoint=endpoint,
         axis=axis,
-    ).get_array()
+    )
 
-    dpt.pow(base, res, out=res)
+    dpnp.pow(base, res, out=res)
     if dtype is not None:
-        res = dpt.astype(res, dtype, copy=False)
-    return dpnp_array._create_from_usm_ndarray(res)
+        res = res.astype(dtype, copy=False)
+    return res
 
 
 class dpnp_nd_grid:
diff --git a/dpnp/tests/test_arraycreation.py b/dpnp/tests/test_arraycreation.py
index 65747488310d..eb20f9b3ffe5 100644
--- a/dpnp/tests/test_arraycreation.py
+++ b/dpnp/tests/test_arraycreation.py
@@ -21,9 +21,6 @@
     get_array,
     get_float_dtypes,
     has_support_aspect64,
-    is_lts_driver,
-    is_tgllp_iris_xe,
-    is_win_platform,
 )
 from .third_party.cupy import testing
 
@@ -85,6 +82,62 @@ def test_validate_positional_args(self, xp):
         )
 
 
+class TestGeomspace:
+    @pytest.mark.parametrize("sign", [-1, 1])
+    @pytest.mark.parametrize("dtype", get_all_dtypes())
+    @pytest.mark.parametrize("num", [2, 4, 8, 3, 9, 27])
+    @pytest.mark.parametrize("endpoint", [True, False])
+    def test_basic(self, sign, dtype, num, endpoint):
+        start = 2 * sign
+        stop = 127 * sign
+
+        func = lambda xp: xp.geomspace(
+            start, stop, num, endpoint=endpoint, dtype=dtype
+        )
+
+        np_res = func(numpy)
+        dpnp_res = func(dpnp)
+
+        assert_allclose(dpnp_res, np_res, rtol=1e-06)
+
+    @pytest.mark.parametrize("start", [1j, 1 + 1j])
+    @pytest.mark.parametrize("stop", [10j, 10 + 10j])
+    def test_complex(self, start, stop):
+        func = lambda xp: xp.geomspace(start, stop, num=10)
+        np_res = func(numpy)
+        dpnp_res = func(dpnp)
+        assert_allclose(dpnp_res, np_res, rtol=1e-06)
+
+    @pytest.mark.parametrize("axis", [0, 1])
+    def test_axis(self, axis):
+        func = lambda xp: xp.geomspace([2, 3], [20, 15], num=10, axis=axis)
+        np_res = func(numpy)
+        dpnp_res = func(dpnp)
+        assert_allclose(dpnp_res, np_res, rtol=1e-06)
+
+    def test_num_zero(self):
+        func = lambda xp: xp.geomspace(1, 10, num=0, endpoint=False)
+        np_res = func(numpy)
+        dpnp_res = func(dpnp)
+        assert_allclose(dpnp_res, np_res)
+
+    @pytest.mark.parametrize(
+        "start, stop, num",
+        [
+            (0, 5, 3),
+            (2, 0, 3),
+            (0, 0, 3),
+            (dpnp.array([0]), 7, 10),
+            (-2, numpy.array([[0]]), 7),
+            ([2, 4, 0], 3, 5),
+            (10, [[1, 0], [2, 3]], 3),
+        ],
+    )
+    def test_zero_error(self, start, stop, num):
+        with pytest.raises(ValueError):
+            dpnp.geomspace(start, stop, num)
+
+
 class TestLinspace:
     @pytest.mark.parametrize("start", [0, -5, 10, -2.5, 9.7])
     @pytest.mark.parametrize("stop", [0, 10, -2, 20.5, 120])
@@ -210,6 +263,61 @@ def test_float_num(self, xp):
             _ = xp.linspace(0, 1, num=2.5)
 
 
+class TestLogspace:
+    @pytest.mark.parametrize("dtype", get_all_dtypes())
+    @pytest.mark.parametrize("num", [2, 4, 8, 3, 9, 27])
+    @pytest.mark.parametrize("endpoint", [True, False])
+    def test_basic(self, dtype, num, endpoint):
+        start = 2
+        stop = 5
+        base = 2
+
+        func = lambda xp: xp.logspace(
+            start, stop, num, endpoint=endpoint, dtype=dtype, base=base
+        )
+
+        np_res = func(numpy)
+        dpnp_res = func(dpnp)
+        assert_allclose(dpnp_res, np_res, rtol=1e-06)
+
+    @testing.with_requires("numpy>=1.25.0")
+    @pytest.mark.parametrize("axis", [0, 1])
+    def test_axis(self, axis):
+        func = lambda xp: xp.logspace(
+            [2, 3], [20, 15], num=2, base=[[1, 3], [5, 7]], axis=axis
+        )
+        assert_dtype_allclose(func(dpnp), func(numpy))
+
+    def test_list_input(self):
+        expected = numpy.logspace([0], [2], base=[5])
+        result = dpnp.logspace([0], [2], base=[5])
+        assert_dtype_allclose(result, expected)
+
+
+class TestSpaceLike:
+    @pytest.mark.parametrize("func", ["geomspace", "linspace", "logspace"])
+    @pytest.mark.parametrize(
+        "start_dtype", [numpy.float64, numpy.float32, numpy.int64, numpy.int32]
+    )
+    @pytest.mark.parametrize(
+        "stop_dtype", [numpy.float64, numpy.float32, numpy.int64, numpy.int32]
+    )
+    def test_numpy_dtype(self, func, start_dtype, stop_dtype):
+        start = numpy.array([1, 2, 3], dtype=start_dtype)
+        stop = numpy.array([11, 7, -2], dtype=stop_dtype)
+        getattr(dpnp, func)(start, stop, 10)
+
+    @pytest.mark.parametrize("xp", [dpnp, numpy])
+    @pytest.mark.parametrize("func", ["geomspace", "logspace"])
+    @pytest.mark.parametrize(
+        "start, stop, num",
+        [(2, 5, -3), ([2, 3], 5, -3)],
+    )
+    def test_space_num_error(self, xp, func, start, stop, num):
+        with pytest.raises(ValueError):
+            getattr(xp, func)(start, stop, num)
+
+
 class TestTrace:
     @pytest.mark.parametrize("a_sh", [(3, 4), (2, 2, 2)])
     @pytest.mark.parametrize(
@@ -871,19 +979,6 @@ def test_dpctl_tensor_input(func, args):
         assert_array_equal(X, Y)
 
 
-@pytest.mark.parametrize("func", ["geomspace", "linspace", "logspace"])
-@pytest.mark.parametrize(
-    "start_dtype", [numpy.float64, numpy.float32, numpy.int64, numpy.int32]
-)
-@pytest.mark.parametrize(
-    "stop_dtype", [numpy.float64, numpy.float32, numpy.int64, numpy.int32]
-)
-def test_space_numpy_dtype(func, start_dtype, stop_dtype):
-    start = numpy.array([1, 2, 3], dtype=start_dtype)
-    stop = numpy.array([11, 7, -2], dtype=stop_dtype)
-    getattr(dpnp, func)(start, stop, 10)
-
-
 @pytest.mark.parametrize(
     "arrays",
     [[], [[1]], [[1, 2, 3], [4, 5, 6]], [[1, 2], [3, 4], [5, 6]]],
@@ -908,104 +1003,6 @@ def test_set_shape(shape):
     assert_array_equal(na, da)
 
 
-def test_geomspace_zero_error():
-    with pytest.raises(ValueError):
-        dpnp.geomspace(0, 5, 3)
-        dpnp.geomspace(2, 0, 3)
-        dpnp.geomspace(0, 0, 3)
-
-
-def test_space_num_error():
-    with pytest.raises(ValueError):
-        dpnp.geomspace(2, 5, -3)
-        dpnp.logspace(2, 5, -3)
-        dpnp.geomspace([2, 3], 5, -3)
-        dpnp.logspace([2, 3], 5, -3)
-
-
-@pytest.mark.parametrize("sign", [-1, 1])
-@pytest.mark.parametrize("dtype", get_all_dtypes())
-@pytest.mark.parametrize("num", [2, 4, 8, 3, 9, 27])
-@pytest.mark.parametrize("endpoint", [True, False])
-def test_geomspace(sign, dtype, num, endpoint):
-    start = 2 * sign
-    stop = 127 * sign
-
-    func = lambda xp: xp.geomspace(
-        start, stop, num, endpoint=endpoint, dtype=dtype
-    )
-
-    np_res = func(numpy)
-    dpnp_res = func(dpnp)
-
-    assert_allclose(dpnp_res, np_res, rtol=1e-06)
-
-
-@pytest.mark.parametrize("start", [1j, 1 + 1j])
-@pytest.mark.parametrize("stop", [10j, 10 + 10j])
-def test_geomspace_complex(start, stop):
-    func = lambda xp: xp.geomspace(start, stop, num=10)
-    np_res = func(numpy)
-    dpnp_res = func(dpnp)
-    assert_allclose(dpnp_res, np_res, rtol=1e-06)
-
-
-@pytest.mark.parametrize("axis", [0, 1])
-def test_geomspace_axis(axis):
-    func = lambda xp: xp.geomspace([2, 3], [20, 15], num=10, axis=axis)
-    np_res = func(numpy)
-    dpnp_res = func(dpnp)
-    assert_allclose(dpnp_res, np_res, rtol=1e-06)
-
-
-def test_geomspace_num0():
-    func = lambda xp: xp.geomspace(1, 10, num=0, endpoint=False)
-    np_res = func(numpy)
-    dpnp_res = func(dpnp)
-    assert_allclose(dpnp_res, np_res)
-
-
-@pytest.mark.parametrize("dtype", get_all_dtypes())
-@pytest.mark.parametrize("num", [2, 4, 8, 3, 9, 27])
-@pytest.mark.parametrize("endpoint", [True, False])
-def test_logspace(dtype, num, endpoint):
-    if not is_win_platform() and is_tgllp_iris_xe() and is_lts_driver():
-        if (
-            dpnp.issubdtype(dtype, dpnp.integer)
-            and num in [8, 27]
-            and endpoint is True
-        ):
-            pytest.skip("SAT-7978")
-
-    start = 2
-    stop = 5
-    base = 2
-
-    func = lambda xp: xp.logspace(
-        start, stop, num, endpoint=endpoint, dtype=dtype, base=base
-    )
-
-    np_res = func(numpy)
-    dpnp_res = func(dpnp)
-
-    assert_allclose(dpnp_res, np_res, rtol=1e-06)
-
-
-@testing.with_requires("numpy>=1.25.0")
-@pytest.mark.parametrize("axis", [0, 1])
-def test_logspace_axis(axis):
-    func = lambda xp: xp.logspace(
-        [2, 3], [20, 15], num=2, base=[[1, 3], [5, 7]], axis=axis
-    )
-    assert_dtype_allclose(func(dpnp), func(numpy))
-
-
-def test_logspace_list_input():
-    expected = numpy.logspace([0], [2], base=[5])
-    result = dpnp.logspace([0], [2], base=[5])
-    assert_dtype_allclose(result, expected)
-
-
 @pytest.mark.parametrize(
     "data", [(), 1, (2, 3), [4], numpy.array(5), numpy.array([6, 7])]
 )
diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py
index 0bd4d6b53337..30256752ba06 100644
--- a/dpnp/tests/test_sycl_queue.py
+++ b/dpnp/tests/test_sycl_queue.py
@@ -133,10 +133,8 @@ def test_array_creation_from_array(func, args, device_x, device_y):
     assert_sycl_queue_equal(y.sycl_queue, x.sycl_queue)
 
     # cross device
-    # TODO: include geomspace when issue dpnp#2352 is resolved
-    if func != "geomspace":
-        y = getattr(dpnp, func)(*args, device=device_y)
-        assert_sycl_queue_equal(y.sycl_queue, x.to_device(device_y).sycl_queue)
+    y = getattr(dpnp, func)(*args, device=device_y)
+    assert_sycl_queue_equal(y.sycl_queue, x.to_device(device_y).sycl_queue)
 
 
 @pytest.mark.parametrize("device_x", valid_dev, ids=dev_ids)
@@ -148,10 +146,9 @@ def test_array_creation_logspace_base(device_x, device_y):
     y = dpnp.logspace(0, 8, 4, base=x[1:3])
     assert_sycl_queue_equal(y.sycl_queue, x.sycl_queue)
 
-    # TODO: include geomspace when issue dpnp#2353 is resolved
     # cross device
-    # y = dpnp.logspace(0, 8, 4, base=x[1:3], device=device_y)
-    # assert_sycl_queue_equal(y.sycl_queue, x.to_device(device_y).sycl_queue)
+    y = dpnp.logspace(0, 8, 4, base=x[1:3], device=device_y)
+    assert_sycl_queue_equal(y.sycl_queue, x.to_device(device_y).sycl_queue)
 
 
 @pytest.mark.parametrize("device", valid_dev + [None], ids=dev_ids + [None])

From 865b53e2584849cfe42094576fb616465f36ff42 Mon Sep 17 00:00:00 2001
From: vlad-perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 22 Jan 2026 14:59:55 +0100
Subject: [PATCH 14/16] Deprecate `dpnp.fix()` (#2730)

This PR deprecates `dpnp.fix()` in favor of `dpnp.trunc` (following
NumPy 2.5).
It adds the `DPNPDeprecatedUnaryFunc` wrapper and updates tests to ignore
`DeprecationWarning`.

Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com>
---
 CHANGELOG.md                                  |  1 +
 dpnp/dpnp_algo/dpnp_elementwise_common.py     | 27 +++++++++++++++++++
 dpnp/dpnp_iface_mathematical.py               | 10 ++++++-
 dpnp/tests/test_mathematical.py               | 26 ++++++++++++++++--
 dpnp/tests/test_sycl_queue.py                 |  6 ++++-
 dpnp/tests/test_usm_type.py                   |  6 ++++-
 .../cupy/math_tests/test_rounding.py          |  1 +
 7 files changed, 72 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 61c82dedba5a..69b06cb64bf8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -51,6 +51,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum
 * `dpnp.asfarray` is deprecated. Use `dpnp.asarray` with an appropriate dtype instead [#2650](https://github.com/IntelPython/dpnp/pull/2650)
 * Passing the output array ``out`` positionally to `dpnp.minimum` and `dpnp.maximum` is deprecated. Pass the output with the keyword form, e.g. ``dpnp.minimum(a, b, out=c)`` [#2659](https://github.com/IntelPython/dpnp/pull/2659)
 * `dpnp.ndarray.T` property is deprecated for not two-dimensional array to be compatible with the Python array API standard. To achieve a similar behavior when ``a.ndim != 2``, either ``a.transpose()``, or ``a.mT`` (swaps the last two axes only), or ``dpnp.permute_dims(a, range(a.ndim)[::-1])`` can be used [#2681](https://github.com/IntelPython/dpnp/pull/2681)
+* `dpnp.fix` is deprecated. Use `dpnp.trunc` instead, which provides identical functionality [#2730](https://github.com/IntelPython/dpnp/pull/2730)
 
 ### Removed
 
diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py
index c404d71dfbc6..57bf50422fa0 100644
--- a/dpnp/dpnp_algo/dpnp_elementwise_common.py
+++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py
@@ -60,6 +60,7 @@
     "DPNPBinaryFunc",
     "DPNPBinaryFuncOutKw",
     "DPNPBinaryTwoOutputsFunc",
+    "DPNPDeprecatedUnaryFunc",
     "DPNPImag",
     "DPNPReal",
     "DPNPRound",
@@ -230,6 +231,32 @@ def _unpack_out_kw(self, out):
         return out
 
 
+class DPNPDeprecatedUnaryFunc(DPNPUnaryFunc):
+    """
+    Class that implements a deprecated unary element-wise function.
+
+    Parameters
+    ----------
+    deprecated_msg : {str, None}, optional
+        Warning message to emit. If None, no warning is issued.
+
+        Default: ``None``.
+
+    """
+
+    def __init__(self, *args, deprecated_msg=None, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._deprecated_msg = deprecated_msg
+
+    @wraps(DPNPUnaryFunc.__call__)
+    def __call__(self, *args, **kwargs):
+        if self._deprecated_msg:
+            warnings.warn(
+                self._deprecated_msg, DeprecationWarning, stacklevel=2
+            )
+        return super().__call__(*args, **kwargs)
+
+
 class DPNPUnaryTwoOutputsFunc(UnaryElementwiseFunc):
     """
     Class that implements unary element-wise functions with two output arrays.
diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index 63aee599d9b9..3e6a4b0ed121 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -66,6 +66,7 @@
     DPNPBinaryFunc,
     DPNPBinaryFuncOutKw,
     DPNPBinaryTwoOutputsFunc,
+    DPNPDeprecatedUnaryFunc,
     DPNPImag,
     DPNPReal,
     DPNPRound,
@@ -1853,6 +1854,12 @@ def ediff1d(ary, to_end=None, to_begin=None):
 :obj:`dpnp.floor` : Return the floor of the input, element-wise.
 :obj:`dpnp.ceil` : Return the ceiling of the input, element-wise.
 
+Warning
+-------
+This function is deprecated. It is recommended to use
+:func:`dpnp.trunc` instead, as it provides the same functionality of
+truncating decimal values to their integer parts.
+
 Examples
 --------
 >>> import dpnp as np
@@ -1867,13 +1874,14 @@ def ediff1d(ary, to_end=None, to_begin=None):
 """
 
 # reuse trunc backend implementation for fix
-fix = DPNPUnaryFunc(
+fix = DPNPDeprecatedUnaryFunc(
     "fix",
     ti._trunc_result_type,
     ti._trunc,
     _FIX_DOCSTRING,
     mkl_fn_to_call="_mkl_trunc_to_call",
     mkl_impl_fn="_trunc",
+    deprecated_msg="dpnp.fix is deprecated in favor of dpnp.trunc",
 )
 
 
diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py
index e38cb149c303..d443b71adff8 100644
--- a/dpnp/tests/test_mathematical.py
+++ b/dpnp/tests/test_mathematical.py
@@ -2029,7 +2029,18 @@ def test_out_dtype(self, func):
 
     @pytest.mark.parametrize("xp", [numpy, dpnp])
     @pytest.mark.parametrize(
-        "func", ["abs", "fix", "round", "add", "frexp", "divmod"]
+        "func",
+        [
+            "abs",
+            pytest.param(
+                "fix",
+                marks=pytest.mark.filterwarnings("ignore::DeprecationWarning"),
+            ),
+            "round",
+            "add",
+            "frexp",
+            "divmod",
+        ],
     )
     def test_out_wrong_tuple_len(self, xp, func):
         if func == "round" and xp is numpy:
@@ -2544,7 +2555,18 @@ def test_projection(self, dtype):
         assert dpnp.allclose(result, expected)
 
 
-@pytest.mark.parametrize("func", ["ceil", "floor", "trunc", "fix"])
+@pytest.mark.parametrize(
+    "func",
+    [
+        "ceil",
+        "floor",
+        "trunc",
+        pytest.param(
+            "fix",
+            marks=pytest.mark.filterwarnings("ignore::DeprecationWarning"),
+        ),
+    ],
+)
 class TestRoundingFuncs:
     @testing.with_requires("numpy>=2.1.0")
     @pytest.mark.parametrize(
diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py
index 30256752ba06..cbd49873f617 100644
--- a/dpnp/tests/test_sycl_queue.py
+++ b/dpnp/tests/test_sycl_queue.py
@@ -261,7 +261,11 @@ def test_meshgrid(device):
         pytest.param("exp2", [0.0, 1.0, 2.0]),
         pytest.param("expm1", [1.0e-10, 1.0, 2.0, 4.0, 7.0]),
         pytest.param("fabs", [-1.2, 1.2]),
-        pytest.param("fix", [2.1, 2.9, -2.1, -2.9]),
+        pytest.param(
+            "fix",
+            [2.1, 2.9, -2.1, -2.9],
+            marks=pytest.mark.filterwarnings("ignore::DeprecationWarning"),
+        ),
         pytest.param("flatnonzero", [-2, -1, 0, 1, 2]),
         pytest.param("floor", [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]),
         pytest.param("gradient", [1.0, 2.0, 4.0, 7.0, 11.0, 16.0]),
diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py
index fa4fc62e34c9..4fc0f2b958fa 100644
--- a/dpnp/tests/test_usm_type.py
+++ b/dpnp/tests/test_usm_type.py
@@ -572,7 +572,11 @@ def test_meshgrid(usm_type_x, usm_type_y):
         pytest.param("exp2", [0.0, 1.0, 2.0]),
         pytest.param("expm1", [1.0e-10, 1.0, 2.0, 4.0, 7.0]),
         pytest.param("fabs", [-1.2, 1.2]),
-        pytest.param("fix", [2.1, 2.9, -2.1, -2.9]),
+        pytest.param(
+            "fix",
+            [2.1, 2.9, -2.1, -2.9],
+            marks=pytest.mark.filterwarnings("ignore::DeprecationWarning"),
+        ),
         pytest.param("flatnonzero", [-2, -1, 0, 1, 2]),
         pytest.param("floor", [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]),
         pytest.param("gradient", [1, 2, 4, 7, 11, 16]),
diff --git a/dpnp/tests/third_party/cupy/math_tests/test_rounding.py b/dpnp/tests/third_party/cupy/math_tests/test_rounding.py
index a2ad717f2500..10e79715dd10 100644
--- a/dpnp/tests/third_party/cupy/math_tests/test_rounding.py
+++ b/dpnp/tests/third_party/cupy/math_tests/test_rounding.py
@@ -66,6 +66,7 @@ def test_trunc(self):
         self.check_unary("trunc")
         self.check_unary_complex_unsupported("trunc")
 
+    @pytest.mark.filterwarnings("ignore::DeprecationWarning")
     @testing.with_requires("numpy>=2.1")
     def test_fix(self):
         self.check_unary("fix")

From dc86084f4926ac0e646a7f53a96c704a48904795 Mon Sep 17 00:00:00 2001
From: vlad-perevezentsev <vladislav.perevezentsev@intel.com>
Date: Fri, 23 Jan 2026 11:24:34 +0100
Subject: [PATCH 15/16] Unskip dlpack tests (#2711)

This PR reverts #2708 because
[dpctl-2213](https://github.com/IntelPython/dpctl/issues/2213) has been
resolved.
---
 dpnp/tests/test_sycl_queue.py                         | 1 -
 dpnp/tests/third_party/cupy/core_tests/test_dlpack.py | 4 ----
 2 files changed, 5 deletions(-)

diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py
index cbd49873f617..d1853579036a 100644
--- a/dpnp/tests/test_sycl_queue.py
+++ b/dpnp/tests/test_sycl_queue.py
@@ -1082,7 +1082,6 @@ def test_array_creation_from_dpctl(copy, device):
     assert isinstance(result, dpnp_array)
 
 
-@pytest.mark.skip("due to dpctl-2213")
 @pytest.mark.parametrize("device", valid_dev, ids=dev_ids)
 @pytest.mark.parametrize("arr_dtype", get_all_dtypes(no_float16=True))
 @pytest.mark.parametrize("shape", [tuple(), (2,), (3, 0, 1), (2, 2, 2)])
diff --git a/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py b/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py
index 2df8cbfc5f7c..41df0a82e0a0 100644
--- a/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py
+++ b/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py
@@ -82,7 +82,6 @@ def _get_stream(self, stream_name):
             return dpctl.SyclQueue()
         return dpctl.SyclQueue()
 
-    @pytest.mark.skip("due to dpctl-2213")
     @testing.for_all_dtypes(no_bool=False)
     def test_conversion(self, dtype):
         orig_array = _gen_array(dtype)
@@ -102,7 +101,6 @@ def test_from_dlpack_and_conv_errors(self):
             # for host copies.
             cupy.from_dlpack(orig_array, copy=True)
 
-    @pytest.mark.skip("due to dpctl-2213")
     @pytest.mark.parametrize(
         "kwargs, versioned",
         [
@@ -130,7 +128,6 @@ def test_conversion_max_version(self, kwargs, versioned):
         testing.assert_array_equal(orig_array, out_array)
         testing.assert_array_equal(orig_array.data.ptr, out_array.data.ptr)
 
-    @pytest.mark.skip("due to dpctl-2213")
     def test_conversion_device(self):
         orig_array = _gen_array("float32")
 
@@ -202,7 +199,6 @@ def test_conversion_device_to_cpu(self):
             )
             assert numpy.may_share_memory(arr_nocopy, arr1)
 
-    @pytest.mark.skip("due to dpctl-2213")
     def test_stream(self):
         allowed_streams = ["null", True]
         # if not cuda.runtime.is_hip:

From 77c448a954f4b01aac76487d9286efd0aaa1fd45 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Sat, 24 Jan 2026 13:56:38 +0100
Subject: [PATCH 16/16] Weekly pre-commit autoupdate (#2737)

This PR updates the `.pre-commit-config.yaml` using `pre-commit
autoupdate`.
---
 .pre-commit-config.yaml                              |  4 ++--
 dpnp/__init__.py                                     |  1 -
 dpnp/dpnp_container.py                               |  1 -
 dpnp/dpnp_iface.py                                   |  1 +
 dpnp/dpnp_iface_arraycreation.py                     |  1 -
 dpnp/dpnp_iface_manipulation.py                      |  3 +--
 dpnp/linalg/__init__.py                              |  1 -
 dpnp/random/dpnp_random_state.py                     |  1 -
 dpnp/scipy/linalg/__init__.py                        |  1 -
 dpnp/scipy/linalg/_decomp_lu.py                      |  3 +--
 dpnp/scipy/linalg/_utils.py                          |  1 -
 dpnp/tests/third_party/cupy/core_tests/test_flags.py |  4 +---
 .../third_party/cupy/core_tests/test_function.py     |  8 ++------
 .../third_party/cupy/core_tests/test_include.py      | 10 ++--------
 dpnp/tests/third_party/cupy/test_init.py             | 12 ++++--------
 dpnp/tests/third_party/cupy/testing/_loops.py        | 10 ++++------
 dpnp/tests/third_party/cupy/testing/_pytest_impl.py  |  8 ++------
 examples/example1.py                                 |  1 -
 examples/example2.py                                 |  1 -
 examples/example4.py                                 |  1 -
 20 files changed, 20 insertions(+), 53 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 6be33c740497..50b1175ffebc 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -2,7 +2,7 @@
 # See https://pre-commit.com/hooks.html for more hooks
 repos:
 -   repo: https://github.com/PyCQA/bandit
-    rev: '1.9.2'
+    rev: '1.9.3'
     hooks:
     -   id: bandit
         pass_filenames: false
@@ -64,7 +64,7 @@ repos:
         additional_dependencies:
             - tomli
 -   repo: https://github.com/psf/black
-    rev: 25.12.0
+    rev: 26.1.0
     hooks:
     -   id: black
         exclude: "dpnp/_version.py"
diff --git a/dpnp/__init__.py b/dpnp/__init__.py
index 5ec230977f98..02420107972f 100644
--- a/dpnp/__init__.py
+++ b/dpnp/__init__.py
@@ -566,7 +566,6 @@
     kaiser,
 )
 
-
 # =============================================================================
 # Helper functions
 # =============================================================================
diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py
index f019989a2ca8..4975db17c717 100644
--- a/dpnp/dpnp_container.py
+++ b/dpnp/dpnp_container.py
@@ -35,7 +35,6 @@
 
 """
 
-
 import dpctl.tensor as dpt
 import dpctl.utils as dpu
 
diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py
index 4cdc74c75ca0..fba1a215756a 100644
--- a/dpnp/dpnp_iface.py
+++ b/dpnp/dpnp_iface.py
@@ -38,6 +38,7 @@
  - The functions parameters check
 
 """
+
 # pylint: disable=protected-access
 
 import os
diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py
index 50b425003764..8d4ebdd1a6c2 100644
--- a/dpnp/dpnp_iface_arraycreation.py
+++ b/dpnp/dpnp_iface_arraycreation.py
@@ -39,7 +39,6 @@
 
 """
 
-
 # pylint: disable=duplicate-code
 
 import operator
diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py
index 3cec24a44159..9df5278bd16b 100644
--- a/dpnp/dpnp_iface_manipulation.py
+++ b/dpnp/dpnp_iface_manipulation.py
@@ -39,7 +39,6 @@
 
 """
 
-
 import math
 import operator
 import warnings
@@ -3418,7 +3417,7 @@ def rot90(m, k=1, axes=(0, 1)):
         return dpnp.flip(dpnp.flip(m, axes[0]), axes[1])
 
     axes_list = list(range(0, m_ndim))
-    (axes_list[axes[0]], axes_list[axes[1]]) = (
+    axes_list[axes[0]], axes_list[axes[1]] = (
         axes_list[axes[1]],
         axes_list[axes[0]],
     )
diff --git a/dpnp/linalg/__init__.py b/dpnp/linalg/__init__.py
index e04928393a8c..3fa27db33473 100644
--- a/dpnp/linalg/__init__.py
+++ b/dpnp/linalg/__init__.py
@@ -35,7 +35,6 @@
 
 """
 
-
 from .dpnp_iface_linalg import (
     LinAlgError,
     cholesky,
diff --git a/dpnp/random/dpnp_random_state.py b/dpnp/random/dpnp_random_state.py
index 560e2670932b..e49fe739aedd 100644
--- a/dpnp/random/dpnp_random_state.py
+++ b/dpnp/random/dpnp_random_state.py
@@ -36,7 +36,6 @@
 
 """
 
-
 import dpctl.utils as dpu
 import numpy
 
diff --git a/dpnp/scipy/linalg/__init__.py b/dpnp/scipy/linalg/__init__.py
index a97f2cfb829c..3afc08a6fdb9 100644
--- a/dpnp/scipy/linalg/__init__.py
+++ b/dpnp/scipy/linalg/__init__.py
@@ -35,7 +35,6 @@
 
 """
 
-
 from ._decomp_lu import lu_factor, lu_solve
 
 __all__ = [
diff --git a/dpnp/scipy/linalg/_decomp_lu.py b/dpnp/scipy/linalg/_decomp_lu.py
index d2a58fba14d6..292d7fffe4b4 100644
--- a/dpnp/scipy/linalg/_decomp_lu.py
+++ b/dpnp/scipy/linalg/_decomp_lu.py
@@ -39,7 +39,6 @@
 
 """
 
-
 import dpnp
 from dpnp.linalg.dpnp_utils_linalg import (
     assert_stacked_2d,
@@ -181,7 +180,7 @@ def lu_solve(lu_and_piv, b, trans=0, overwrite_b=False, check_finite=True):
 
     """
 
-    (lu, piv) = lu_and_piv
+    lu, piv = lu_and_piv
     dpnp.check_supported_arrays_type(lu, piv, b)
     assert_stacked_2d(lu)
     assert_stacked_square(lu)
diff --git a/dpnp/scipy/linalg/_utils.py b/dpnp/scipy/linalg/_utils.py
index be736e076d86..282c645d1095 100644
--- a/dpnp/scipy/linalg/_utils.py
+++ b/dpnp/scipy/linalg/_utils.py
@@ -37,7 +37,6 @@
 
 """
 
-
 # pylint: disable=no-name-in-module
 # pylint: disable=protected-access
 
diff --git a/dpnp/tests/third_party/cupy/core_tests/test_flags.py b/dpnp/tests/third_party/cupy/core_tests/test_flags.py
index b2cc4fca59fb..3f416304a789 100644
--- a/dpnp/tests/third_party/cupy/core_tests/test_flags.py
+++ b/dpnp/tests/third_party/cupy/core_tests/test_flags.py
@@ -29,9 +29,7 @@ def test_key_error(self):
     def test_repr(self):
         assert """  C_CONTIGUOUS : 1
   F_CONTIGUOUS : 2
-  OWNDATA : 3""" == repr(
-            self.flags
-        )
+  OWNDATA : 3""" == repr(self.flags)
 
 
 @testing.parameterize(
diff --git a/dpnp/tests/third_party/cupy/core_tests/test_function.py b/dpnp/tests/third_party/cupy/core_tests/test_function.py
index 2943fc2bcf02..5480cdf6e126 100644
--- a/dpnp/tests/third_party/cupy/core_tests/test_function.py
+++ b/dpnp/tests/third_party/cupy/core_tests/test_function.py
@@ -154,9 +154,7 @@ def test_custom_user_struct(self):
     offsets[3] = (unsigned long long)&ptr->d;
     offsets[4] = (unsigned long long)&ptr->e;
 }}
-""".format(
-            struct_definition=struct_definition
-        )
+""".format(struct_definition=struct_definition)
 
         itemsize = cupy.ndarray(shape=(1,), dtype=numpy.uint64)
         sizes = cupy.ndarray(shape=(5,), dtype=numpy.uint64)
@@ -213,9 +211,7 @@ def make_packed(basetype, N, itemsize):
     sum += s.e[0] + s.e[1] + s.e[2];
     x[i] = a[i] + sum;
 }}
-""".format(
-            struct_definition=struct_definition
-        )
+""".format(struct_definition=struct_definition)
 
         a_cpu = numpy.arange(24, dtype=numpy.float64)
         a = cupy.array(a_cpu)
diff --git a/dpnp/tests/third_party/cupy/core_tests/test_include.py b/dpnp/tests/third_party/cupy/core_tests/test_include.py
index 9c6372cf2bcd..a45d2b40cbf4 100644
--- a/dpnp/tests/third_party/cupy/core_tests/test_include.py
+++ b/dpnp/tests/third_party/cupy/core_tests/test_include.py
@@ -18,25 +18,19 @@
 #include <cupy/hip_workaround.cuh>
 """
 
-_code_nvcc = (
-    _code_base
-    + """
+_code_nvcc = _code_base + """
 #include <cupy/type_dispatcher.cuh>
 
 int main() {
     return 0;
 }
 """
-)
 
-_code_nvrtc = (
-    _code_base
-    + """
+_code_nvrtc = _code_base + """
 
 __device__ void kernel() {
 }
 """
-)
 
 
 @pytest.mark.skipif(cupy.cuda.runtime.is_hip, reason="for CUDA")
diff --git a/dpnp/tests/third_party/cupy/test_init.py b/dpnp/tests/third_party/cupy/test_init.py
index 0a841ba28b21..a4f1ad78f7f5 100644
--- a/dpnp/tests/third_party/cupy/test_init.py
+++ b/dpnp/tests/third_party/cupy/test_init.py
@@ -36,11 +36,9 @@ def _run_script(code):
 
 
 def _test_cupy_available(self):
-    returncode, stdoutdata, stderrdata = _run_script(
-        """
+    returncode, stdoutdata, stderrdata = _run_script("""
 import dpnp as cupy
-print(cupy.is_available())"""
-    )
+print(cupy.is_available())""")
     assert returncode == 0, "stderr: {!r}".format(stderrdata)
     assert stdoutdata in (b"True\n", b"True\r\n", b"False\n", b"False\r\n")
     return stdoutdata == b"True\n" or stdoutdata == b"True\r\n"
@@ -49,14 +47,12 @@ def _test_cupy_available(self):
 class TestImportError(unittest.TestCase):
 
     def test_import_error(self):
-        returncode, stdoutdata, stderrdata = _run_script(
-            """
+        returncode, stdoutdata, stderrdata = _run_script("""
 try:
     import dpnp as cupy
 except Exception as e:
     print(type(e).__name__)
-"""
-        )
+""")
         assert returncode == 0, "stderr: {!r}".format(stderrdata)
         assert stdoutdata in (b"", b"RuntimeError\n")
 
diff --git a/dpnp/tests/third_party/cupy/testing/_loops.py b/dpnp/tests/third_party/cupy/testing/_loops.py
index 6a383780b9ca..63cd09147c4b 100644
--- a/dpnp/tests/third_party/cupy/testing/_loops.py
+++ b/dpnp/tests/third_party/cupy/testing/_loops.py
@@ -307,7 +307,7 @@ def decorator(impl):
         @_wraps_partial_xp(impl, name, sp_name, scipy_name)
         def test_func(*args, **kw):
             # Run cupy and numpy
-            (cupy_result, cupy_error, numpy_result, numpy_error) = (
+            cupy_result, cupy_error, numpy_result, numpy_error = (
                 _call_func_numpy_cupy(impl, args, kw, name, sp_name, scipy_name)
             )
             assert cupy_result is not None or cupy_error is not None
@@ -344,9 +344,7 @@ def test_func(*args, **kw):
                         raise AssertionError(
                             """ndarrays of different dtypes are returned.
 cupy: {}
-numpy: {}""".format(
-                                cupy_r.dtype, numpy_r.dtype
-                            )
+numpy: {}""".format(cupy_r.dtype, numpy_r.dtype)
                         )
 
             # Check contiguous
@@ -902,7 +900,7 @@ def decorator(impl):
         @_wraps_partial_xp(impl, name, sp_name, scipy_name)
         def test_func(*args, **kw):
             # Run cupy and numpy
-            (cupy_result, cupy_error, numpy_result, numpy_error) = (
+            cupy_result, cupy_error, numpy_result, numpy_error = (
                 _call_func_numpy_cupy(impl, args, kw, name, sp_name, scipy_name)
             )
 
@@ -958,7 +956,7 @@ def decorator(impl):
         @_wraps_partial_xp(impl, name, sp_name, scipy_name)
         def test_func(*args, **kw):
             # Run cupy and numpy
-            (cupy_result, cupy_error, numpy_result, numpy_error) = (
+            cupy_result, cupy_error, numpy_result, numpy_error = (
                 _call_func_numpy_cupy(impl, args, kw, name, sp_name, scipy_name)
             )
 
diff --git a/dpnp/tests/third_party/cupy/testing/_pytest_impl.py b/dpnp/tests/third_party/cupy/testing/_pytest_impl.py
index 97dfee5e174a..ecc4a6338eb5 100644
--- a/dpnp/tests/third_party/cupy/testing/_pytest_impl.py
+++ b/dpnp/tests/third_party/cupy/testing/_pytest_impl.py
@@ -19,14 +19,10 @@ def is_available():
 
 def check_available(feature):
     if not is_available():
-        raise RuntimeError(
-            """\
+        raise RuntimeError("""\
 cupy.testing: {} is not available.
 
-Reason: {}: {}""".format(
-                feature, type(_error).__name__, _error
-            )
-        )
+Reason: {}: {}""".format(feature, type(_error).__name__, _error))
 
 
 if is_available():
diff --git a/examples/example1.py b/examples/example1.py
index fdcbfbac9249..4d84c43c686c 100644
--- a/examples/example1.py
+++ b/examples/example1.py
@@ -37,7 +37,6 @@
 
 """
 
-
 import time
 
 import dpctl
diff --git a/examples/example2.py b/examples/example2.py
index 853f879a7f11..eb856a2bed80 100644
--- a/examples/example2.py
+++ b/examples/example2.py
@@ -37,7 +37,6 @@
 
 """
 
-
 import time
 
 import numpy
diff --git a/examples/example4.py b/examples/example4.py
index 0149c22f7ced..ea70ef18d612 100644
--- a/examples/example4.py
+++ b/examples/example4.py
@@ -34,7 +34,6 @@
 
 """
 
-
 import numpy
 
 """