From a852c207738838ff2d81217075f9f658c567cda7 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 2 Apr 2026 16:57:00 -0500 Subject: [PATCH] Migrate memory resources from rmm::mr::device_memory_resource to CCCL resource concept RMM 26.06 removes rmm::mr::device_memory_resource and adopts CCCL-native memory resource concepts. This migrates all cuCascade resource classes: - null_device_memory_resource: replaced virtual override with CCCL concept methods (allocate/deallocate with cuda::stream_ref, allocate_sync, deallocate_sync, operator==, get_property). - numa_region_pinned_host_allocator: same pattern. - fixed_size_host_memory_resource: same pattern. - reservation_aware_resource_adaptor: same pattern. - small_pinned_host_memory_resource: same pattern. - memory_space: unique_ptr replaced with cuda::mr::any_resource for owning storage; get_default_allocator returns device_async_resource_ref from any_resource. - memory_reservation.hpp: moved template bodies out-of-line to avoid incomplete-type errors with memory_space forward declaration. - common.hpp/cpp: updated factory typedefs and pool_memory_resource usage (no longer a template in 26.06). - pool_memory_resource upstream passed as device_async_resource_ref. - Test utilities updated for new resource concept. 
--- include/cucascade/data/common.hpp | 1 - include/cucascade/memory/common.hpp | 11 +-- .../fixed_size_host_memory_resource.hpp | 63 ++++++++--------- .../cucascade/memory/memory_reservation.hpp | 29 +++----- .../memory/memory_reservation_manager.hpp | 4 +- include/cucascade/memory/memory_space.hpp | 27 ++++++-- .../memory/null_device_memory_resource.hpp | 48 +++++++++---- .../numa_region_pinned_host_allocator.hpp | 59 +++++----------- .../reservation_aware_resource_adaptor.hpp | 68 ++++++++++--------- .../small_pinned_host_memory_resource.hpp | 47 ++++++------- src/data/representation_converter.cpp | 33 +++++---- src/memory/common.cpp | 16 +++-- .../fixed_size_host_memory_resource.cpp | 17 ++--- src/memory/memory_reservation.cpp | 2 +- src/memory/memory_space.cpp | 36 ++++------ .../numa_region_pinned_host_allocator.cpp | 34 +++++++--- .../reservation_aware_resource_adaptor.cpp | 49 ++++++------- .../reservation_manager_configurator.cpp | 5 +- .../small_pinned_host_memory_resource.cpp | 19 +++--- .../test_memory_reservation_manager.cpp | 3 +- ...test_small_pinned_host_memory_resource.cpp | 4 +- test/unittest.cpp | 11 ++- test/utils/cudf_test_utils.cpp | 55 ++++++++------- test/utils/mock_test_utils.hpp | 23 ++++--- test/utils/test_memory_resources.hpp | 42 ++++++------ 25 files changed, 364 insertions(+), 342 deletions(-) diff --git a/include/cucascade/data/common.hpp b/include/cucascade/data/common.hpp index 6e3e623..6bd0ad0 100644 --- a/include/cucascade/data/common.hpp +++ b/include/cucascade/data/common.hpp @@ -20,7 +20,6 @@ #include #include -#include #include #include diff --git a/include/cucascade/memory/common.hpp b/include/cucascade/memory/common.hpp index 988aee0..9975a6c 100644 --- a/include/cucascade/memory/common.hpp +++ b/include/cucascade/memory/common.hpp @@ -18,7 +18,8 @@ #pragma once #include -#include + +#include #include #include @@ -60,13 +61,13 @@ class memory_space_id { }; using DeviceMemoryResourceFactoryFn = - std::function(int device_id, 
- std::size_t capacity)>; + std::function(int device_id, + std::size_t capacity)>; -std::unique_ptr make_default_gpu_memory_resource( +cuda::mr::any_resource make_default_gpu_memory_resource( int device_id, std::size_t capacity); -std::unique_ptr make_default_host_memory_resource( +cuda::mr::any_resource make_default_host_memory_resource( int device_id, std::size_t capacity); DeviceMemoryResourceFactoryFn make_default_allocator_for_tier(Tier tier); diff --git a/include/cucascade/memory/fixed_size_host_memory_resource.hpp b/include/cucascade/memory/fixed_size_host_memory_resource.hpp index f211a6f..99c58ee 100644 --- a/include/cucascade/memory/fixed_size_host_memory_resource.hpp +++ b/include/cucascade/memory/fixed_size_host_memory_resource.hpp @@ -26,8 +26,6 @@ #include #include #include -#include -#include #include #include @@ -65,7 +63,7 @@ namespace memory { * Modified to derive from device_memory_resource instead of host_memory_resource for RMM * compatibility. */ -class fixed_size_host_memory_resource : public rmm::mr::device_memory_resource { +class fixed_size_host_memory_resource { public: static constexpr std::size_t default_block_size = 1 << 20; ///< Default block size (1MB) static constexpr std::size_t default_pool_size = 128; ///< Default number of blocks in pool @@ -234,7 +232,7 @@ class fixed_size_host_memory_resource : public rmm::mr::device_memory_resource { /** * @brief Destructor - frees all allocated blocks. 
*/ - ~fixed_size_host_memory_resource() override; + ~fixed_size_host_memory_resource(); [[nodiscard]] std::size_t get_total_allocated_bytes() const noexcept { @@ -322,6 +320,34 @@ class fixed_size_host_memory_resource : public rmm::mr::device_memory_resource { */ std::size_t get_peak_total_allocated_bytes() const; + void* allocate(cuda::stream_ref stream, + std::size_t bytes, + std::size_t alignment = alignof(std::max_align_t)); + + void deallocate(cuda::stream_ref stream, + void* ptr, + std::size_t bytes, + std::size_t alignment = alignof(std::max_align_t)) noexcept; + + void* allocate_sync(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) + { + return allocate(cuda::stream_ref{}, bytes, alignment); + } + + void deallocate_sync(void* ptr, + std::size_t bytes, + std::size_t alignment = alignof(std::max_align_t)) noexcept + { + deallocate(cuda::stream_ref{}, ptr, bytes, alignment); + } + + [[nodiscard]] bool operator==(fixed_size_host_memory_resource const& other) const noexcept; + + friend void get_property(fixed_size_host_memory_resource const&, + cuda::mr::device_accessible) noexcept + { + } + protected: /** * @brief grows reservation by a `bytes` size @@ -340,35 +366,6 @@ class fixed_size_host_memory_resource : public rmm::mr::device_memory_resource { std::size_t do_reserve_upto(std::size_t bytes, std::size_t mem_limit); - /** - * @brief Allocate memory of the specified size. - * - * @param bytes Size in bytes (must be <= block_size_) - * @param stream CUDA stream (ignored for host memory) - * @return void* Pointer to allocated memory - * @throws rmm::logic_error if allocation size exceeds block size - * @throws rmm::out_of_memory if no free blocks are available and upstream allocation fails - */ - void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override; - - /** - * @brief Deallocate memory. 
- * - * @param ptr Pointer to deallocate - * @param bytes Size in bytes (must be <= block_size_) - * @param stream CUDA stream (ignored for host memory) - */ - void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) noexcept override; - - /** - * @brief Check if this resource is equal to another. - * - * @param other Other resource to compare - * @return bool True if equal - */ - [[nodiscard]] bool do_is_equal( - const rmm::mr::device_memory_resource& other) const noexcept override; - private: /** * @brief Expand the pool by allocating more blocks from upstream. diff --git a/include/cucascade/memory/memory_reservation.hpp b/include/cucascade/memory/memory_reservation.hpp index b881da3..e4371f4 100644 --- a/include/cucascade/memory/memory_reservation.hpp +++ b/include/cucascade/memory/memory_reservation.hpp @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include @@ -42,23 +42,19 @@ class memory_space; template struct tier_memory_resource_trait { - using upstream_type = rmm::mr::device_memory_resource; - using type = rmm::mr::device_memory_resource; - Tier tier = TIER; + Tier tier = TIER; }; template <> struct tier_memory_resource_trait { - using upstream_type = rmm::mr::device_memory_resource; - using type = fixed_size_host_memory_resource; - Tier tier = Tier::HOST; + using type = fixed_size_host_memory_resource; + Tier tier = Tier::HOST; }; template <> struct tier_memory_resource_trait { - using upstream_type = rmm::mr::device_memory_resource; - using type = reservation_aware_resource_adaptor; - Tier tier = Tier::GPU; + using type = reservation_aware_resource_adaptor; + Tier tier = Tier::GPU; }; //===----------------------------------------------------------------------===// @@ -216,22 +212,15 @@ class reservation { [[nodiscard]] int device_id() const noexcept; - [[nodiscard]] rmm::mr::device_memory_resource* get_memory_resource() const noexcept; + [[nodiscard]] rmm::device_async_resource_ref get_memory_resource() const 
noexcept; [[nodiscard]] const memory_space& get_memory_space() const noexcept; template - requires std::derived_from - T* get_memory_resource_as() const noexcept - { - return dynamic_cast(get_memory_resource()); - } + T* get_memory_resource_as() const noexcept; template - auto* get_memory_resource_of() const noexcept - { - return get_memory_resource_as::type>(); - } + auto* get_memory_resource_of() const noexcept; //===----------------------------------------------------------------------===// // Reservation Size Management diff --git a/include/cucascade/memory/memory_reservation_manager.hpp b/include/cucascade/memory/memory_reservation_manager.hpp index fc8e09f..1beb3fc 100644 --- a/include/cucascade/memory/memory_reservation_manager.hpp +++ b/include/cucascade/memory/memory_reservation_manager.hpp @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -34,9 +35,6 @@ #include #include -#include -#include - namespace cucascade { namespace memory { diff --git a/include/cucascade/memory/memory_space.hpp b/include/cucascade/memory/memory_space.hpp index 96d2855..8ddcde8 100644 --- a/include/cucascade/memory/memory_space.hpp +++ b/include/cucascade/memory/memory_space.hpp @@ -33,7 +33,6 @@ #include #include #include -#include #include namespace cucascade { @@ -100,13 +99,19 @@ class memory_space { [[nodiscard]] size_t get_max_memory() const noexcept; // Allocator management - [[nodiscard]] rmm::mr::device_memory_resource* get_default_allocator() const noexcept; + [[nodiscard]] rmm::device_async_resource_ref get_default_allocator() const noexcept; template - requires std::derived_from T* get_memory_resource_as() const noexcept { - return dynamic_cast(get_default_allocator()); + T* result = nullptr; + std::visit( + [&result](const auto& ptr) { + using held_type = std::decay_t; + if constexpr (std::is_same_v) { result = ptr.get(); } + }, + _reservation_allocator); + return result; } template @@ -136,7 +141,7 @@ class memory_space { std::make_shared(); // 
Memory resources owned by this memory_space - std::unique_ptr _allocator; + cuda::mr::any_resource _allocator; reserving_adaptor_type _reservation_allocator; std::unique_ptr _stream_pool; }; @@ -149,5 +154,17 @@ struct memory_space_hash { size_t operator()(const memory_space& ms) const; }; +template +T* reservation::get_memory_resource_as() const noexcept +{ + return _space->get_memory_resource_as(); +} + +template +auto* reservation::get_memory_resource_of() const noexcept +{ + return get_memory_resource_as::type>(); +} + } // namespace memory } // namespace cucascade diff --git a/include/cucascade/memory/null_device_memory_resource.hpp b/include/cucascade/memory/null_device_memory_resource.hpp index a523fc0..05390aa 100644 --- a/include/cucascade/memory/null_device_memory_resource.hpp +++ b/include/cucascade/memory/null_device_memory_resource.hpp @@ -17,38 +17,58 @@ #pragma once -#include -#include +#include +#include + +#include namespace cucascade { namespace memory { /** - * A no-op device_memory_resource used for DISK tier to satisfy API requirements. + * A no-op memory resource used for DISK tier to satisfy API requirements. 
* - allocate always returns nullptr * - deallocate is a no-op */ -class null_device_memory_resource : public rmm::mr::device_memory_resource { +class null_device_memory_resource { public: - null_device_memory_resource() = default; - ~null_device_memory_resource() override = default; + null_device_memory_resource() = default; + ~null_device_memory_resource() = default; - protected: - void* do_allocate([[maybe_unused]] std::size_t bytes, - [[maybe_unused]] rmm::cuda_stream_view stream) override + void* allocate([[maybe_unused]] cuda::stream_ref stream, + [[maybe_unused]] std::size_t bytes, + [[maybe_unused]] std::size_t alignment = alignof(std::max_align_t)) { return nullptr; } - void do_deallocate([[maybe_unused]] void* p, - [[maybe_unused]] std::size_t bytes, - [[maybe_unused]] rmm::cuda_stream_view stream) noexcept override + + void deallocate([[maybe_unused]] cuda::stream_ref stream, + [[maybe_unused]] void* p, + [[maybe_unused]] std::size_t bytes, + [[maybe_unused]] std::size_t alignment = alignof(std::max_align_t)) noexcept { } - [[nodiscard]] bool do_is_equal( - const rmm::mr::device_memory_resource& other) const noexcept override + + void* allocate_sync([[maybe_unused]] std::size_t bytes, + [[maybe_unused]] std::size_t alignment = alignof(std::max_align_t)) + { + return nullptr; + } + + void deallocate_sync([[maybe_unused]] void* p, + [[maybe_unused]] std::size_t bytes, + [[maybe_unused]] std::size_t alignment = alignof(std::max_align_t)) noexcept + { + } + + bool operator==(null_device_memory_resource const& other) const noexcept { return this == &other; } + + friend void get_property(null_device_memory_resource const&, cuda::mr::device_accessible) noexcept + { + } }; } // namespace memory diff --git a/include/cucascade/memory/numa_region_pinned_host_allocator.hpp b/include/cucascade/memory/numa_region_pinned_host_allocator.hpp index a5f66b5..03ad24d 100644 --- a/include/cucascade/memory/numa_region_pinned_host_allocator.hpp +++ 
b/include/cucascade/memory/numa_region_pinned_host_allocator.hpp @@ -17,8 +17,7 @@ #pragma once -#include - +#include #include #include @@ -26,7 +25,7 @@ namespace cucascade { namespace memory { -class numa_region_pinned_host_memory_resource final : public rmm::mr::device_memory_resource { +class numa_region_pinned_host_memory_resource final { public: explicit numa_region_pinned_host_memory_resource(int numa_node) : _numa_node(numa_node) {} ~numa_region_pinned_host_memory_resource() = default; @@ -37,53 +36,32 @@ class numa_region_pinned_host_memory_resource final : public rmm::mr::device_mem numa_region_pinned_host_memory_resource& operator=(numa_region_pinned_host_memory_resource&&) = default; - private: /** * @brief Allocates pinned host memory of size at least \p bytes bytes. - * - * @throws rmm::out_of_memory if the requested allocation could not be fulfilled due to a - * CUDA out of memory error. - * @throws rmm::bad_alloc if the requested allocation could not be fulfilled due to any other - * reason. - * - * The stream argument is ignored. - * - * @param bytes The size, in bytes, of the allocation. - * @param stream CUDA stream on which to perform the allocation (ignored). - * - * @return Pointer to the newly allocated memory. */ - void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override; + void* allocate(cuda::stream_ref stream, + std::size_t bytes, + std::size_t alignment = alignof(std::max_align_t)); /** * @brief Deallocate memory pointed to by \p ptr. - * - * The stream argument is ignored. - * - * @param ptr Pointer to be deallocated - * @param bytes The size in bytes of the allocation. This must be equal to the - * value of `bytes` that was passed to the `allocate` call that returned `ptr`. - * @param stream This argument is ignored. 
*/ - void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) noexcept override; + void deallocate(cuda::stream_ref stream, + void* ptr, + std::size_t bytes, + std::size_t alignment = alignof(std::max_align_t)) noexcept; - /** - * @brief Compare this resource to another. - * - * Two pinned_host_memory_resources always compare equal, because they can each - * deallocate memory allocated by the other. - * - * @param other The other resource to compare to - * @return true If the two resources are equivalent - * @return false If the two resources are not equal - */ - [[nodiscard]] bool do_is_equal( - const rmm::mr::device_memory_resource& other) const noexcept override; + void* allocate_sync(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)); + + void deallocate_sync(void* ptr, + std::size_t bytes, + std::size_t alignment = alignof(std::max_align_t)) noexcept; + + [[nodiscard]] bool operator==( + numa_region_pinned_host_memory_resource const& other) const noexcept; /** * @brief Enables the `cuda::mr::device_accessible` property - * - * This property declares that a `pinned_host_memory_resource` provides device accessible memory */ friend void get_property(numa_region_pinned_host_memory_resource const&, cuda::mr::device_accessible) noexcept @@ -92,14 +70,13 @@ class numa_region_pinned_host_memory_resource final : public rmm::mr::device_mem /** * @brief Enables the `cuda::mr::host_accessible` property - * - * This property declares that a `pinned_host_memory_resource` provides host accessible memory */ friend void get_property(numa_region_pinned_host_memory_resource const&, cuda::mr::host_accessible) noexcept { } + private: int _numa_node{-1}; }; diff --git a/include/cucascade/memory/reservation_aware_resource_adaptor.hpp b/include/cucascade/memory/reservation_aware_resource_adaptor.hpp index 73f5de3..ccca680 100644 --- a/include/cucascade/memory/reservation_aware_resource_adaptor.hpp +++ 
b/include/cucascade/memory/reservation_aware_resource_adaptor.hpp @@ -18,9 +18,9 @@ #pragma once #include -#include #include +#include #include #include @@ -54,7 +54,7 @@ namespace memory { * * Based on RMM's tracking_resource_adaptor but extended for per-stream tracking. */ -class reservation_aware_resource_adaptor : public rmm::mr::device_memory_resource { +class reservation_aware_resource_adaptor { public: struct device_reserved_arena : public reserved_arena { friend class reservation_aware_resource_adaptor; @@ -163,8 +163,8 @@ class reservation_aware_resource_adaptor : public rmm::mr::device_memory_resourc std::size_t capacity, std::unique_ptr stream_reservation_policy = nullptr, std::unique_ptr default_oom_policy = nullptr, - AllocationTrackingScope tracking_scope = AllocationTrackingScope::PER_STREAM, - cudaMemPool_t pool_handle = nullptr); + AllocationTrackingScope tracking_scope = AllocationTrackingScope::PER_STREAM, + cudaMemPool_t pool_handle = nullptr); /** * @brief Constructs a per-stream tracking resource adaptor. @@ -186,13 +186,13 @@ class reservation_aware_resource_adaptor : public rmm::mr::device_memory_resourc std::size_t capacity, std::unique_ptr stream_reservation_policy = nullptr, std::unique_ptr default_oom_policy = nullptr, - AllocationTrackingScope tracking_scope = AllocationTrackingScope::PER_STREAM, - cudaMemPool_t pool_handle = nullptr); + AllocationTrackingScope tracking_scope = AllocationTrackingScope::PER_STREAM, + cudaMemPool_t pool_handle = nullptr); /** * @brief Destructor. 
*/ - ~reservation_aware_resource_adaptor() override = default; + ~reservation_aware_resource_adaptor() = default; // Non-copyable and non-movable to ensure resource stability reservation_aware_resource_adaptor(const reservation_aware_resource_adaptor&) = delete; @@ -326,6 +326,34 @@ class reservation_aware_resource_adaptor : public rmm::mr::device_memory_resourc */ const oom_handling_policy& get_default_oom_handling_policy() const; + void* allocate(cuda::stream_ref stream, + std::size_t bytes, + std::size_t alignment = alignof(std::max_align_t)); + + void deallocate(cuda::stream_ref stream, + void* ptr, + std::size_t bytes, + std::size_t alignment = alignof(std::max_align_t)) noexcept; + + void* allocate_sync(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) + { + return allocate(cuda::stream_ref{}, bytes, alignment); + } + + void deallocate_sync(void* ptr, + std::size_t bytes, + std::size_t alignment = alignof(std::max_align_t)) noexcept + { + deallocate(cuda::stream_ref{}, ptr, bytes, alignment); + } + + bool operator==(reservation_aware_resource_adaptor const& other) const noexcept; + + friend void get_property(reservation_aware_resource_adaptor const&, + cuda::mr::device_accessible) noexcept + { + } + private: /** * @brief grows reservation by a `bytes` size @@ -396,32 +424,6 @@ class reservation_aware_resource_adaptor : public rmm::mr::device_memory_resourc */ void do_release_reservation(device_reserved_arena* reservation) noexcept; - /** - * @brief Allocates memory from the upstream resource and tracks it. - * - * @param bytes The number of bytes to allocate - * @param stream The CUDA stream to use for the allocation - * @return Pointer to allocated memory - */ - void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override; - - /** - * @brief Deallocates previously allocated memory and updates tracking. 
- * - * @param ptr Pointer to memory to deallocate - * @param bytes The number of bytes that were allocated - * @param stream The CUDA stream to use for the deallocation - */ - void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) noexcept override; - - /** - * @brief Checks equality with another memory resource. - * - * @param other The other memory resource to compare with - * @return true if this resource is the same as other - */ - bool do_is_equal(const rmm::mr::device_memory_resource& other) const noexcept override; - memory_space_id _space_id; /// The upstream memory resource diff --git a/include/cucascade/memory/small_pinned_host_memory_resource.hpp b/include/cucascade/memory/small_pinned_host_memory_resource.hpp index ddd6c35..003d606 100644 --- a/include/cucascade/memory/small_pinned_host_memory_resource.hpp +++ b/include/cucascade/memory/small_pinned_host_memory_resource.hpp @@ -19,8 +19,8 @@ #include -#include -#include +#include +#include #include #include @@ -53,7 +53,7 @@ namespace memory { * This eliminates the pageable H2D transfers that cuDF would otherwise issue * when building column_device_view metadata arrays for cudf::concatenate. */ -class small_pinned_host_memory_resource : public rmm::mr::device_memory_resource { +class small_pinned_host_memory_resource { public: /// Maximum allocation size handled by the slab pools. /// Requests larger than this use pageable memory. @@ -72,35 +72,36 @@ class small_pinned_host_memory_resource : public rmm::mr::device_memory_resource small_pinned_host_memory_resource(small_pinned_host_memory_resource&&) = delete; small_pinned_host_memory_resource& operator=(small_pinned_host_memory_resource&&) = delete; - ~small_pinned_host_memory_resource() override; + ~small_pinned_host_memory_resource(); - private: /** * @brief Allocate pinned memory. 
- * - * For @p bytes <= MAX_SLAB_SIZE: rounds up to the next slab boundary - * (512 / 1 KB / 2 KB / 4 KB / 8 KB) and returns a pointer from the matching - * free list, expanding the pool from upstream if the list is empty. - * - * For @p bytes > MAX_SLAB_SIZE: falls back to std::malloc (pageable). - * The cudf::set_allocate_host_as_pinned_threshold is set to MAX_SLAB_SIZE so - * that cuDF's make_host_vector path uses the slab pools for metadata buffers. - * Larger allocations (e.g. join/sort staging buffers that call - * get_pinned_memory_resource() directly) are served from pageable memory. */ - void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override; + void* allocate(cuda::stream_ref stream, + std::size_t bytes, + std::size_t alignment = alignof(std::max_align_t)); /** * @brief Return memory to the appropriate pool. - * - * Slabs (@p bytes <= MAX_SLAB_SIZE) are returned to the free list. - * Pageable allocations (@p bytes > MAX_SLAB_SIZE) are freed via std::free. - * @p bytes must equal the value passed to the corresponding do_allocate. */ - void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) noexcept override; + void deallocate(cuda::stream_ref stream, + void* ptr, + std::size_t bytes, + std::size_t alignment = alignof(std::max_align_t)) noexcept; + + void* allocate_sync(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) + { + return allocate(cuda::stream_ref{}, bytes, alignment); + } + + void deallocate_sync(void* ptr, + std::size_t bytes, + std::size_t alignment = alignof(std::max_align_t)) noexcept + { + deallocate(cuda::stream_ref{}, ptr, bytes, alignment); + } - [[nodiscard]] bool do_is_equal( - const rmm::mr::device_memory_resource& other) const noexcept override; + bool operator==(small_pinned_host_memory_resource const& other) const noexcept; /** * @brief Declares that memory allocated here is accessible from GPU devices. 
diff --git a/src/data/representation_converter.cpp b/src/data/representation_converter.cpp index 117eb75..e1bd351 100644 --- a/src/data/representation_converter.cpp +++ b/src/data/representation_converter.cpp @@ -17,6 +17,7 @@ #include "cudf/contiguous_split.hpp" +#include #include #include #include @@ -32,10 +33,9 @@ #include #include -#include - #include #include +#include #include @@ -156,11 +156,11 @@ std::unique_ptr convert_gpu_to_gpu( // Asynchronously copy device->device across GPUs CUCASCADE_CUDA_TRY(cudaMemcpyPeerAsync(dst_uvector.data(), - target_device_id, - static_cast(packed_data.gpu_data->data()), - source_device_id, - bytes_to_copy, - stream.value())); + target_device_id, + static_cast(packed_data.gpu_data->data()), + source_device_id, + bytes_to_copy, + stream.value())); stream.synchronize(); // Unpack on target device to build a cudf::table that lives on the target GPU CUCASCADE_CUDA_TRY(cudaSetDevice(target_device_id)); @@ -254,10 +254,10 @@ std::unique_ptr convert_host_to_gpu( size_t bytes_to_copy = std::min(remaining_bytes, bytes_available_in_src_block); auto src_block = host_table->allocation->at(src_block_index); CUCASCADE_CUDA_TRY(cudaMemcpyAsync(static_cast(dst_buffer.data()) + dst_offset, - src_block.data() + src_block_offset, - bytes_to_copy, - cudaMemcpyHostToDevice, - stream.value())); + src_block.data() + src_block_offset, + bytes_to_copy, + cudaMemcpyHostToDevice, + stream.value())); dst_offset += bytes_to_copy; src_block_offset += bytes_to_copy; if (src_block_offset == src_block_size) { @@ -417,7 +417,8 @@ struct BatchCopyAccumulator { // cudaMemcpyBatchAsync requires CUDA 12.8+; fall back to individual copies. 
(void)src_order; for (std::size_t i = 0; i < count(); ++i) { - CUCASCADE_CUDA_TRY(cudaMemcpyAsync(dsts[i], srcs[i], sizes[i], cudaMemcpyDefault, stream.value())); + CUCASCADE_CUDA_TRY( + cudaMemcpyAsync(dsts[i], srcs[i], sizes[i], cudaMemcpyDefault, stream.value())); } #endif } @@ -613,7 +614,7 @@ static rmm::device_buffer alloc_and_schedule_h2d(memory::fixed_multiple_blocks_a std::size_t alloc_offset, std::size_t size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr, + rmm::device_async_resource_ref mr, BatchCopyAccumulator& batch) { rmm::device_buffer buf(size, stream, mr); @@ -655,7 +656,7 @@ static std::unique_ptr reconstruct_column( const memory::column_metadata& meta, memory::fixed_multiple_blocks_allocation& alloc, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr, + rmm::device_async_resource_ref mr, BatchCopyAccumulator& batch) { // Null mask (shared by all type categories) @@ -729,9 +730,7 @@ std::unique_ptr convert_host_fast_to_gpu( { auto& fast_source = source.cast(); const auto& fast_table = fast_source.get_host_table(); - if (!fast_table) { - throw std::runtime_error("convert_host_fast_to_gpu: host table is null"); - } + if (!fast_table) { throw std::runtime_error("convert_host_fast_to_gpu: host table is null"); } if (!fast_table->allocation) { throw std::runtime_error("convert_host_fast_to_gpu: host table allocation is null"); } diff --git a/src/memory/common.cpp b/src/memory/common.cpp index 1cb1eba..fe278d3 100644 --- a/src/memory/common.cpp +++ b/src/memory/common.cpp @@ -26,17 +26,19 @@ namespace cucascade { namespace memory { -std::unique_ptr make_default_gpu_memory_resource(int device_id, - size_t capacity) +cuda::mr::any_resource make_default_gpu_memory_resource( + int device_id, size_t capacity) { rmm::cuda_set_device_raii set_device(rmm::cuda_device_id{device_id}); - return std::make_unique(capacity); + return cuda::mr::any_resource{ + rmm::mr::cuda_async_memory_resource(capacity)}; } -std::unique_ptr 
make_default_host_memory_resource( +cuda::mr::any_resource make_default_host_memory_resource( int numa_node_id, [[maybe_unused]] size_t capacity) { - return std::make_unique(numa_node_id); + return cuda::mr::any_resource{ + cucascade::memory::numa_region_pinned_host_memory_resource(numa_node_id)}; } DeviceMemoryResourceFactoryFn make_default_allocator_for_tier(Tier tier) @@ -46,7 +48,9 @@ DeviceMemoryResourceFactoryFn make_default_allocator_for_tier(Tier tier) } else if (tier == Tier::HOST) { return make_default_host_memory_resource; } else { - return [](int, size_t) { return std::make_unique(); }; + return [](int, size_t) { + return cuda::mr::any_resource{null_device_memory_resource{}}; + }; } } diff --git a/src/memory/fixed_size_host_memory_resource.cpp b/src/memory/fixed_size_host_memory_resource.cpp index d53f5d6..cff6a40 100644 --- a/src/memory/fixed_size_host_memory_resource.cpp +++ b/src/memory/fixed_size_host_memory_resource.cpp @@ -23,7 +23,6 @@ #include #include -#include #include #include @@ -188,21 +187,23 @@ std::vector fixed_size_host_memory_resource::allocate_multiple_block return {}; } -void* fixed_size_host_memory_resource::do_allocate(std::size_t /*bytes*/, - rmm::cuda_stream_view /*stream*/) +void* fixed_size_host_memory_resource::allocate([[maybe_unused]] cuda::stream_ref stream, + [[maybe_unused]] std::size_t bytes, + [[maybe_unused]] std::size_t alignment) { throw rmm::logic_error( "fixed_size_host_memory_resource doesn't support allocate, use allocate_multiple_blocks"); } -void fixed_size_host_memory_resource::do_deallocate(void* /*ptr*/, - std::size_t /*bytes*/, - rmm::cuda_stream_view /*stream*/) noexcept +void fixed_size_host_memory_resource::deallocate([[maybe_unused]] cuda::stream_ref stream, + [[maybe_unused]] void* ptr, + [[maybe_unused]] std::size_t bytes, + [[maybe_unused]] std::size_t alignment) noexcept { } -bool fixed_size_host_memory_resource::do_is_equal( - const rmm::mr::device_memory_resource& other) const noexcept +bool 
fixed_size_host_memory_resource::operator==( + fixed_size_host_memory_resource const& other) const noexcept { return this == &other; } diff --git a/src/memory/memory_reservation.cpp b/src/memory/memory_reservation.cpp index 8e16e8b..58dd9eb 100644 --- a/src/memory/memory_reservation.cpp +++ b/src/memory/memory_reservation.cpp @@ -49,7 +49,7 @@ reservation::reservation(const memory_space* space, std::unique_ptrget_default_allocator(); } diff --git a/src/memory/memory_space.cpp b/src/memory/memory_space.cpp index dff87f4..bb199ff 100644 --- a/src/memory/memory_space.cpp +++ b/src/memory/memory_space.cpp @@ -49,26 +49,25 @@ memory_space::memory_space(const gpu_memory_space_config& config) _memory_limit(config.reservation_limit()), _start_downgrading_memory_threshold(config.downgrade_trigger_threshold()), _stop_downgrading_memory_threshold(config.downgrade_stop_threshold()), - _allocator(config.mr_factory_fn - ? config.mr_factory_fn(config.device_id, config.memory_capacity) - : make_default_gpu_memory_resource(config.device_id, config.memory_capacity)), _stream_pool{[&]() -> std::unique_ptr { rmm::cuda_set_device_raii guard{rmm::cuda_device_id(config.device_id)}; return std::make_unique(16, rmm::cuda_stream::flags::non_blocking); }()} { - if (!_allocator) { throw std::invalid_argument("At least one allocator must be provided"); } - cudaMemPool_t pool_handle{nullptr}; - if (auto* r = dynamic_cast(_allocator.get())) { - pool_handle = r->pool_handle(); - } else if (auto* r = dynamic_cast(_allocator.get())) { - pool_handle = r->pool_handle(); + + if (config.mr_factory_fn) { + _allocator = config.mr_factory_fn(config.device_id, config.memory_capacity); + } else { + rmm::cuda_set_device_raii set_device(rmm::cuda_device_id{config.device_id}); + rmm::mr::cuda_async_memory_resource concrete_mr(config.memory_capacity); + pool_handle = concrete_mr.pool_handle(); + _allocator = cuda::mr::any_resource(std::move(concrete_mr)); } _reservation_allocator = std::make_unique( _id, - 
*_allocator, + rmm::device_async_resource_ref(_allocator), _memory_limit, _capacity, nullptr, @@ -89,10 +88,9 @@ memory_space::memory_space(const host_memory_space_config& config) ? config.mr_factory_fn(config.numa_id, config.memory_capacity) : make_default_host_memory_resource(config.numa_id, config.memory_capacity)) { - if (!_allocator) { throw std::invalid_argument("At least one allocator must be provided"); } _reservation_allocator = std::make_unique(_id.device_id, - *_allocator, + rmm::device_async_resource_ref(_allocator), _memory_limit, _capacity, config.block_size, @@ -106,7 +104,7 @@ memory_space::memory_space(const disk_memory_space_config& config) _memory_limit(config.reservation_limit()), _start_downgrading_memory_threshold(config.downgrade_trigger_threshold()), _stop_downgrading_memory_threshold(config.downgrade_stop_threshold()), - _allocator(std::make_unique()) + _allocator(null_device_memory_resource{}) { if (config.mount_paths.empty()) { throw std::invalid_argument("Mount path must be provided for disk memory space"); @@ -257,16 +255,10 @@ size_t memory_space::get_total_reserved_memory() const size_t memory_space::get_max_memory() const noexcept { return _memory_limit; } -rmm::mr::device_memory_resource* memory_space::get_default_allocator() const noexcept +rmm::device_async_resource_ref memory_space::get_default_allocator() const noexcept { - return std::visit( - utils::overloaded{[this]([[maybe_unused]] const std::unique_ptr& other) - -> rmm::mr::device_memory_resource* { return _allocator.get(); }, - [](const std::unique_ptr& mr) - -> rmm::mr::device_memory_resource* { return mr.get(); }, - [](const std::unique_ptr& mr) - -> rmm::mr::device_memory_resource* { return mr.get(); }}, - _reservation_allocator); + return rmm::device_async_resource_ref( + const_cast&>(_allocator)); } std::string memory_space::to_string() const diff --git a/src/memory/numa_region_pinned_host_allocator.cpp b/src/memory/numa_region_pinned_host_allocator.cpp index 
9a09504..a1492fd 100644 --- a/src/memory/numa_region_pinned_host_allocator.cpp +++ b/src/memory/numa_region_pinned_host_allocator.cpp @@ -15,9 +15,8 @@ * limitations under the License. */ -#include - #include +#include #include @@ -26,8 +25,9 @@ namespace cucascade { namespace memory { -void* numa_region_pinned_host_memory_resource::do_allocate( - std::size_t bytes, [[maybe_unused]] rmm::cuda_stream_view stream) +void* numa_region_pinned_host_memory_resource::allocate([[maybe_unused]] cuda::stream_ref stream, + std::size_t bytes, + [[maybe_unused]] std::size_t alignment) { CUCASCADE_FUNC_RANGE(); // don't allocate anything if the user requested zero bytes @@ -45,8 +45,11 @@ void* numa_region_pinned_host_memory_resource::do_allocate( } } -void numa_region_pinned_host_memory_resource::do_deallocate( - void* ptr, std::size_t bytes, [[maybe_unused]] rmm::cuda_stream_view stream) noexcept +void numa_region_pinned_host_memory_resource::deallocate( + [[maybe_unused]] cuda::stream_ref stream, + void* ptr, + std::size_t bytes, + [[maybe_unused]] std::size_t alignment) noexcept { CUCASCADE_FUNC_RANGE(); if (_numa_node == -1) { @@ -57,11 +60,22 @@ void numa_region_pinned_host_memory_resource::do_deallocate( } } -bool numa_region_pinned_host_memory_resource::do_is_equal( - const rmm::mr::device_memory_resource& other) const noexcept +void* numa_region_pinned_host_memory_resource::allocate_sync(std::size_t bytes, + [[maybe_unused]] std::size_t alignment) +{ + return allocate(cuda::stream_ref{}, bytes, alignment); +} + +void numa_region_pinned_host_memory_resource::deallocate_sync( + void* ptr, std::size_t bytes, [[maybe_unused]] std::size_t alignment) noexcept +{ + deallocate(cuda::stream_ref{}, ptr, bytes, alignment); +} + +bool numa_region_pinned_host_memory_resource::operator==( + numa_region_pinned_host_memory_resource const& other) const noexcept { - auto* mr_ptr = dynamic_cast(&other); - return mr_ptr == this && mr_ptr->_numa_node == this->_numa_node; + return this == 
&other && _numa_node == other._numa_node; } } // namespace memory diff --git a/src/memory/reservation_aware_resource_adaptor.cpp b/src/memory/reservation_aware_resource_adaptor.cpp index e864ace..5412e81 100644 --- a/src/memory/reservation_aware_resource_adaptor.cpp +++ b/src/memory/reservation_aware_resource_adaptor.cpp @@ -15,18 +15,17 @@ * limitations under the License. */ +#include #include #include #include #include -#include - -#include - #include #include +#include + #include #include #include @@ -44,7 +43,6 @@ using device_reserved_arena = reservation_aware_resource_adaptor::device_reserve namespace { - struct stream_ordered_allocation_tracker : public reservation_aware_resource_adaptor::allocation_tracker_iface { mutable std::mutex mutex; @@ -378,14 +376,16 @@ std::size_t reservation_aware_resource_adaptor::get_active_reservation_count() c return _number_of_allocations.load(); } -void* reservation_aware_resource_adaptor::do_allocate(std::size_t bytes, - rmm::cuda_stream_view stream) +void* reservation_aware_resource_adaptor::allocate(cuda::stream_ref stream, + std::size_t bytes, + [[maybe_unused]] std::size_t alignment) { - auto* reservation_state = _allocation_tracker->get_tracker_state(stream); + rmm::cuda_stream_view stream_view(stream.get()); + auto* reservation_state = _allocation_tracker->get_tracker_state(stream_view); if (reservation_state != nullptr) { - return do_allocate_managed(bytes, reservation_state, stream); + return do_allocate_managed(bytes, reservation_state, stream_view); } else { - return do_allocate_managed(bytes, stream); + return do_allocate_managed(bytes, stream_view); } } @@ -445,8 +445,11 @@ void* reservation_aware_resource_adaptor::do_allocate_unmanaged(std::size_t allo return _upstream.allocate(stream, allocation_bytes); } catch (std::exception& e) { _total_allocated_bytes.sub(tracking_bytes); - throw cucascade_out_of_memory( - e.what(), MemoryError::ALLOCATION_FAILED, allocation_bytes, post_allocation_size, _pool_handle); + 
throw cucascade_out_of_memory(e.what(), + MemoryError::ALLOCATION_FAILED, + allocation_bytes, + post_allocation_size, + _pool_handle); } } else { throw cucascade_out_of_memory("not enough capacity to allocate memory", @@ -457,13 +460,15 @@ void* reservation_aware_resource_adaptor::do_allocate_unmanaged(std::size_t allo } } -void reservation_aware_resource_adaptor::do_deallocate(void* ptr, - std::size_t bytes, - rmm::cuda_stream_view stream) noexcept +void reservation_aware_resource_adaptor::deallocate(cuda::stream_ref stream, + void* ptr, + std::size_t bytes, + [[maybe_unused]] std::size_t alignment) noexcept { + rmm::cuda_stream_view stream_view(stream.get()); auto tracking_bytes = rmm::align_up(bytes, rmm::CUDA_ALLOCATION_ALIGNMENT); auto upstream_reclaimed_bytes = tracking_bytes; - auto* reservation_state = _allocation_tracker->get_tracker_state(stream); + auto* reservation_state = _allocation_tracker->get_tracker_state(stream_view); if (reservation_state != nullptr) { auto* reservation = reservation_state->memory_reservation.get(); auto reservation_size = static_cast(reservation->size()); @@ -481,22 +486,18 @@ void reservation_aware_resource_adaptor::do_deallocate(void* ptr, // Suppress false-positive null-dereference warnings from CCCL library code #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wnull-dereference" - _upstream.deallocate(stream, ptr, bytes); + _upstream.deallocate(stream_view, ptr, bytes); #pragma GCC diagnostic pop _total_allocated_bytes.sub(upstream_reclaimed_bytes); } -bool reservation_aware_resource_adaptor::do_is_equal( - const rmm::mr::device_memory_resource& other) const noexcept +bool reservation_aware_resource_adaptor::operator==( + reservation_aware_resource_adaptor const& other) const noexcept { - // Check if it's the same type - const auto* other_adaptor = dynamic_cast(&other); - if (other_adaptor == nullptr) { return false; } - // Suppress false-positive null-dereference warnings from CCCL library code #pragma GCC 
diagnostic push #pragma GCC diagnostic ignored "-Wnull-dereference" - return _upstream == other_adaptor->get_upstream_resource(); + return _upstream == other.get_upstream_resource(); #pragma GCC diagnostic pop } diff --git a/src/memory/reservation_manager_configurator.cpp b/src/memory/reservation_manager_configurator.cpp index d15e4c0..b0dddec 100644 --- a/src/memory/reservation_manager_configurator.cpp +++ b/src/memory/reservation_manager_configurator.cpp @@ -21,7 +21,6 @@ #include #include -#include #include @@ -209,7 +208,7 @@ std::vector reservation_manager_configurator::build( (info.space_id == info.hw_id) ? _gpu_mr_fn : [current_mr_fn = _gpu_mr_fn, hw_id = info.hw_id]( - int, size_t capacity) -> std::unique_ptr { + int, size_t capacity) -> cuda::mr::any_resource { return current_mr_fn(hw_id, capacity); }; config.reservation_limit_fraction = _gpu_reservation.get_fraction(info.gpu_capacity); @@ -232,7 +231,7 @@ std::vector reservation_manager_configurator::build( (info.space_id == info.numa_id) ? _cpu_mr_fn : [current_mr_fn = _cpu_mr_fn, numa_id = info.numa_id]( - int, size_t capacity) -> std::unique_ptr { + int, size_t capacity) -> cuda::mr::any_resource { return current_mr_fn(numa_id, capacity); }; config.reservation_limit_fraction = _cpu_reservation.get_fraction(per_host_capacity); diff --git a/src/memory/small_pinned_host_memory_resource.cpp b/src/memory/small_pinned_host_memory_resource.cpp index 051a3d4..aabd515 100644 --- a/src/memory/small_pinned_host_memory_resource.cpp +++ b/src/memory/small_pinned_host_memory_resource.cpp @@ -37,8 +37,9 @@ small_pinned_host_memory_resource::~small_pinned_host_memory_resource() // free_lists_ entries are raw pointers into those blocks; no individual cleanup needed. 
} -void* small_pinned_host_memory_resource::do_allocate(std::size_t bytes, - [[maybe_unused]] rmm::cuda_stream_view stream) +void* small_pinned_host_memory_resource::allocate([[maybe_unused]] cuda::stream_ref stream, + std::size_t bytes, + [[maybe_unused]] std::size_t alignment) { if (bytes == 0) { return nullptr; } // cuDF calls get_pinned_memory_resource() directly from some code paths (e.g. join/sort @@ -50,9 +51,7 @@ void* small_pinned_host_memory_resource::do_allocate(std::size_t bytes, if (bytes > MAX_SLAB_SIZE) { void* ptr = nullptr; auto err = ::cudaMallocHost(&ptr, bytes); - if (err != cudaSuccess) { - throw std::bad_alloc{}; - } + if (err != cudaSuccess) { throw std::bad_alloc{}; } return ptr; } @@ -64,8 +63,10 @@ void* small_pinned_host_memory_resource::do_allocate(std::size_t bytes, return ptr; } -void small_pinned_host_memory_resource::do_deallocate( - void* ptr, std::size_t bytes, [[maybe_unused]] rmm::cuda_stream_view stream) noexcept +void small_pinned_host_memory_resource::deallocate([[maybe_unused]] cuda::stream_ref stream, + void* ptr, + std::size_t bytes, + [[maybe_unused]] std::size_t alignment) noexcept { if (ptr == nullptr || bytes == 0) { return; } if (bytes > MAX_SLAB_SIZE) { @@ -78,8 +79,8 @@ void small_pinned_host_memory_resource::do_deallocate( free_lists_[idx].push_back(ptr); } -bool small_pinned_host_memory_resource::do_is_equal( - const rmm::mr::device_memory_resource& other) const noexcept +bool small_pinned_host_memory_resource::operator==( + small_pinned_host_memory_resource const& other) const noexcept { return this == &other; } diff --git a/test/memory/test_memory_reservation_manager.cpp b/test/memory/test_memory_reservation_manager.cpp index 30b7ced..0edc176 100644 --- a/test/memory/test_memory_reservation_manager.cpp +++ b/test/memory/test_memory_reservation_manager.cpp @@ -491,8 +491,7 @@ SCENARIO("Reservation On Multi Gpu System", "[memory_space][.multi-device]") GIVEN("Dual gpu manager") { auto* gpu_space = 
manager->get_memory_space(Tier::GPU, 0); - auto* mr = - dynamic_cast(gpu_space->get_default_allocator()); + auto* mr = gpu_space->get_memory_resource_as(); REQUIRE(mr != nullptr); WHEN("a reservation doesn't fit on gpu 0 but fits on gpu 1") diff --git a/test/memory/test_small_pinned_host_memory_resource.cpp b/test/memory/test_small_pinned_host_memory_resource.cpp index 7145986..8d7ba70 100644 --- a/test/memory/test_small_pinned_host_memory_resource.cpp +++ b/test/memory/test_small_pinned_host_memory_resource.cpp @@ -174,11 +174,11 @@ TEST_CASE("Freed slabs are reused", "[small_pinned]") TEST_CASE("do_is_equal identity check", "[small_pinned]") { test_fixture f; - REQUIRE(f.slab_mr.is_equal(f.slab_mr)); + REQUIRE(f.slab_mr == f.slab_mr); // A second instance should not be equal small_pinned_host_memory_resource other{f.upstream}; - REQUIRE_FALSE(f.slab_mr.is_equal(other)); + REQUIRE_FALSE(f.slab_mr == other); } TEST_CASE("Concurrent allocations are thread-safe", "[small_pinned][threading]") diff --git a/test/unittest.cpp b/test/unittest.cpp index 1fb5f10..971273a 100644 --- a/test/unittest.cpp +++ b/test/unittest.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -48,15 +49,13 @@ class test_gpu_pool { if (initial_bytes > max_bytes) { initial_bytes = max_bytes; } pool_ = std::make_unique(initial_bytes, max_bytes); - previous_ = rmm::mr::get_current_device_resource(); - rmm::mr::set_current_device_resource(pool_.get()); + previous_ = rmm::mr::get_current_device_resource_ref(); + rmm::mr::set_current_device_resource_ref(*pool_); } ~test_gpu_pool() { - if (pool_ != nullptr && previous_ != nullptr) { - rmm::mr::set_current_device_resource(previous_); - } + if (pool_ != nullptr) { rmm::mr::set_current_device_resource_ref(previous_); } } private: @@ -75,7 +74,7 @@ class test_gpu_pool { static constexpr std::size_t default_max_bytes = 10ULL * 1024 * 1024 * 1024; std::unique_ptr pool_; - rmm::mr::device_memory_resource* previous_{nullptr}; + 
rmm::device_async_resource_ref previous_{rmm::mr::get_current_device_resource_ref()}; }; test_gpu_pool global_pool; diff --git a/test/utils/cudf_test_utils.cpp b/test/utils/cudf_test_utils.cpp index 3a7a6ff..211638b 100644 --- a/test/utils/cudf_test_utils.cpp +++ b/test/utils/cudf_test_utils.cpp @@ -15,15 +15,18 @@ * limitations under the License. */ +#include + #include #include -#include - #include #include #include +#include +#include +#include #include #include @@ -33,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -113,7 +117,8 @@ bool cudf_tables_have_equal_contents_on_stream(const cudf::table& left, std::vector left_data(data_bytes); std::vector right_data(data_bytes); - CUCASCADE_CUDA_TRY(cudaMemcpy(left_data.data(), left_col.head(), data_bytes, cudaMemcpyDeviceToHost)); + CUCASCADE_CUDA_TRY( + cudaMemcpy(left_data.data(), left_col.head(), data_bytes, cudaMemcpyDeviceToHost)); CUCASCADE_CUDA_TRY( cudaMemcpy(right_data.data(), right_col.head(), data_bytes, cudaMemcpyDeviceToHost)); @@ -150,51 +155,53 @@ void expect_cudf_tables_equal_on_stream(const cudf::table& left, } // Simple logging adaptor to print all RMM device allocations/frees with pointers/sizes/stream/tid -class logging_device_resource : public rmm::mr::device_memory_resource { +class logging_device_resource { public: - explicit logging_device_resource(rmm::mr::device_memory_resource* upstream) : _upstream(upstream) - { - } + explicit logging_device_resource(rmm::device_async_resource_ref upstream) : _upstream(upstream) {} - ~logging_device_resource() override = default; + ~logging_device_resource() = default; - private: - void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override + void* allocate(cuda::stream_ref stream, + std::size_t bytes, + std::size_t alignment = alignof(std::max_align_t)) { - void* ptr = _upstream->allocate(stream, bytes); + void* ptr = _upstream.allocate(stream, bytes, alignment); std::ostringstream oss; - oss << "[rmm-alloc] 
ptr=" << ptr << " size=" << bytes << " stream=" << stream.value() + oss << "[rmm-alloc] ptr=" << ptr << " size=" << bytes << " stream=" << stream.get() << " tid=" << std::this_thread::get_id(); std::cout << oss.str() << std::endl << std::flush; return ptr; } - void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) noexcept override + void deallocate(cuda::stream_ref stream, + void* ptr, + std::size_t bytes, + std::size_t alignment = alignof(std::max_align_t)) noexcept { std::ostringstream oss; - oss << "[rmm-free ] ptr=" << ptr << " size=" << bytes << " stream=" << stream.value() + oss << "[rmm-free ] ptr=" << ptr << " size=" << bytes << " stream=" << stream.get() << " tid=" << std::this_thread::get_id(); std::cout << oss.str() << std::endl << std::flush; - _upstream->deallocate(stream, ptr, bytes); + _upstream.deallocate(stream, ptr, bytes, alignment); } - bool do_is_equal(const rmm::mr::device_memory_resource& other) const noexcept override - { - return this == &other; - } + bool operator==(logging_device_resource const& other) const noexcept { return this == &other; } + + friend void get_property(logging_device_resource const&, cuda::mr::device_accessible) noexcept {} - rmm::mr::device_memory_resource* _upstream; + private: + rmm::device_async_resource_ref _upstream; }; // Install the logging resource once per process (wraps whatever the current device resource is) static void install_rmm_logging_resource_once() { static bool installed = false; - static std::unique_ptr logging_resource; + static std::optional> logging_resource; if (!installed) { - auto* prev = rmm::mr::get_current_device_resource(); - logging_resource = std::make_unique(prev); - rmm::mr::set_current_device_resource(logging_resource.get()); + auto prev = rmm::mr::get_current_device_resource_ref(); + logging_resource.emplace(logging_device_resource{prev}); + rmm::mr::set_current_device_resource_ref(*logging_resource); installed = true; std::cout << "[rmm-log ] installed 
logging device resource adaptor" << std::endl << std::flush; } diff --git a/test/utils/mock_test_utils.hpp b/test/utils/mock_test_utils.hpp index d798934..27ca131 100644 --- a/test/utils/mock_test_utils.hpp +++ b/test/utils/mock_test_utils.hpp @@ -19,6 +19,7 @@ #include "utils/test_memory_resources.hpp" +#include #include #include #include @@ -32,11 +33,12 @@ #include #include -#include - #include #include +#include +#include +#include #include #include @@ -60,7 +62,7 @@ inline std::shared_ptr make_mock_memory_space(memory::Tier config.device_id = static_cast(device_id); config.memory_capacity = 1024 * 1024 * 1024; config.mr_factory_fn = [](int, size_t) { - return std::make_unique(); + return cuda::mr::any_resource{rmm::mr::cuda_memory_resource{}}; }; return std::make_shared(config); } else if (tier == memory::Tier::HOST) { @@ -69,7 +71,8 @@ inline std::shared_ptr make_mock_memory_space(memory::Tier config.memory_capacity = 1024 * 1024 * 1024; config.initial_number_pools = 0; config.mr_factory_fn = [](int, size_t) { - return std::make_unique(-1); + return cuda::mr::any_resource{ + cucascade::memory::numa_region_pinned_host_memory_resource{-1}}; }; return std::make_shared(config); } else if (tier == memory::Tier::DISK) { @@ -154,8 +157,8 @@ inline std::vector create_conversion_test_configs() inline cudf::table create_simple_cudf_table( int num_rows, int num_columns, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(), - rmm::cuda_stream_view stream = rmm::cuda_stream_default) + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource_ref(), + rmm::cuda_stream_view stream = rmm::cuda_stream_default) { std::vector> columns; @@ -187,15 +190,15 @@ inline cudf::table create_simple_cudf_table( inline cudf::table create_simple_cudf_table( int num_rows, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(), - rmm::cuda_stream_view stream = rmm::cuda_stream_default) + rmm::device_async_resource_ref mr = 
rmm::mr::get_current_device_resource_ref(), + rmm::cuda_stream_view stream = rmm::cuda_stream_default) { return create_simple_cudf_table(num_rows, 2, mr, stream); } inline cudf::table create_simple_cudf_table( - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(), - rmm::cuda_stream_view stream = rmm::cuda_stream_default) + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource_ref(), + rmm::cuda_stream_view stream = rmm::cuda_stream_default) { return create_simple_cudf_table(100, 2, mr, stream); } diff --git a/test/utils/test_memory_resources.hpp b/test/utils/test_memory_resources.hpp index 2974390..d1c7c3a 100644 --- a/test/utils/test_memory_resources.hpp +++ b/test/utils/test_memory_resources.hpp @@ -17,43 +17,45 @@ #pragma once -#include -#include #include +#include -#include +#include namespace cucascade::test { -class shared_device_resource : public rmm::mr::device_memory_resource { +class shared_device_resource { public: - explicit shared_device_resource(rmm::mr::device_memory_resource* upstream) : upstream_(upstream) - { - } + explicit shared_device_resource(rmm::device_async_resource_ref upstream) : upstream_(upstream) {} - private: - void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override + void* allocate(cuda::stream_ref stream, + std::size_t bytes, + std::size_t alignment = alignof(std::max_align_t)) { - return upstream_->allocate(stream, bytes); + return upstream_.allocate(stream, bytes, alignment); } - void do_deallocate(void* p, std::size_t bytes, rmm::cuda_stream_view stream) noexcept override + void deallocate(cuda::stream_ref stream, + void* p, + std::size_t bytes, + std::size_t alignment = alignof(std::max_align_t)) noexcept { - upstream_->deallocate(stream, p, bytes); + upstream_.deallocate(stream, p, bytes, alignment); } - bool do_is_equal(rmm::mr::device_memory_resource const& other) const noexcept override - { - return upstream_ == &other; - } + bool 
operator==(shared_device_resource const& other) const noexcept { return this == &other; }
+
+  friend void get_property(shared_device_resource const&, cuda::mr::device_accessible) noexcept {}
 
-  rmm::mr::device_memory_resource* upstream_;
+ private:
+  rmm::device_async_resource_ref upstream_;
 };
 
-inline std::unique_ptr make_shared_current_device_resource(int,
-                                                           size_t)
+inline cuda::mr::any_resource make_shared_current_device_resource(
+  int, size_t)
 {
-  return std::make_unique(rmm::mr::get_current_device_resource());
+  return cuda::mr::any_resource{
+    shared_device_resource{rmm::mr::get_current_device_resource_ref()}};
 }
 
 }  // namespace cucascade::test