Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion include/cucascade/data/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include <cucascade/memory/memory_space.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device_memory_resource.hpp>

#include <concepts>
#include <cstddef>
Expand Down
11 changes: 6 additions & 5 deletions include/cucascade/memory/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
#pragma once

#include <rmm/error.hpp>
#include <rmm/mr/device_memory_resource.hpp>

#include <cuda/memory_resource>

#include <cstdint>
#include <cstring>
Expand Down Expand Up @@ -60,13 +61,13 @@ class memory_space_id {
};

using DeviceMemoryResourceFactoryFn =
std::function<std::unique_ptr<rmm::mr::device_memory_resource>(int device_id,
std::size_t capacity)>;
std::function<cuda::mr::any_resource<cuda::mr::device_accessible>(int device_id,
std::size_t capacity)>;

std::unique_ptr<rmm::mr::device_memory_resource> make_default_gpu_memory_resource(
cuda::mr::any_resource<cuda::mr::device_accessible> make_default_gpu_memory_resource(
int device_id, std::size_t capacity);

std::unique_ptr<rmm::mr::device_memory_resource> make_default_host_memory_resource(
cuda::mr::any_resource<cuda::mr::device_accessible> make_default_host_memory_resource(
int device_id, std::size_t capacity);

DeviceMemoryResourceFactoryFn make_default_allocator_for_tier(Tier tier);
Expand Down
63 changes: 30 additions & 33 deletions include/cucascade/memory/fixed_size_host_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
#include <rmm/aligned.hpp>
#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device_memory_resource.hpp>
#include <rmm/mr/pinned_host_memory_resource.hpp>
#include <rmm/resource_ref.hpp>

#include <cuda/memory_resource>
Expand Down Expand Up @@ -65,7 +63,7 @@ namespace memory {
* Modified to derive from device_memory_resource instead of host_memory_resource for RMM
* compatibility.
*/
class fixed_size_host_memory_resource : public rmm::mr::device_memory_resource {
class fixed_size_host_memory_resource {
public:
static constexpr std::size_t default_block_size = 1 << 20; ///< Default block size (1MB)
static constexpr std::size_t default_pool_size = 128; ///< Default number of blocks in pool
Expand Down Expand Up @@ -234,7 +232,7 @@ class fixed_size_host_memory_resource : public rmm::mr::device_memory_resource {
/**
* @brief Destructor - frees all allocated blocks.
*/
~fixed_size_host_memory_resource() override;
~fixed_size_host_memory_resource();

[[nodiscard]] std::size_t get_total_allocated_bytes() const noexcept
{
Expand Down Expand Up @@ -322,6 +320,34 @@ class fixed_size_host_memory_resource : public rmm::mr::device_memory_resource {
*/
std::size_t get_peak_total_allocated_bytes() const;

/**
 * @brief Stream-ordered allocation (CCCL cuda::mr interface).
 *
 * @param stream Stream the allocation is ordered on. NOTE(review): the old
 *        do_allocate documented the stream as ignored for host memory —
 *        confirm the same holds for this entry point.
 * @param bytes Size of the allocation in bytes
 * @param alignment Requested alignment of the returned pointer
 * @return Pointer to the allocated memory
 */
void* allocate(cuda::stream_ref stream,
               std::size_t bytes,
               std::size_t alignment = alignof(std::max_align_t));

/**
 * @brief Stream-ordered deallocation (CCCL cuda::mr interface). No-throw.
 *
 * @param stream Stream the deallocation is ordered on
 * @param ptr Pointer previously returned by allocate
 * @param bytes Size in bytes of the original allocation
 * @param alignment Alignment of the original allocation
 */
void deallocate(cuda::stream_ref stream,
                void* ptr,
                std::size_t bytes,
                std::size_t alignment = alignof(std::max_align_t)) noexcept;

/**
 * @brief Synchronous allocate — forwards to allocate() with a default stream_ref.
 */
void* allocate_sync(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t))
{
  return allocate(cuda::stream_ref{}, bytes, alignment);
}

/**
 * @brief Synchronous deallocate — forwards to deallocate() with a default stream_ref.
 */
void deallocate_sync(void* ptr,
                     std::size_t bytes,
                     std::size_t alignment = alignof(std::max_align_t)) noexcept
{
  deallocate(cuda::stream_ref{}, ptr, bytes, alignment);
}

/**
 * @brief Equality comparison for the cuda::mr resource interface.
 */
[[nodiscard]] bool operator==(fixed_size_host_memory_resource const& other) const noexcept;

/**
 * @brief Tags this resource as device-accessible for cuda::mr property queries.
 *
 * NOTE(review): the pool is host-side; device accessibility presumably relies
 * on the memory being pinned (the old version used
 * rmm::mr::pinned_host_memory_resource) — confirm the pool allocates pinned memory.
 */
friend void get_property(fixed_size_host_memory_resource const&,
                         cuda::mr::device_accessible) noexcept
{
}

protected:
/**
* @brief grows reservation by a `bytes` size
Expand All @@ -340,35 +366,6 @@ class fixed_size_host_memory_resource : public rmm::mr::device_memory_resource {

std::size_t do_reserve_upto(std::size_t bytes, std::size_t mem_limit);

/**
* @brief Allocate memory of the specified size.
*
* @param bytes Size in bytes (must be <= block_size_)
* @param stream CUDA stream (ignored for host memory)
* @return void* Pointer to allocated memory
* @throws rmm::logic_error if allocation size exceeds block size
* @throws rmm::out_of_memory if no free blocks are available and upstream allocation fails
*/
void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override;

/**
* @brief Deallocate memory.
*
* @param ptr Pointer to deallocate
* @param bytes Size in bytes (must be <= block_size_)
* @param stream CUDA stream (ignored for host memory)
*/
void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) noexcept override;

/**
* @brief Check if this resource is equal to another.
*
* @param other Other resource to compare
* @return bool True if equal
*/
[[nodiscard]] bool do_is_equal(
const rmm::mr::device_memory_resource& other) const noexcept override;

private:
/**
* @brief Expand the pool by allocating more blocks from upstream.
Expand Down
29 changes: 9 additions & 20 deletions include/cucascade/memory/memory_reservation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device_memory_resource.hpp>
#include <rmm/resource_ref.hpp>

#include <concepts>
#include <memory>
Expand All @@ -42,23 +42,19 @@ class memory_space;

/**
 * @brief Maps a memory Tier to the concrete memory-resource type backing it.
 *
 * Primary template: generic tiers use rmm::mr::device_memory_resource.
 * NOTE(review): the old `upstream_type` alias was dropped from the trait —
 * confirm no remaining code still references it.
 */
template <Tier TIER>
struct tier_memory_resource_trait {
  using type = rmm::mr::device_memory_resource;
  Tier tier  = TIER;
};

/// HOST tier is backed by the fixed-size host pool resource.
template <>
struct tier_memory_resource_trait<Tier::HOST> {
  using type = fixed_size_host_memory_resource;
  Tier tier  = Tier::HOST;
};

/// GPU tier is backed by the reservation-aware resource adaptor.
template <>
struct tier_memory_resource_trait<Tier::GPU> {
  using type = reservation_aware_resource_adaptor;
  Tier tier  = Tier::GPU;
};

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -216,22 +212,15 @@ class reservation {

[[nodiscard]] int device_id() const noexcept;

[[nodiscard]] rmm::mr::device_memory_resource* get_memory_resource() const noexcept;
[[nodiscard]] rmm::device_async_resource_ref get_memory_resource() const noexcept;

[[nodiscard]] const memory_space& get_memory_space() const noexcept;

template <typename T>
requires std::derived_from<T, rmm::mr::device_memory_resource>
T* get_memory_resource_as() const noexcept
{
return dynamic_cast<T*>(get_memory_resource());
}
T* get_memory_resource_as() const noexcept;

template <Tier TIER>
auto* get_memory_resource_of() const noexcept
{
return get_memory_resource_as<typename tier_memory_resource_trait<TIER>::type>();
}
auto* get_memory_resource_of() const noexcept;

//===----------------------------------------------------------------------===//
// Reservation Size Management
Expand Down
4 changes: 1 addition & 3 deletions include/cucascade/memory/memory_reservation_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <cucascade/memory/memory_space.hpp>

#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream_view.hpp>

#include <condition_variable>
#include <filesystem>
Expand All @@ -34,9 +35,6 @@
#include <utility>
#include <vector>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device_memory_resource.hpp>

namespace cucascade {
namespace memory {

Expand Down
27 changes: 22 additions & 5 deletions include/cucascade/memory/memory_space.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
#include <rmm/cuda_stream.hpp>
#include <rmm/cuda_stream_pool.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device_memory_resource.hpp>
#include <rmm/resource_ref.hpp>

namespace cucascade {
Expand Down Expand Up @@ -113,13 +112,19 @@ class memory_space {
[[nodiscard]] size_t get_max_memory() const noexcept;

// Allocator management
[[nodiscard]] rmm::mr::device_memory_resource* get_default_allocator() const noexcept;
[[nodiscard]] rmm::device_async_resource_ref get_default_allocator() const noexcept;

template <typename T>
requires std::derived_from<T, rmm::mr::device_memory_resource>
T* get_memory_resource_as() const noexcept
{
return dynamic_cast<T*>(get_default_allocator());
T* result = nullptr;
std::visit(
[&result](const auto& ptr) {
using held_type = std::decay_t<decltype(*ptr)>;
if constexpr (std::is_same_v<held_type, T>) { result = ptr.get(); }
},
_reservation_allocator);
return result;
}

template <Tier TIER>
Expand Down Expand Up @@ -155,7 +160,7 @@ class memory_space {
std::make_shared<notification_channel>();

// Memory resources owned by this memory_space
std::unique_ptr<rmm::mr::device_memory_resource> _allocator;
cuda::mr::any_resource<cuda::mr::device_accessible> _allocator;
reserving_adaptor_type _reservation_allocator;
std::unique_ptr<rmm::cuda_stream_pool> _stream_pool;
std::shared_ptr<idisk_io_backend> _io_backend; ///< I/O backend for DISK tier (null for others)
Expand All @@ -169,5 +174,17 @@ struct memory_space_hash {
size_t operator()(const memory_space& ms) const;
};

// Out-of-line definition of reservation::get_memory_resource_as, placed in
// memory_space.hpp because it calls into the complete memory_space type.
// Forwards to memory_space::get_memory_resource_as: yields the concrete
// resource pointer when the space's allocator holds a T, otherwise nullptr.
template <typename T>
T* reservation::get_memory_resource_as() const noexcept
{
  return _space->get_memory_resource_as<T>();
}

// Out-of-line definition of reservation::get_memory_resource_of: maps TIER to
// its concrete resource type via tier_memory_resource_trait, then performs the
// typed lookup through get_memory_resource_as.
template <Tier TIER>
auto* reservation::get_memory_resource_of() const noexcept
{
  return get_memory_resource_as<typename tier_memory_resource_trait<TIER>::type>();
}

} // namespace memory
} // namespace cucascade
48 changes: 34 additions & 14 deletions include/cucascade/memory/null_device_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,38 +17,58 @@

#pragma once

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device_memory_resource.hpp>
#include <cuda/memory_resource>
#include <cuda/stream_ref>

#include <cstddef>

namespace cucascade {
namespace memory {

/**
* A no-op device_memory_resource used for DISK tier to satisfy API requirements.
* A no-op memory resource used for DISK tier to satisfy API requirements.
* - allocate always returns nullptr
* - deallocate is a no-op
*/
class null_device_memory_resource : public rmm::mr::device_memory_resource {
class null_device_memory_resource {
public:
null_device_memory_resource() = default;
~null_device_memory_resource() override = default;
null_device_memory_resource() = default;
~null_device_memory_resource() = default;

protected:
void* do_allocate([[maybe_unused]] std::size_t bytes,
[[maybe_unused]] rmm::cuda_stream_view stream) override
void* allocate([[maybe_unused]] cuda::stream_ref stream,
[[maybe_unused]] std::size_t bytes,
[[maybe_unused]] std::size_t alignment = alignof(std::max_align_t))
{
return nullptr;
}
void do_deallocate([[maybe_unused]] void* p,
[[maybe_unused]] std::size_t bytes,
[[maybe_unused]] rmm::cuda_stream_view stream) noexcept override

void deallocate([[maybe_unused]] cuda::stream_ref stream,
[[maybe_unused]] void* p,
[[maybe_unused]] std::size_t bytes,
[[maybe_unused]] std::size_t alignment = alignof(std::max_align_t)) noexcept
{
}
[[nodiscard]] bool do_is_equal(
const rmm::mr::device_memory_resource& other) const noexcept override

void* allocate_sync([[maybe_unused]] std::size_t bytes,
[[maybe_unused]] std::size_t alignment = alignof(std::max_align_t))
{
return nullptr;
}

void deallocate_sync([[maybe_unused]] void* p,
[[maybe_unused]] std::size_t bytes,
[[maybe_unused]] std::size_t alignment = alignof(std::max_align_t)) noexcept
{
}

bool operator==(null_device_memory_resource const& other) const noexcept
{
return this == &other;
}

friend void get_property(null_device_memory_resource const&, cuda::mr::device_accessible) noexcept
{
}
};

} // namespace memory
Expand Down
Loading
Loading