Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion include/cucascade/data/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include <cucascade/memory/memory_space.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device_memory_resource.hpp>

#include <concepts>
#include <cstddef>
Expand Down
11 changes: 6 additions & 5 deletions include/cucascade/memory/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
#pragma once

#include <rmm/error.hpp>
#include <rmm/mr/device_memory_resource.hpp>

#include <cuda/memory_resource>

#include <cstdint>
#include <cstring>
Expand Down Expand Up @@ -60,13 +61,13 @@ class memory_space_id {
};

using DeviceMemoryResourceFactoryFn =
std::function<std::unique_ptr<rmm::mr::device_memory_resource>(int device_id,
std::size_t capacity)>;
std::function<cuda::mr::any_resource<cuda::mr::device_accessible>(int device_id,
std::size_t capacity)>;

std::unique_ptr<rmm::mr::device_memory_resource> make_default_gpu_memory_resource(
cuda::mr::any_resource<cuda::mr::device_accessible> make_default_gpu_memory_resource(
int device_id, std::size_t capacity);

std::unique_ptr<rmm::mr::device_memory_resource> make_default_host_memory_resource(
cuda::mr::any_resource<cuda::mr::device_accessible> make_default_host_memory_resource(
int device_id, std::size_t capacity);

DeviceMemoryResourceFactoryFn make_default_allocator_for_tier(Tier tier);
Expand Down
63 changes: 30 additions & 33 deletions include/cucascade/memory/fixed_size_host_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
#include <rmm/aligned.hpp>
#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device_memory_resource.hpp>
#include <rmm/mr/pinned_host_memory_resource.hpp>
#include <rmm/resource_ref.hpp>

#include <cuda/memory_resource>
Expand Down Expand Up @@ -65,7 +63,7 @@ namespace memory {
* Modified to derive from device_memory_resource instead of host_memory_resource for RMM
* compatibility.
*/
class fixed_size_host_memory_resource : public rmm::mr::device_memory_resource {
class fixed_size_host_memory_resource {
public:
static constexpr std::size_t default_block_size = 1 << 20; ///< Default block size (1MB)
static constexpr std::size_t default_pool_size = 128; ///< Default number of blocks in pool
Expand Down Expand Up @@ -234,7 +232,7 @@ class fixed_size_host_memory_resource : public rmm::mr::device_memory_resource {
/**
* @brief Destructor - frees all allocated blocks.
*/
~fixed_size_host_memory_resource() override;
~fixed_size_host_memory_resource();

[[nodiscard]] std::size_t get_total_allocated_bytes() const noexcept
{
Expand Down Expand Up @@ -322,6 +320,34 @@ class fixed_size_host_memory_resource : public rmm::mr::device_memory_resource {
*/
std::size_t get_peak_total_allocated_bytes() const;

/**
 * @brief Stream-ordered allocation (CCCL cuda::mr interface).
 *
 * @param stream Stream the allocation is ordered on. NOTE(review): the old
 *        do_allocate documented the stream as ignored for host memory —
 *        confirm the same holds for this entry point.
 * @param bytes Size of the allocation in bytes
 * @param alignment Requested alignment of the returned pointer
 * @return Pointer to the allocated memory
 */
void* allocate(cuda::stream_ref stream,
               std::size_t bytes,
               std::size_t alignment = alignof(std::max_align_t));

/**
 * @brief Stream-ordered deallocation (CCCL cuda::mr interface). No-throw.
 *
 * @param stream Stream the deallocation is ordered on
 * @param ptr Pointer previously returned by allocate
 * @param bytes Size in bytes of the original allocation
 * @param alignment Alignment of the original allocation
 */
void deallocate(cuda::stream_ref stream,
                void* ptr,
                std::size_t bytes,
                std::size_t alignment = alignof(std::max_align_t)) noexcept;

/**
 * @brief Synchronous allocate — forwards to allocate() with a default stream_ref.
 */
void* allocate_sync(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t))
{
  return allocate(cuda::stream_ref{}, bytes, alignment);
}

/**
 * @brief Synchronous deallocate — forwards to deallocate() with a default stream_ref.
 */
void deallocate_sync(void* ptr,
                     std::size_t bytes,
                     std::size_t alignment = alignof(std::max_align_t)) noexcept
{
  deallocate(cuda::stream_ref{}, ptr, bytes, alignment);
}

/**
 * @brief Equality comparison for the cuda::mr resource interface.
 */
[[nodiscard]] bool operator==(fixed_size_host_memory_resource const& other) const noexcept;

/**
 * @brief Tags this resource as device-accessible for cuda::mr property queries.
 *
 * NOTE(review): the pool is host-side; device accessibility presumably relies
 * on the memory being pinned (the old version used
 * rmm::mr::pinned_host_memory_resource) — confirm the pool allocates pinned memory.
 */
friend void get_property(fixed_size_host_memory_resource const&,
                         cuda::mr::device_accessible) noexcept
{
}

protected:
/**
* @brief grows reservation by a `bytes` size
Expand All @@ -340,35 +366,6 @@ class fixed_size_host_memory_resource : public rmm::mr::device_memory_resource {

std::size_t do_reserve_upto(std::size_t bytes, std::size_t mem_limit);

/**
* @brief Allocate memory of the specified size.
*
* @param bytes Size in bytes (must be <= block_size_)
* @param stream CUDA stream (ignored for host memory)
* @return void* Pointer to allocated memory
* @throws rmm::logic_error if allocation size exceeds block size
* @throws rmm::out_of_memory if no free blocks are available and upstream allocation fails
*/
void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override;

/**
* @brief Deallocate memory.
*
* @param ptr Pointer to deallocate
* @param bytes Size in bytes (must be <= block_size_)
* @param stream CUDA stream (ignored for host memory)
*/
void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) noexcept override;

/**
* @brief Check if this resource is equal to another.
*
* @param other Other resource to compare
* @return bool True if equal
*/
[[nodiscard]] bool do_is_equal(
const rmm::mr::device_memory_resource& other) const noexcept override;

private:
/**
* @brief Expand the pool by allocating more blocks from upstream.
Expand Down
29 changes: 9 additions & 20 deletions include/cucascade/memory/memory_reservation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device_memory_resource.hpp>
#include <rmm/resource_ref.hpp>

#include <concepts>
#include <memory>
Expand All @@ -42,23 +42,19 @@ class memory_space;

/**
 * @brief Maps a memory Tier to the concrete memory-resource type backing it.
 *
 * Primary template: generic tiers use rmm::mr::device_memory_resource.
 * NOTE(review): the old `upstream_type` alias was dropped from the trait —
 * confirm no remaining code still references it.
 */
template <Tier TIER>
struct tier_memory_resource_trait {
  using type = rmm::mr::device_memory_resource;
  Tier tier  = TIER;
};

/// HOST tier is backed by the fixed-size host pool resource.
template <>
struct tier_memory_resource_trait<Tier::HOST> {
  using type = fixed_size_host_memory_resource;
  Tier tier  = Tier::HOST;
};

/// GPU tier is backed by the reservation-aware resource adaptor.
template <>
struct tier_memory_resource_trait<Tier::GPU> {
  using type = reservation_aware_resource_adaptor;
  Tier tier  = Tier::GPU;
};

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -216,22 +212,15 @@ class reservation {

[[nodiscard]] int device_id() const noexcept;

[[nodiscard]] rmm::mr::device_memory_resource* get_memory_resource() const noexcept;
[[nodiscard]] rmm::device_async_resource_ref get_memory_resource() const noexcept;

[[nodiscard]] const memory_space& get_memory_space() const noexcept;

template <typename T>
requires std::derived_from<T, rmm::mr::device_memory_resource>
T* get_memory_resource_as() const noexcept
{
return dynamic_cast<T*>(get_memory_resource());
}
T* get_memory_resource_as() const noexcept;

template <Tier TIER>
auto* get_memory_resource_of() const noexcept
{
return get_memory_resource_as<typename tier_memory_resource_trait<TIER>::type>();
}
auto* get_memory_resource_of() const noexcept;

//===----------------------------------------------------------------------===//
// Reservation Size Management
Expand Down
4 changes: 1 addition & 3 deletions include/cucascade/memory/memory_reservation_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <cucascade/memory/memory_space.hpp>

#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream_view.hpp>

#include <condition_variable>
#include <filesystem>
Expand All @@ -34,9 +35,6 @@
#include <utility>
#include <vector>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device_memory_resource.hpp>

namespace cucascade {
namespace memory {

Expand Down
27 changes: 22 additions & 5 deletions include/cucascade/memory/memory_space.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
#include <rmm/cuda_stream.hpp>
#include <rmm/cuda_stream_pool.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device_memory_resource.hpp>
#include <rmm/resource_ref.hpp>

namespace cucascade {
Expand Down Expand Up @@ -113,13 +112,19 @@ class memory_space {
[[nodiscard]] size_t get_max_memory() const noexcept;

// Allocator management
[[nodiscard]] rmm::mr::device_memory_resource* get_default_allocator() const noexcept;
[[nodiscard]] rmm::device_async_resource_ref get_default_allocator() const noexcept;

template <typename T>
requires std::derived_from<T, rmm::mr::device_memory_resource>
T* get_memory_resource_as() const noexcept
{
return dynamic_cast<T*>(get_default_allocator());
T* result = nullptr;
std::visit(
[&result](const auto& ptr) {
using held_type = std::decay_t<decltype(*ptr)>;
if constexpr (std::is_same_v<held_type, T>) { result = ptr.get(); }
},
_reservation_allocator);
return result;
}

template <Tier TIER>
Expand Down Expand Up @@ -155,7 +160,7 @@ class memory_space {
std::make_shared<notification_channel>();

// Memory resources owned by this memory_space
std::unique_ptr<rmm::mr::device_memory_resource> _allocator;
cuda::mr::any_resource<cuda::mr::device_accessible> _allocator;
reserving_adaptor_type _reservation_allocator;
std::unique_ptr<rmm::cuda_stream_pool> _stream_pool;
std::shared_ptr<idisk_io_backend> _io_backend; ///< I/O backend for DISK tier (null for others)
Expand All @@ -169,5 +174,17 @@ struct memory_space_hash {
size_t operator()(const memory_space& ms) const;
};

// Out-of-line definition of reservation::get_memory_resource_as, placed in
// memory_space.hpp because it calls into the complete memory_space type.
// Forwards to memory_space::get_memory_resource_as: yields the concrete
// resource pointer when the space's allocator holds a T, otherwise nullptr.
template <typename T>
T* reservation::get_memory_resource_as() const noexcept
{
  return _space->get_memory_resource_as<T>();
}

// Out-of-line definition of reservation::get_memory_resource_of: maps TIER to
// its concrete resource type via tier_memory_resource_trait, then performs the
// typed lookup through get_memory_resource_as.
template <Tier TIER>
auto* reservation::get_memory_resource_of() const noexcept
{
  return get_memory_resource_as<typename tier_memory_resource_trait<TIER>::type>();
}

} // namespace memory
} // namespace cucascade
48 changes: 34 additions & 14 deletions include/cucascade/memory/null_device_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,38 +17,58 @@

#pragma once

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device_memory_resource.hpp>
#include <cuda/memory_resource>
#include <cuda/stream_ref>

#include <cstddef>

namespace cucascade {
namespace memory {

/**
* A no-op device_memory_resource used for DISK tier to satisfy API requirements.
* A no-op memory resource used for DISK tier to satisfy API requirements.
* - allocate always returns nullptr
* - deallocate is a no-op
*/
class null_device_memory_resource : public rmm::mr::device_memory_resource {
class null_device_memory_resource {
public:
null_device_memory_resource() = default;
~null_device_memory_resource() override = default;
null_device_memory_resource() = default;
~null_device_memory_resource() = default;

protected:
void* do_allocate([[maybe_unused]] std::size_t bytes,
[[maybe_unused]] rmm::cuda_stream_view stream) override
void* allocate([[maybe_unused]] cuda::stream_ref stream,
[[maybe_unused]] std::size_t bytes,
[[maybe_unused]] std::size_t alignment = alignof(std::max_align_t))
{
return nullptr;
}
void do_deallocate([[maybe_unused]] void* p,
[[maybe_unused]] std::size_t bytes,
[[maybe_unused]] rmm::cuda_stream_view stream) noexcept override

void deallocate([[maybe_unused]] cuda::stream_ref stream,
[[maybe_unused]] void* p,
[[maybe_unused]] std::size_t bytes,
[[maybe_unused]] std::size_t alignment = alignof(std::max_align_t)) noexcept
{
}
[[nodiscard]] bool do_is_equal(
const rmm::mr::device_memory_resource& other) const noexcept override

void* allocate_sync([[maybe_unused]] std::size_t bytes,
[[maybe_unused]] std::size_t alignment = alignof(std::max_align_t))
{
return nullptr;
}

void deallocate_sync([[maybe_unused]] void* p,
[[maybe_unused]] std::size_t bytes,
[[maybe_unused]] std::size_t alignment = alignof(std::max_align_t)) noexcept
{
}

bool operator==(null_device_memory_resource const& other) const noexcept
{
return this == &other;
}

friend void get_property(null_device_memory_resource const&, cuda::mr::device_accessible) noexcept
{
}
};

} // namespace memory
Expand Down
Loading
Loading