Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
116 commits
Select commit Hold shift + click to select a range
e711b62
Initial plan
Copilot Feb 11, 2026
c881bc5
Replace gtest/gtest.h with framework.hpp in all unit tests
Copilot Feb 11, 2026
e227fdc
Convert mp_unit tests from gtest to framework.hpp
Copilot Feb 11, 2026
1e32e17
Address code review comments
Copilot Feb 11, 2026
eafa6fb
Add custom test framework and code coverage support
Copilot Feb 11, 2026
3d8a2e7
Add --gtest_filter support to framework
Copilot Feb 11, 2026
a10aff5
Address code review feedback
Copilot Feb 11, 2026
1818709
Fix CodeQL workflow by disabling test builds
Copilot Feb 11, 2026
5657e4a
Initial plan for fixing test build with GPU bypass
Copilot Feb 11, 2026
0eae34c
Fix test framework for building with Docker
Copilot Feb 11, 2026
4823583
Move FailHelper and SkipHelper into mscclpp::test namespace
Copilot Feb 11, 2026
403b2fb
Remove unnecessary CMake build artifacts from PR
Copilot Feb 11, 2026
305d157
Remove PerfTestResult and reuse TestResult directly
Copilot Feb 11, 2026
b1f458e
Convert test framework identifiers from snake_case to camelCase
Copilot Feb 11, 2026
6da12fa
Comprehensive plan for refactoring
Copilot Feb 11, 2026
7e4365f
Add performance test filtering and remove HTML coverage
Copilot Feb 11, 2026
b59196b
Integrate perf tests into unit_tests and add CI coverage step
Copilot Feb 11, 2026
ba0451a
Remove build2 CMake artifacts from repository
Copilot Feb 11, 2026
50f6a24
Remove test/perf/ directory completely
Copilot Feb 11, 2026
e26f8ab
Address PR review comments
Copilot Feb 11, 2026
7003fec
Simplify filter matching to use substring matching
Copilot Feb 11, 2026
30b9891
simplifying
chhwang Feb 19, 2026
b6ce0f2
simplify
chhwang Feb 19, 2026
d2efc2f
coverage update
chhwang Feb 19, 2026
4afbf78
minor
chhwang Feb 19, 2026
e40c72b
license text update
chhwang Feb 19, 2026
bed85b5
codecov upload
chhwang Feb 19, 2026
4d9acea
badge
chhwang Feb 19, 2026
b693d1b
lint issue
chhwang Feb 19, 2026
2b4adcc
fix lint
chhwang Feb 19, 2026
b64536f
Merge branch 'main' into copilot/remove-gtest-use-custom-framework
chhwang Feb 19, 2026
dcdd3fe
update UT CI
chhwang Feb 20, 2026
caeec75
updates
chhwang Feb 20, 2026
b9609f8
add coverage flags
chhwang Feb 20, 2026
41695ba
Merge branch 'main' into copilot/remove-gtest-use-custom-framework
chhwang Feb 20, 2026
febdbf9
WIP; need amd fix
chhwang Feb 21, 2026
c4afbe1
Merge branch 'main' into copilot/remove-gtest-use-custom-framework
chhwang Feb 23, 2026
04ebd9b
fix coverage file path
chhwang Feb 23, 2026
54e46ba
rocm fix wip
chhwang Feb 23, 2026
6c2bc8f
coverage fix
chhwang Feb 23, 2026
d0c709e
Fix Codecov token usage in coverage upload step
chhwang Feb 23, 2026
edda25d
Merge branch 'main' into copilot/remove-gtest-use-custom-framework
chhwang Feb 23, 2026
2f02d38
Merge branch 'main' into copilot/remove-gtest-use-custom-framework
chhwang Feb 24, 2026
2adf4a4
use variable group
chhwang Feb 24, 2026
98b023a
rocm fixes
chhwang Feb 24, 2026
22e5efb
gdrcopy install in container
chhwang Feb 24, 2026
2f27d7d
Update coverage report to exclude additional directories in lcov command
chhwang Feb 24, 2026
d88ee8d
Refine coverage report to include only mscclpp source and include dir…
chhwang Feb 24, 2026
11e27e2
Update coverage report commands to handle errors and adjust paths
chhwang Feb 24, 2026
25f31b4
updates
chhwang Feb 24, 2026
75dfdd9
Merge branch 'main' into chhwang/fix-ib-no-atomic
chhwang Feb 24, 2026
ac4d713
updates
chhwang Feb 24, 2026
ac022c3
a few updates
chhwang Feb 25, 2026
72407af
License
chhwang Feb 25, 2026
8effd97
License
chhwang Feb 25, 2026
fd7358d
License, lint
chhwang Feb 25, 2026
67d1706
optimized recv loop
chhwang Feb 26, 2026
060982d
updates
chhwang Feb 26, 2026
6b2f819
Merge branch 'main' into chhwang/fix-ib-no-atomic
chhwang Feb 26, 2026
eb99a26
Merge branch 'main' into copilot/remove-gtest-use-custom-framework
chhwang Feb 27, 2026
8c3a436
update CI
chhwang Feb 27, 2026
f4b8574
Merge branch 'main' into copilot/remove-gtest-use-custom-framework
chhwang Mar 3, 2026
3b56b08
data direct
chhwang Mar 4, 2026
448ceb6
updates
chhwang Mar 5, 2026
7ce841b
Updates
chhwang Mar 5, 2026
bbb9c10
Update Docker image
chhwang Mar 6, 2026
60ff32c
updates
chhwang Mar 6, 2026
00583da
separate pipeline for codecov
chhwang Mar 6, 2026
c699b8a
az pipeline refactoring
chhwang Mar 7, 2026
284d913
Merge branch 'main' into copilot/remove-gtest-use-custom-framework
chhwang Mar 7, 2026
75ac8be
fix
chhwang Mar 7, 2026
e0c7ddb
fix
chhwang Mar 7, 2026
c40a233
fix
chhwang Mar 7, 2026
375bc13
fix
chhwang Mar 7, 2026
bcb392f
updates
chhwang Mar 8, 2026
ea1dd65
fix
chhwang Mar 8, 2026
d6a6fa2
simplified
chhwang Mar 8, 2026
a9cf938
fix
chhwang Mar 9, 2026
6647338
debugging
chhwang Mar 10, 2026
7a87c2c
debugging
chhwang Mar 10, 2026
cf505d7
debugging
chhwang Mar 10, 2026
757c0ec
debugging
chhwang Mar 11, 2026
e2a5be4
debugging
chhwang Mar 11, 2026
2a705f5
fix merge
chhwang Mar 11, 2026
a38bd9d
Merge branch 'main' into copilot/remove-gtest-use-custom-framework
chhwang Mar 11, 2026
e2a9692
fix merge
chhwang Mar 11, 2026
2c4bab8
fix
chhwang Mar 16, 2026
a937ce4
debugging
chhwang Mar 16, 2026
d66d7e4
debugging
chhwang Mar 17, 2026
5a65cc7
debugging
chhwang Mar 17, 2026
2297a3d
updates
chhwang Mar 18, 2026
2756221
update
chhwang Mar 18, 2026
bff76d5
Fix TearDown() handling and replace assert() in perf tests
Copilot Mar 18, 2026
6082648
fix for npkit
chhwang Mar 18, 2026
79a0149
updates
chhwang Mar 18, 2026
0200532
Merge branch 'copilot/remove-gtest-use-custom-framework' into chhwang…
chhwang Mar 18, 2026
80f554e
Merge branch 'main' into chhwang/fix-ib-no-atomic
chhwang Mar 26, 2026
67f9933
fix data direct
chhwang Apr 1, 2026
d1124fb
revert
chhwang Apr 1, 2026
144046b
revert
chhwang Apr 1, 2026
f8e94d9
disable mlx5dv_reg_dmabuf_mr
chhwang Apr 1, 2026
4cf5332
updates
chhwang Apr 1, 2026
848b89b
64-bit token reconstruction
chhwang Apr 1, 2026
ff4d825
Merge branch 'main' into chhwang/fix-ib-no-atomic
chhwang Apr 1, 2026
94d0508
prerequisites update
chhwang Apr 1, 2026
553fd3b
lint
chhwang Apr 1, 2026
53099a7
Merge branch 'main' into chhwang/fix-ib-no-atomic
chhwang Apr 2, 2026
f62633a
mlx5dv bug fixes & enhanced unit tests perf reporting
chhwang Apr 4, 2026
b04fa2d
lint
chhwang Apr 4, 2026
4a3ae17
fix: destroy proxyService before connections in TearDown
chhwang Apr 7, 2026
934d40c
Revert "fix: destroy proxyService before connections in TearDown"
chhwang Apr 7, 2026
da53d05
Merge branch 'main' into chhwang/fix-ib-no-atomic
chhwang Apr 7, 2026
e2b0824
Merge branch 'main' into chhwang/fix-ib-no-atomic
chhwang Apr 8, 2026
fd503f2
Minor test updates
chhwang Apr 8, 2026
b2c136e
Merge branch 'main' into chhwang/fix-ib-no-atomic
chhwang Apr 8, 2026
b2fc080
Merge branch 'main' into chhwang/fix-ib-no-atomic
chhwang Apr 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/copilot-instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ For testing after successful build:
# To run tests with two GPUs - two is enough for most tests
mpirun -np 2 ./build/bin/mp_unit_tests
# To run tests excluding IB-related ones (when IB is not available)
mpirun -np 2 ./build/bin/mp_unit_tests --gtest_filter=-*Ib*
mpirun -np 2 ./build/bin/mp_unit_tests --filter=-*Ib*
```

For building a Python package:
Expand Down
21 changes: 21 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -224,9 +224,30 @@ if(MSCCLPP_USE_IB)
if(NOT IBVERBS_FOUND)
message(FATAL_ERROR "IBVerbs not found. Install libibverbs-dev or rdma-core-devel. If you want to disable InfiniBand, add `-DMSCCLPP_USE_IB=OFF` in your cmake command.")
endif()
find_package(MLX5)
if(MLX5_FOUND)
message(STATUS "MLX5 Direct Verbs found: ${MLX5_LIBRARIES}")
else()
message(STATUS "MLX5 Direct Verbs not found, mlx5dv optimizations disabled")
endif()
endif()
find_package(NUMA REQUIRED)
find_package(Threads REQUIRED)

# GDRCopy support: requested by default, switched off for ROCm builds, and
# switched off with a status message (not an error) when the package is missing.
option(MSCCLPP_USE_GDRCOPY "Use GDRCopy for direct GPU memory access from host." ON)
# ROCm builds override the option unconditionally.
if(MSCCLPP_USE_ROCM)
set(MSCCLPP_USE_GDRCOPY OFF)
endif()
if(MSCCLPP_USE_GDRCOPY)
# find_package is called without REQUIRED, so a missing GDRCopy does not abort
# configuration; the else() branch below only reports where it was found.
find_package(GDRCopy)
if(NOT GDRCOPY_FOUND)
message(STATUS "GDRCopy not found, disabling GDRCopy support")
set(MSCCLPP_USE_GDRCOPY OFF)
else()
message(STATUS "GDRCopy found: ${GDRCOPY_LIBRARIES}")
endif()
endif()

include(FetchContent)
FetchContent_Declare(json
GIT_REPOSITORY https://github.com/nlohmann/json.git
Expand Down
50 changes: 50 additions & 0 deletions cmake/FindGDRCopy.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

# Find the GDRCopy libraries (>= 2.5 required for gdr_pin_buffer_v2 / GDR_PIN_FLAG_FORCE_PCIE)
#
# The following variables are optionally searched for defaults
# GDRCOPY_ROOT_DIR: Base directory where all GDRCopy components are found
# GDRCOPY_INCLUDE_DIR: Directory where GDRCopy headers are found
# GDRCOPY_LIB_DIR: Directory where GDRCopy libraries are found

# The following are set after configuration is done:
# GDRCOPY_FOUND
# GDRCOPY_INCLUDE_DIRS
# GDRCOPY_LIBRARIES

# Locate the directory that contains the GDRCopy public header (gdrapi.h).
find_path(GDRCOPY_INCLUDE_DIRS
  NAMES gdrapi.h
  HINTS
  ${GDRCOPY_INCLUDE_DIR}
  ${GDRCOPY_ROOT_DIR}
  ${GDRCOPY_ROOT_DIR}/include
  /usr/local/include
  /usr/include)

# Locate the GDRCopy userspace library (libgdrapi).
find_library(GDRCOPY_LIBRARIES
  NAMES gdrapi
  HINTS
  ${GDRCOPY_LIB_DIR}
  ${GDRCOPY_ROOT_DIR}
  ${GDRCOPY_ROOT_DIR}/lib
  /usr/local/lib
  /usr/lib
  /usr/lib/x86_64-linux-gnu)

# Probe for gdr_pin_buffer_v2 only when BOTH the header and the library were
# found. The probe links against ${GDRCOPY_LIBRARIES}; running it with a
# "-NOTFOUND" library value would fail for the wrong reason, print a misleading
# "too old" message, and leave a negative GDRCOPY_HAS_PIN_BUFFER_V2 result in
# the cache. When the library is missing, find_package_handle_standard_args
# below reports that directly instead.
if(GDRCOPY_INCLUDE_DIRS AND GDRCOPY_LIBRARIES)
  include(CheckSymbolExists)
  include(CMakePushCheckState)
  # Save and restore the caller's CMAKE_REQUIRED_* settings rather than
  # unsetting them, so including this module does not clobber project state.
  cmake_push_check_state()
  set(CMAKE_REQUIRED_INCLUDES ${GDRCOPY_INCLUDE_DIRS})
  set(CMAKE_REQUIRED_LIBRARIES ${GDRCOPY_LIBRARIES})
  check_symbol_exists(gdr_pin_buffer_v2 "gdrapi.h" GDRCOPY_HAS_PIN_BUFFER_V2)
  cmake_pop_check_state()
  if(NOT GDRCOPY_HAS_PIN_BUFFER_V2)
    # NOTE(review): docs/quickstart.md states GDRCopy >= 2.5.1 while this
    # message says >= 2.5 -- confirm the actual minimum version and align both.
    message(STATUS "GDRCopy found but too old (gdr_pin_buffer_v2 not available). Requires >= 2.5.")
    # Invalidate the include dir so the standard-args check below reports
    # the package as not found.
    set(GDRCOPY_INCLUDE_DIRS GDRCOPY_INCLUDE_DIRS-NOTFOUND)
  endif()
endif()

# Sets GDRCOPY_FOUND when both the include dir and the library are valid.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(GDRCopy DEFAULT_MSG GDRCOPY_INCLUDE_DIRS GDRCOPY_LIBRARIES)
mark_as_advanced(GDRCOPY_INCLUDE_DIRS GDRCOPY_LIBRARIES)
38 changes: 38 additions & 0 deletions cmake/FindMLX5.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

# Find the MLX5 Direct Verbs (mlx5dv) library
#
# The following variables are optionally searched for defaults
# MLX5_ROOT_DIR: Base directory where all MLX5 components are found
# MLX5_INCLUDE_DIR: Directory where MLX5 headers are found
# MLX5_LIB_DIR: Directory where MLX5 libraries are found

# The following are set after configuration is done:
# MLX5_FOUND
# MLX5_INCLUDE_DIRS
# MLX5_LIBRARIES

# Locate the directory that contains the mlx5 Direct Verbs header
# (infiniband/mlx5dv.h), trying the user-supplied hint variables first.
find_path(MLX5_INCLUDE_DIRS
NAMES infiniband/mlx5dv.h
HINTS
${MLX5_INCLUDE_DIR}
${MLX5_ROOT_DIR}
${MLX5_ROOT_DIR}/include
/usr/local/include
/usr/include)

# Locate the mlx5 library (libmlx5), trying the user-supplied hint variables first.
find_library(MLX5_LIBRARIES
NAMES mlx5
HINTS
${MLX5_LIB_DIR}
${MLX5_ROOT_DIR}
${MLX5_ROOT_DIR}/lib
/usr/local/lib
/usr/lib
/usr/lib/x86_64-linux-gnu)

include(FindPackageHandleStandardArgs)

# Sets MLX5_FOUND when both the include directory and the library were located.
find_package_handle_standard_args(MLX5 DEFAULT_MSG MLX5_INCLUDE_DIRS MLX5_LIBRARIES)
# Hide the result variables from the default (non-advanced) cache view.
mark_as_advanced(MLX5_INCLUDE_DIRS MLX5_LIBRARIES)
19 changes: 18 additions & 1 deletion docker/base-dev-x.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,25 @@ RUN OS_ARCH=$(uname -m) && \
rm -rf ${CMAKE_HOME}.tar.gz && \
ln -s /usr/local/cmake-${CMAKE_VERSION}-linux-${OS_ARCH}/bin/* /usr/bin/

# Install ROCm-specific packages if building for ROCm
# Install GDRCopy userspace library for CUDA targets
ARG TARGET="cuda13.0"
RUN if echo "$TARGET" | grep -q "^cuda"; then \
GDRCOPY_VERSION="2.5.2" && \
apt-get update -y && \
apt-get install -y --no-install-recommends devscripts debhelper fakeroot pkg-config dkms && \
cd /tmp && \
curl -L https://github.com/NVIDIA/gdrcopy/archive/refs/tags/v${GDRCOPY_VERSION}.tar.gz -o gdrcopy.tar.gz && \
tar xzf gdrcopy.tar.gz && \
cd gdrcopy-${GDRCOPY_VERSION}/packages && \
./build-deb-packages.sh -k -t && \
dpkg -i libgdrapi_*.deb && \
cd / && rm -rf /tmp/gdrcopy* && \
apt-get autoremove -y && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* /tmp/*; \
fi

# Install ROCm-specific packages if building for ROCm
RUN if echo "$TARGET" | grep -q "^rocm"; then \
apt-get update -y && \
apt-get install -y hipblas hipsparse rocsparse rocrand hiprand rocthrust rocsolver rocfft hipfft hipcub rocprim rccl roctracer-dev && \
Expand Down
3 changes: 3 additions & 0 deletions docs/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
```
If you don't want to build Python module, you need to set `-DMSCCLPP_BUILD_PYTHON_BINDINGS=OFF` in your `cmake` command (see details in [Install from Source](#install-from-source)).
* (Optional, for benchmarks) MPI
* (Optional, for NVIDIA platforms) [GDRCopy](https://github.com/NVIDIA/gdrcopy) >= 2.5.1
* GDRCopy is required for IB `HostNoAtomic` mode, which uses CPU-side signal forwarding to GPU memory via BAR1 mappings. This mode is used on platforms where RDMA atomics are not available (e.g., when using Data Direct Virtual Functions).
* Install GDRCopy from source or via packages. See the [GDRCopy installation guide](https://github.com/NVIDIA/gdrcopy#installation).
* Others
* For RDMA (InfiniBand or RoCE) support on NVIDIA platforms, [GPUDirect RDMA](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/gpu-operator-rdma.html#gpudirect-rdma-and-gpudirect-storage) should be supported by the system. See the detailed prerequisites from [this NVIDIA documentation](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/gpu-operator-rdma.html#common-prerequisites).
* For NVLink SHARP (NVLS) support on NVIDIA platforms, the Linux kernel version should be 5.6 or above.
Expand Down
5 changes: 2 additions & 3 deletions include/mscclpp/atomic_device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,14 @@ MSCCLPP_HOST_DEVICE_INLINE T atomicFetchAdd(T* ptr, const T& val, cuda::memory_o
return cuda::atomic_ref<T, Scope>{*ptr}.fetch_add(val, memoryOrder);
}

#elif defined(MSCCLPP_DEVICE_HIP)
#else // !defined(MSCCLPP_DEVICE_CUDA)

constexpr auto memoryOrderRelaxed = __ATOMIC_RELAXED;
constexpr auto memoryOrderAcquire = __ATOMIC_ACQUIRE;
constexpr auto memoryOrderRelease = __ATOMIC_RELEASE;
constexpr auto memoryOrderAcqRel = __ATOMIC_ACQ_REL;
constexpr auto memoryOrderSeqCst = __ATOMIC_SEQ_CST;

// HIP does not have thread scope enums like CUDA
constexpr auto scopeSystem = 0;
constexpr auto scopeDevice = 0;

Expand All @@ -65,7 +64,7 @@ MSCCLPP_HOST_DEVICE_INLINE T atomicFetchAdd(T* ptr, const T& val, int memoryOrde
return __atomic_fetch_add(ptr, val, memoryOrder);
}

#endif // defined(MSCCLPP_DEVICE_HIP)
#endif // !defined(MSCCLPP_DEVICE_CUDA)

} // namespace mscclpp

Expand Down
5 changes: 5 additions & 0 deletions include/mscclpp/env.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,11 @@ class Env {
/// Default is false.
const bool forceDisableNvls;

/// Env name: `MSCCLPP_FORCE_DISABLE_GDR`. If set to true, it will disable the GDRCopy support in MSCCL++.
/// When false (default), GDRCopy is auto-detected and enabled if the gdrcopy driver is loaded.
/// Default is false.
const bool forceDisableGdr;

private:
Env();

Expand Down
10 changes: 10 additions & 0 deletions include/mscclpp/semaphore.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ namespace mscclpp {
class Host2DeviceSemaphore {
private:
Semaphore semaphore_;
std::shared_ptr<uint64_t> inboundToken_;
detail::UniqueGpuPtr<uint64_t> expectedInboundToken_;
std::unique_ptr<uint64_t> outboundToken_;

Expand All @@ -29,6 +30,15 @@ class Host2DeviceSemaphore {
/// @param connection The connection associated with this semaphore.
Host2DeviceSemaphore(Communicator& communicator, const Connection& connection);

/// Destructor.
~Host2DeviceSemaphore();

/// Move constructor.
Host2DeviceSemaphore(Host2DeviceSemaphore&&) noexcept = default;

/// Move assignment operator.
Host2DeviceSemaphore& operator=(Host2DeviceSemaphore&&) noexcept = default;

/// Returns the connection.
/// @return The connection associated with this semaphore.
Connection& connection();
Expand Down
10 changes: 10 additions & 0 deletions src/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,16 @@ if(MSCCLPP_USE_IB)
target_include_directories(mscclpp_obj SYSTEM PRIVATE ${IBVERBS_INCLUDE_DIRS})
target_link_libraries(mscclpp_obj PRIVATE ${IBVERBS_LIBRARIES})
target_compile_definitions(mscclpp_obj PUBLIC USE_IBVERBS)
if(MLX5_FOUND)
target_include_directories(mscclpp_obj SYSTEM PRIVATE ${MLX5_INCLUDE_DIRS})
target_compile_definitions(mscclpp_obj PRIVATE MSCCLPP_USE_MLX5DV)
endif()
endif()

if(MSCCLPP_USE_GDRCOPY)
target_include_directories(mscclpp_obj SYSTEM PRIVATE ${GDRCOPY_INCLUDE_DIRS})
target_link_libraries(mscclpp_obj PRIVATE ${GDRCOPY_LIBRARIES})
target_compile_definitions(mscclpp_obj PRIVATE MSCCLPP_USE_GDRCOPY)
endif()

set_target_properties(mscclpp_obj PROPERTIES LINKER_LANGUAGE CXX POSITION_INDEPENDENT_CODE 1 VERSION ${MSCCLPP_VERSION} SOVERSION ${MSCCLPP_SOVERSION})
Expand Down
Loading
Loading