Merged
Changes from all commits
44 commits
705bd0c
added runtime cuda toggle
kSkip Feb 25, 2025
4b8feab
only set if built with cuda
kSkip Feb 25, 2025
3ea1be1
fixed missing macro condition
kSkip Feb 25, 2025
9ca4809
Fixed another missing directive
kSkip Feb 26, 2025
49f8b0a
Check image size when reading targa file (#3058)
tabudz Feb 27, 2025
2a82cc3
change the default u32string char type to char32_t (#3059)
wenbingl Feb 27, 2025
8bf1f52
Change version
davisking Mar 3, 2025
1a38b7a
change version back
davisking Mar 3, 2025
4a402ba
`throw()` -> `noexcept`
r-barnes Mar 3, 2025
3b5f484
Fix a bug when getting a gzip header extra field with inflate(). (#3063)
tabudz Mar 4, 2025
3b37d5c
Drop namespace std (#3067)
penguinpee Mar 16, 2025
f14e77b
Add Byte Pair Encoding (BPE) class for subword tokenization (#3056)
Cydral Mar 23, 2025
207c3d7
cleanup serialization code and add missing fields
davisking Mar 24, 2025
9ca200f
Some more cleanup
davisking Mar 24, 2025
44fbbeb
fix SyntaxWarning: invalid escape sequence '\(' (#3069)
Dobatymo Apr 5, 2025
883dd88
Merge branch 'refs/heads/master' into kSkip-cpu-cuda-toggle
davisking Apr 19, 2025
47af03f
Defined macros to wrap use_cuda() branches
kSkip May 6, 2025
dc33fa6
Added docs for new public functions
kSkip May 6, 2025
652af01
Add linear_ layer for neural networks (#3074)
Cydral May 3, 2025
f97db8f
ci: remove unsupported ubuntu 20.04 (#3075)
arrufat May 9, 2025
ec1881b
Add `reshape_to` layer for flexible tensor reshaping/rescaling (#3076)
Cydral May 23, 2025
696586c
tagging a new release
davisking May 15, 2025
b9f5fa1
put back to .99
davisking May 15, 2025
412ef35
Merge branch 'refs/heads/master' into kSkip-cpu-cuda-toggle
davisking May 24, 2025
0a77590
Make use_cuda() only set to true if there is also a GPU available
davisking May 24, 2025
458e785
Merge branch 'davisking:master' into cpu-cuda-toggle
kSkip Jul 16, 2025
345b9b7
Allocate CUDA memory only when use_cuda() returns true
kSkip Jul 18, 2025
8a6d0b7
Merge branch 'master' into cpu-cuda-toggle
kSkip Jan 5, 2026
40dc2dd
update to use newer sphinx
davisking Jan 25, 2026
5a490e0
update build rules to work with latest python build practices (#3134)
davisking Feb 5, 2026
94553fd
Update path to mkl and kiss fft headers (#3136)
kSkip Feb 15, 2026
0e667c1
Improve numerical robustness of find_min_trust_region()
davisking Mar 9, 2026
adf7593
fix(test/string): fix gcc 16 build issue (#3137)
ykshek Mar 17, 2026
1fdece3
tag 20.0.1
davisking Mar 29, 2026
fcf5fbe
set back to .99
davisking Mar 29, 2026
eea87b9
Add undo and redo functionality to imglab (#606) (#3143)
gzbykyasin Apr 25, 2026
dcc211e
Do some cleanup
davisking Apr 25, 2026
679b01e
Merge branch 'refs/heads/master' into kSkip-cpu-cuda-toggle
davisking Apr 25, 2026
d1b6bcc
switch to a single immutable global env var that controls cuda use
davisking Apr 25, 2026
5447b10
more cleanup
davisking May 3, 2026
dbc8a44
more cleanup
davisking May 3, 2026
7776832
cleanup
davisking May 3, 2026
f743b43
more cleanup
davisking May 3, 2026
1c8455d
more cleanup
davisking May 3, 2026
69 changes: 66 additions & 3 deletions dlib/cuda/cuda_dlib.cu
@@ -5,10 +5,42 @@
#include "cuda_dlib.h"
#include "cudnn_dlibapi.h"
#include <math_constants.h>
#include <cstdlib>
#include <cstring>


namespace dlib
{
namespace
{
bool cuda_device_available (
)
{
int num_devices;
return cudaGetDeviceCount(&num_devices) == cudaSuccess && num_devices > 0;
}

bool cuda_disabled_by_environment (
)
{
const char* var = std::getenv("DLIB_DISABLE_CUDA_USE");
return var != nullptr &&
std::strcmp(var, "") != 0 &&
std::strcmp(var, "0") != 0 &&
std::strcmp(var, "false") != 0 &&
std::strcmp(var, "False") != 0 &&
std::strcmp(var, "FALSE") != 0;
}

bool use_cuda_impl (
)
{
static const bool var = !cuda_disabled_by_environment() && cuda_device_available();
return var;
}

}

namespace cuda
{

@@ -18,21 +50,34 @@ namespace dlib
int dev
)
{
if (!use_cuda())
{
DLIB_CASSERT(dev == 0, "dlib::cuda::set_device(id) called with an invalid device id.");
return;
}

CHECK_CUDA(cudaSetDevice(dev));
}

int get_device (
)
{
int dev = 0;
CHECK_CUDA(cudaGetDevice(&dev));
if (use_cuda())
CHECK_CUDA(cudaGetDevice(&dev));
return dev;
}

std::string get_device_name (
int device
)
{
if (!use_cuda())
{
DLIB_CASSERT(device == 0, "dlib::cuda::get_device_name(device) called with an invalid device id.");
return "CUDA_DISABLED";
}

cudaDeviceProp props;
CHECK_CUDA(cudaGetDeviceProperties(&props, device));
return props.name;
@@ -41,19 +86,32 @@ namespace dlib
void set_current_device_blocking_sync(
)
{
CHECK_CUDA(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
if (use_cuda())
CHECK_CUDA(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
}

bool use_cuda(
)
{
return use_cuda_impl();
}

int get_num_devices (
)
{
if (!use_cuda())
return 0;

int num_devices;
CHECK_CUDA(cudaGetDeviceCount(&num_devices));
return num_devices;
}

bool can_access_peer (int device_id, int peer_device_id)
{
if (!use_cuda())
return false;

int can_access;
CHECK_CUDA(cudaDeviceCanAccessPeer(&can_access, device_id, peer_device_id));
return can_access != 0;
@@ -65,6 +123,9 @@

void device_synchronize (int dev)
{
if (!use_cuda())
return;

raii_set_device set_dev(dev);
CHECK_CUDA(cudaDeviceSynchronize());
}
@@ -76,6 +137,9 @@
int peer_device_id
) : call_disable(false), device_id(device_id), peer_device_id(peer_device_id)
{
if (!use_cuda())
return;

raii_set_device set_dev(device_id);

auto err = cudaDeviceEnablePeerAccess(peer_device_id, 0);
@@ -3220,4 +3284,3 @@ namespace dlib

}
}
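
To make the semantics above concrete, here is a minimal sketch of the toggle from an application's point of view. The program is illustrative only (not part of this PR) and assumes a dlib built with DLIB_USE_CUDA, with dlib/dnn.h providing the dlib::cuda declarations:

#include <iostream>
#include <dlib/dnn.h>

int main()
{
    // use_cuda() is computed once, on first use, from an immutable global:
    // it is true only if DLIB_DISABLE_CUDA_USE was unset (or "", "0",
    // "false", "False", "FALSE") when first checked AND a CUDA device is visible.
    if (dlib::cuda::use_cuda())
        std::cout << "CUDA in use, devices: " << dlib::cuda::get_num_devices() << "\n";
    else
        std::cout << "CPU only\n";  // e.g. launched as: DLIB_DISABLE_CUDA_USE=1 ./app

    // The result is cached in a function-local static, so changing the
    // environment variable later in the process has no effect.
}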

6 changes: 6 additions & 0 deletions dlib/cuda/cuda_dlib.h
@@ -25,6 +25,9 @@ namespace dlib
int get_num_devices (
);

bool use_cuda(
);

std::string get_device_name (
int device
);
@@ -942,6 +945,9 @@ namespace dlib
inline int get_num_devices (
) { return 1; }

inline bool use_cuda(
) { return false; }

inline std::string get_device_name (
int device
)
12 changes: 8 additions & 4 deletions dlib/cuda/curand_dlibapi.cpp
@@ -6,6 +6,7 @@
#ifdef DLIB_USE_CUDA

#include "curand_dlibapi.h"
#include "cuda_dlib.h"
#include <curand.h>
#include "../string.h"

@@ -47,11 +48,14 @@ namespace dlib
unsigned long long seed
) : handle(nullptr)
{
curandGenerator_t gen;
CHECK_CURAND(curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT));
handle = gen;
if (use_cuda())
{
curandGenerator_t gen;
CHECK_CURAND(curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT));
handle = gen;

CHECK_CURAND(curandSetPseudoRandomGeneratorSeed(gen, seed));
}
}

curand_generator::
22 changes: 19 additions & 3 deletions dlib/cuda/gpu_data.cpp
@@ -54,13 +54,19 @@ namespace dlib
}
else
{
if (!cuda::use_cuda())
{
std::memcpy(dest.host()+dest_offset, src.host()+src_offset, num*sizeof(float));
return;
}

// if we write to the entire thing then we can use device_write_only()
if (dest_offset == 0 && num == dest.size())
{
// copy the memory efficiently based on which copy is current in each object.
if (src.device_ready())
CHECK_CUDA(cudaMemcpy(dest.device_write_only(), src.device()+src_offset, num*sizeof(float), cudaMemcpyDeviceToDevice));
else
CHECK_CUDA(cudaMemcpy(dest.device_write_only(), src.host()+src_offset, num*sizeof(float), cudaMemcpyHostToDevice));
}
else
@@ -72,7 +78,7 @@ namespace dlib
CHECK_CUDA(cudaMemcpy(dest.host()+dest_offset, src.device()+src_offset, num*sizeof(float), cudaMemcpyDeviceToHost));
else if (dest.device_ready() && !src.device_ready())
CHECK_CUDA(cudaMemcpy(dest.device()+dest_offset, src.host()+src_offset, num*sizeof(float), cudaMemcpyHostToDevice));
else
CHECK_CUDA(cudaMemcpy(dest.host()+dest_offset, src.host()+src_offset, num*sizeof(float), cudaMemcpyHostToHost));
}
}
@@ -147,6 +153,9 @@ namespace dlib
void gpu_data::
async_copy_to_device() const
{
if (!cuda::use_cuda())
return;

if (!device_current)
{
if (device_in_use)
@@ -181,6 +190,7 @@ namespace dlib
host_current = true;
device_current = true;
device_in_use = false;
the_device_id = 0;
data_host.reset();
data_device.reset();
}
@@ -199,6 +209,13 @@ namespace dlib
device_current = true;
device_in_use = false;

if (!cuda::use_cuda())
{
data_host.reset(new float[new_size], std::default_delete<float[]>());
the_device_id = 0;
return;
}

try
{
CHECK_CUDA(cudaGetDevice(&the_device_id));
@@ -251,4 +268,3 @@ namespace dlib
#endif // DLIB_USE_CUDA

#endif // DLIB_GPU_DaTA_CPP_
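
With CUDA disabled at runtime, set_size() now takes the host-only branch above: it allocates a plain float array and never touches the CUDA allocator. A short sketch of the resulting behavior (hypothetical usage, not code from this PR):

dlib::gpu_data buf;
buf.set_size(1024);     // host-only path: new float[1024], no cudaMalloc
float* p = buf.host();  // valid exactly as before
p[0] = 1;               // the data lives purely in host memory
// buf.device() would violate its new precondition here, since
// cuda::use_cuda() == false; see gpu_data_abstract.h below.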

24 changes: 22 additions & 2 deletions dlib/cuda/gpu_data.h
@@ -12,6 +12,14 @@
namespace dlib
{

// ----------------------------------------------------------------------------------------

namespace cuda
{
bool use_cuda(
);
}

// ----------------------------------------------------------------------------------------

class gpu_data
@@ -108,6 +116,8 @@ namespace dlib
{
#ifndef DLIB_USE_CUDA
DLIB_CASSERT(false, "CUDA NOT ENABLED");
#else
DLIB_CASSERT(cuda::use_cuda(), "CUDA disabled");
#endif
copy_to_device();
device_in_use = true;
@@ -118,6 +128,8 @@
{
#ifndef DLIB_USE_CUDA
DLIB_CASSERT(false, "CUDA NOT ENABLED");
#else
DLIB_CASSERT(cuda::use_cuda(), "CUDA disabled");
#endif
copy_to_device();
host_current = false;
@@ -129,6 +141,8 @@
{
#ifndef DLIB_USE_CUDA
DLIB_CASSERT(false, "CUDA NOT ENABLED");
#else
DLIB_CASSERT(cuda::use_cuda(), "CUDA disabled");
#endif
wait_for_transfer_to_finish();
host_current = false;
@@ -141,7 +155,14 @@
) const { return host_current; }

bool device_ready (
) const { return device_current && !have_active_transfer; }
) const
{
#ifdef DLIB_USE_CUDA
if (!cuda::use_cuda() && size() != 0)
return false;
#endif
return device_current && !have_active_transfer;
}

size_t size() const { return data_size; }

@@ -263,4 +284,3 @@
}

#endif // DLIB_GPU_DaTA_H_

15 changes: 12 additions & 3 deletions dlib/cuda/gpu_data_abstract.h
@@ -28,7 +28,10 @@ namespace dlib
to the host do not happen before the relevant computations have completed.

If DLIB_USE_CUDA is not #defined then this object will not use CUDA at all.
Instead, it will simply store one host side memory block of floats.
Similarly, if DLIB_USE_CUDA is #defined but cuda::use_cuda() == false,
then this object will be host only and will not allocate a CUDA device
memory block.

THREAD SAFETY
Instances of this object are not thread-safe. So don't touch one from
@@ -67,6 +70,8 @@ namespace dlib
);
/*!
ensures
- if (cuda::use_cuda() == false) then
- this function does nothing.
- if (!device_ready()) then
- Begins asynchronously copying host data to the device once it is safe
to do so. I.e. This function will wait until any previously
@@ -99,10 +104,12 @@ namespace dlib
) const;
/*!
ensures
- returns true if and only if the device's copy of the data is current.
- returns true if and only if the device's copy of the data exists and is current.
The device's data is current if there aren't any modifications to the
data which were made on the host side that have yet to be copied to the
device.
- if (DLIB_USE_CUDA is defined && cuda::use_cuda() == false && size() != 0) then
- returns false.
!*/

const float* host(
Expand Down Expand Up @@ -153,6 +160,7 @@ namespace dlib
/*!
requires
- DLIB_USE_CUDA is #defined
- cuda::use_cuda() == true
ensures
- returns a pointer to the device memory block of size() contiguous float
values or nullptr if size()==0.
@@ -167,6 +175,7 @@
/*!
requires
- DLIB_USE_CUDA is #defined
- cuda::use_cuda() == true
ensures
- returns a pointer to the device memory block of size() contiguous float
values or nullptr if size()==0.
@@ -182,6 +191,7 @@
/*!
requires
- DLIB_USE_CUDA is #defined
- cuda::use_cuda() == true
ensures
- This function returns the same pointer as device(), except that it never
performs a host to device memory copy. Instead, it immediately marks the
@@ -263,4 +273,3 @@
}

#endif // DLIB_GPU_DaTA_ABSTRACT_H_
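
Given the tightened contracts above, device pointers should only be requested when CUDA is actually in use. A minimal sketch of a mode-agnostic access pattern (the function and the kernel launch are hypothetical; only the dlib calls are from the library):

#include <cstddef>
#include <dlib/dnn.h>

void scale_in_place(dlib::gpu_data& data, float alpha)
{
    if (dlib::cuda::use_cuda())
    {
        // The preconditions of device() hold: built with DLIB_USE_CUDA
        // and CUDA enabled at runtime.
        float* d = data.device();
        // ... launch a CUDA kernel on d here ...
        (void)d;
    }
    else
    {
        // Host fallback: host() is valid in every configuration.
        float* h = data.host();
        for (std::size_t i = 0; i < data.size(); ++i)
            h[i] *= alpha;
    }
}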
