Merged
Changes from all commits
44 commits
705bd0c
added runtime cuda toggle
kSkip Feb 25, 2025
4b8feab
only set if built with cuda
kSkip Feb 25, 2025
3ea1be1
fixed missing macro condition
kSkip Feb 25, 2025
9ca4809
Fixed another missing directive
kSkip Feb 26, 2025
49f8b0a
Check image size when reading targa file (#3058)
tabudz Feb 27, 2025
2a82cc3
change the default u32string char type to char32_t (#3059)
wenbingl Feb 27, 2025
8bf1f52
Change version
davisking Mar 3, 2025
1a38b7a
change version back
davisking Mar 3, 2025
4a402ba
`throw()` -> `noexcept`
r-barnes Mar 3, 2025
3b5f484
Fix a bug when getting a gzip header extra field with inflate(). (#3063)
tabudz Mar 4, 2025
3b37d5c
Drop namespace std (#3067)
penguinpee Mar 16, 2025
f14e77b
Add Byte Pair Encoding (BPE) class for subword tokenization (#3056)
Cydral Mar 23, 2025
207c3d7
cleanup serialization code and add missing fields
davisking Mar 24, 2025
9ca200f
Some more cleanup
davisking Mar 24, 2025
44fbbeb
fix SyntaxWarning: invalid escape sequence '\(' (#3069)
Dobatymo Apr 5, 2025
883dd88
Merge branch 'refs/heads/master' into kSkip-cpu-cuda-toggle
davisking Apr 19, 2025
47af03f
Defined macros to wrap use_cuda() branches
kSkip May 6, 2025
dc33fa6
Added docs for new public functions
kSkip May 6, 2025
652af01
Add linear_ layer for neural networks (#3074)
Cydral May 3, 2025
f97db8f
ci: remove unsupported ubuntu 20.04 (#3075)
arrufat May 9, 2025
ec1881b
Add `reshape_to` layer for flexible tensor reshaping/rescaling (#3076)
Cydral May 23, 2025
696586c
tagging a new release
davisking May 15, 2025
b9f5fa1
put back to .99
davisking May 15, 2025
412ef35
Merge branch 'refs/heads/master' into kSkip-cpu-cuda-toggle
davisking May 24, 2025
0a77590
Make use_cuda() only set to true if there is also a GPU available
davisking May 24, 2025
458e785
Merge branch 'davisking:master' into cpu-cuda-toggle
kSkip Jul 16, 2025
345b9b7
Allocate CUDA memory only when use_cuda() returns true
kSkip Jul 18, 2025
8a6d0b7
Merge branch 'master' into cpu-cuda-toggle
kSkip Jan 5, 2026
40dc2dd
update to use newer sphinx
davisking Jan 25, 2026
5a490e0
update build rules to work with latest python build practices (#3134)
davisking Feb 5, 2026
94553fd
Update path to mkl and kiss fft headers (#3136)
kSkip Feb 15, 2026
0e667c1
Improve numerical robustness of find_min_trust_region()
davisking Mar 9, 2026
adf7593
fix(test/string): fix gcc 16 build issue (#3137)
ykshek Mar 17, 2026
1fdece3
tag 20.0.1
davisking Mar 29, 2026
fcf5fbe
set back to .99
davisking Mar 29, 2026
eea87b9
Add undo and redo functionality to imglab (#606) (#3143)
gzbykyasin Apr 25, 2026
dcc211e
Do some cleanup
davisking Apr 25, 2026
679b01e
Merge branch 'refs/heads/master' into kSkip-cpu-cuda-toggle
davisking Apr 25, 2026
d1b6bcc
switch to a single immutable global env var that controls cuda use
davisking Apr 25, 2026
5447b10
more cleanup
davisking May 3, 2026
dbc8a44
more cleanup
davisking May 3, 2026
7776832
cleanup
davisking May 3, 2026
f743b43
more cleanup
davisking May 3, 2026
1c8455d
more cleanup
davisking May 3, 2026
69 changes: 66 additions & 3 deletions dlib/cuda/cuda_dlib.cu
@@ -5,10 +5,42 @@
#include "cuda_dlib.h"
#include "cudnn_dlibapi.h"
#include <math_constants.h>
#include <cstdlib>
#include <cstring>


namespace dlib
{
namespace
{
bool cuda_device_available (
)
{
int num_devices;
return cudaGetDeviceCount(&num_devices) == cudaSuccess && num_devices > 0;
}

bool cuda_disabled_by_environment (
)
{
const char* var = std::getenv("DLIB_DISABLE_CUDA_USE");
return var != nullptr &&
std::strcmp(var, "") != 0 &&
std::strcmp(var, "0") != 0 &&
std::strcmp(var, "false") != 0 &&
std::strcmp(var, "False") != 0 &&
std::strcmp(var, "FALSE") != 0;
}

bool use_cuda_impl (
)
{
static const bool var = !cuda_disabled_by_environment() && cuda_device_available();
return var;
}

}

namespace cuda
{

@@ -18,21 +50,34 @@ namespace dlib
int dev
)
{
if (!use_cuda())
{
DLIB_CASSERT(dev == 0, "dlib::cuda::set_device(id) called with an invalid device id.");
return;
}

CHECK_CUDA(cudaSetDevice(dev));
}

int get_device (
)
{
int dev = 0;
CHECK_CUDA(cudaGetDevice(&dev));
if (use_cuda())
CHECK_CUDA(cudaGetDevice(&dev));
return dev;
}

std::string get_device_name (
int device
)
{
if (!use_cuda())
{
DLIB_CASSERT(device == 0, "dlib::cuda::get_device_name(device) called with an invalid device id.");
return "CUDA_DISABLED";
}

cudaDeviceProp props;
CHECK_CUDA(cudaGetDeviceProperties(&props, device));
return props.name;
@@ -41,19 +86,32 @@ namespace dlib
void set_current_device_blocking_sync(
)
{
CHECK_CUDA(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
if (use_cuda())
CHECK_CUDA(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
}

bool use_cuda(
)
{
return use_cuda_impl();
}

int get_num_devices (
)
{
if (!use_cuda())
return 0;

int num_devices;
CHECK_CUDA(cudaGetDeviceCount(&num_devices));
return num_devices;
}

bool can_access_peer (int device_id, int peer_device_id)
{
if (!use_cuda())
return false;

int can_access;
CHECK_CUDA(cudaDeviceCanAccessPeer(&can_access, device_id, peer_device_id));
return can_access != 0;
@@ -65,6 +123,9 @@

void device_synchronize (int dev)
{
if (!use_cuda())
return;

raii_set_device set_dev(dev);
CHECK_CUDA(cudaDeviceSynchronize());
}
@@ -76,6 +137,9 @@
int peer_device_id
) : call_disable(false), device_id(device_id), peer_device_id(peer_device_id)
{
if (!use_cuda())
return;

raii_set_device set_dev(device_id);

auto err = cudaDeviceEnablePeerAccess(peer_device_id, 0);
@@ -3220,4 +3284,3 @@ namespace dlib

}
}
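
To make the semantics above concrete, here is a minimal sketch of the toggle from an application's point of view. The program is illustrative only (not part of this PR) and assumes a dlib built with DLIB_USE_CUDA, with dlib/dnn.h providing the dlib::cuda declarations:

#include <iostream>
#include <dlib/dnn.h>

int main()
{
    // use_cuda() is computed once, on first use, from an immutable global:
    // it is true only if DLIB_DISABLE_CUDA_USE was unset (or "", "0",
    // "false", "False", "FALSE") when first checked AND a CUDA device is visible.
    if (dlib::cuda::use_cuda())
        std::cout << "CUDA in use, devices: " << dlib::cuda::get_num_devices() << "\n";
    else
        std::cout << "CPU only\n";  // e.g. launched as: DLIB_DISABLE_CUDA_USE=1 ./app

    // The result is cached in a function-local static, so changing the
    // environment variable later in the process has no effect.
}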

6 changes: 6 additions & 0 deletions dlib/cuda/cuda_dlib.h
@@ -25,6 +25,9 @@ namespace dlib
int get_num_devices (
);

bool use_cuda(
);

std::string get_device_name (
int device
);
@@ -942,6 +945,9 @@ namespace dlib
inline int get_num_devices (
) { return 1; }

inline bool use_cuda(
) { return false; }

inline std::string get_device_name (
int device
)
12 changes: 8 additions & 4 deletions dlib/cuda/curand_dlibapi.cpp
@@ -6,6 +6,7 @@
#ifdef DLIB_USE_CUDA

#include "curand_dlibapi.h"
#include "cuda_dlib.h"
#include <curand.h>
#include "../string.h"

@@ -47,11 +48,14 @@ namespace dlib
unsigned long long seed
) : handle(nullptr)
{
curandGenerator_t gen;
CHECK_CURAND(curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT));
handle = gen;
if (use_cuda())
{
curandGenerator_t gen;
CHECK_CURAND(curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT));
handle = gen;

CHECK_CURAND(curandSetPseudoRandomGeneratorSeed(gen, seed));
}
}

curand_generator::
22 changes: 19 additions & 3 deletions dlib/cuda/gpu_data.cpp
@@ -54,13 +54,19 @@ namespace dlib
}
else
{
if (!cuda::use_cuda())
{
std::memcpy(dest.host()+dest_offset, src.host()+src_offset, num*sizeof(float));
return;
}

// if we write to the entire thing then we can use device_write_only()
if (dest_offset == 0 && num == dest.size())
{
// copy the memory efficiently based on which copy is current in each object.
if (src.device_ready())
CHECK_CUDA(cudaMemcpy(dest.device_write_only(), src.device()+src_offset, num*sizeof(float), cudaMemcpyDeviceToDevice));
else
CHECK_CUDA(cudaMemcpy(dest.device_write_only(), src.host()+src_offset, num*sizeof(float), cudaMemcpyHostToDevice));
}
else
@@ -72,7 +78,7 @@ namespace dlib
CHECK_CUDA(cudaMemcpy(dest.host()+dest_offset, src.device()+src_offset, num*sizeof(float), cudaMemcpyDeviceToHost));
else if (dest.device_ready() && !src.device_ready())
CHECK_CUDA(cudaMemcpy(dest.device()+dest_offset, src.host()+src_offset, num*sizeof(float), cudaMemcpyHostToDevice));
else
CHECK_CUDA(cudaMemcpy(dest.host()+dest_offset, src.host()+src_offset, num*sizeof(float), cudaMemcpyHostToHost));
}
}
@@ -147,6 +153,9 @@ namespace dlib
void gpu_data::
async_copy_to_device() const
{
if (!cuda::use_cuda())
return;

if (!device_current)
{
if (device_in_use)
@@ -181,6 +190,7 @@ namespace dlib
host_current = true;
device_current = true;
device_in_use = false;
the_device_id = 0;
data_host.reset();
data_device.reset();
}
@@ -199,6 +209,13 @@ namespace dlib
device_current = true;
device_in_use = false;

if (!cuda::use_cuda())
{
data_host.reset(new float[new_size], std::default_delete<float[]>());
the_device_id = 0;
return;
}

try
{
CHECK_CUDA(cudaGetDevice(&the_device_id));
@@ -251,4 +268,3 @@ namespace dlib
#endif // DLIB_USE_CUDA

#endif // DLIB_GPU_DaTA_CPP_
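
With CUDA disabled at runtime, set_size() now takes the host-only branch above: it allocates a plain float array and never touches the CUDA allocator. A short sketch of the resulting behavior (hypothetical usage, not code from this PR):

dlib::gpu_data buf;
buf.set_size(1024);     // host-only path: new float[1024], no cudaMalloc
float* p = buf.host();  // valid exactly as before
p[0] = 1;               // the data lives purely in host memory
// buf.device() would violate its new precondition here, since
// cuda::use_cuda() == false; see gpu_data_abstract.h below.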

24 changes: 22 additions & 2 deletions dlib/cuda/gpu_data.h
@@ -12,6 +12,14 @@
namespace dlib
{

// ----------------------------------------------------------------------------------------

namespace cuda
{
bool use_cuda(
);
}

// ----------------------------------------------------------------------------------------

class gpu_data
@@ -108,6 +116,8 @@ namespace dlib
{
#ifndef DLIB_USE_CUDA
DLIB_CASSERT(false, "CUDA NOT ENABLED");
#else
DLIB_CASSERT(cuda::use_cuda(), "CUDA disabled");
#endif
copy_to_device();
device_in_use = true;
@@ -118,6 +128,8 @@
{
#ifndef DLIB_USE_CUDA
DLIB_CASSERT(false, "CUDA NOT ENABLED");
#else
DLIB_CASSERT(cuda::use_cuda(), "CUDA disabled");
#endif
copy_to_device();
host_current = false;
@@ -129,6 +141,8 @@
{
#ifndef DLIB_USE_CUDA
DLIB_CASSERT(false, "CUDA NOT ENABLED");
#else
DLIB_CASSERT(cuda::use_cuda(), "CUDA disabled");
#endif
wait_for_transfer_to_finish();
host_current = false;
@@ -141,7 +155,14 @@
) const { return host_current; }

bool device_ready (
) const { return device_current && !have_active_transfer; }
) const
{
#ifdef DLIB_USE_CUDA
if (!cuda::use_cuda() && size() != 0)
return false;
#endif
return device_current && !have_active_transfer;
}

size_t size() const { return data_size; }

@@ -263,4 +284,3 @@
}

#endif // DLIB_GPU_DaTA_H_

15 changes: 12 additions & 3 deletions dlib/cuda/gpu_data_abstract.h
@@ -28,7 +28,10 @@ namespace dlib
to the host do not happen before the relevant computations have completed.

If DLIB_USE_CUDA is not #defined then this object will not use CUDA at all.
Instead, it will simply store one host side memory block of floats.
Similarly, if DLIB_USE_CUDA is #defined but cuda::use_cuda() == false,
then this object will be host only and will not allocate a CUDA device
memory block.

THREAD SAFETY
Instances of this object are not thread-safe. So don't touch one from
@@ -67,6 +70,8 @@ namespace dlib
);
/*!
ensures
- if (cuda::use_cuda() == false) then
- this function does nothing.
- if (!device_ready()) then
- Begins asynchronously copying host data to the device once it is safe
to do so. I.e. This function will wait until any previously
@@ -99,10 +104,12 @@ namespace dlib
) const;
/*!
ensures
- returns true if and only if the device's copy of the data is current.
- returns true if and only if the device's copy of the data exists and is current.
The device's data is current if there aren't any modifications to the
data which were made on the host side that have yet to be copied to the
device.
- if (DLIB_USE_CUDA is defined && cuda::use_cuda() == false && size() != 0) then
- returns false.
!*/

const float* host(
Expand Down Expand Up @@ -153,6 +160,7 @@ namespace dlib
/*!
requires
- DLIB_USE_CUDA is #defined
- cuda::use_cuda() == true
ensures
- returns a pointer to the device memory block of size() contiguous float
values or nullptr if size()==0.
@@ -167,6 +175,7 @@
/*!
requires
- DLIB_USE_CUDA is #defined
- cuda::use_cuda() == true
ensures
- returns a pointer to the device memory block of size() contiguous float
values or nullptr if size()==0.
@@ -182,6 +191,7 @@
/*!
requires
- DLIB_USE_CUDA is #defined
- cuda::use_cuda() == true
ensures
- This function returns the same pointer as device(), except that it never
performs a host to device memory copy. Instead, it immediately marks the
@@ -263,4 +273,3 @@
}

#endif // DLIB_GPU_DaTA_ABSTRACT_H_
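
Given the tightened contracts above, device pointers should only be requested when CUDA is actually in use. A minimal sketch of a mode-agnostic access pattern (the function and the kernel launch are hypothetical; only the dlib calls are from the library):

#include <cstddef>
#include <dlib/dnn.h>

void scale_in_place(dlib::gpu_data& data, float alpha)
{
    if (dlib::cuda::use_cuda())
    {
        // The preconditions of device() hold: built with DLIB_USE_CUDA
        // and CUDA enabled at runtime.
        float* d = data.device();
        // ... launch a CUDA kernel on d here ...
        (void)d;
    }
    else
    {
        // Host fallback: host() is valid in every configuration.
        float* h = data.host();
        for (std::size_t i = 0; i < data.size(); ++i)
            h[i] *= alpha;
    }
}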
