From 91a957d3e8f9d94a94705a66e7f922ea8f02cb75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Szymon=20=C5=81opaciuk?= Date: Wed, 29 Apr 2026 15:39:06 +0200 Subject: [PATCH] CUDA: enable -Ofc only when supported by the compiler --- xobjects/context_cupy.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/xobjects/context_cupy.py b/xobjects/context_cupy.py index 715040b..37d3fdd 100644 --- a/xobjects/context_cupy.py +++ b/xobjects/context_cupy.py @@ -4,6 +4,7 @@ # ########################################### # import logging +import warnings from typing import Dict, List, Tuple import numpy as np @@ -32,6 +33,7 @@ import cupyx.scipy.special import cupyx.scipy.stats from cupyx.scipy import fftpack as cufftp + from cupy_backends.cuda.libs import nvrtc _enabled = True except ImportError: @@ -40,6 +42,7 @@ message=("cupy is not installed. " "ContextCupy is not available!") ) cufftp = cupy + nvrtc = None _enabled = False if _enabled: @@ -466,12 +469,14 @@ def build_kernels( *extra_compile_args, *include_flags, "-DXO_CONTEXT_CUDA", - # Skip heavy optimizations (e.g. involving cloning), - # which for us don't translate to a lot of runtime gains, - # but consume a lot of compile time and memory: - "--Ofast-compile=min", ) + if nvrtc and nvrtc.getVersion() >= (12, 9): + # If supported, skip prohibitively heavy optimisations (e.g. + # involving cloning). This is at the expense of <20% + # runtime performance, but saves a lot of compile time and memory. + extra_compile_args += ("--Ofast-compile=min",) + module = cupy.RawModule( code=specialized_source, options=extra_compile_args )