From eaefade6a8a31eeb32a51b1ea9c443a3b766e1b0 Mon Sep 17 00:00:00 2001 From: Simba Zhang Date: Sun, 8 Mar 2026 15:37:58 -0700 Subject: [PATCH 1/7] fix(env_config): graceful CPU fallback when ROCm PyTorch unavailable - load_optimized() now catches device='cuda' failures on ROCm systems where PyTorch-ROCm is not installed, degrades to CPU gracefully - deploy.sh removes CPU-only onnxruntime before installing onnxruntime-rocm to prevent the shadowing bug --- skills/detection/yolo-detection-2026/deploy.sh | 8 ++++++++ .../yolo-detection-2026/scripts/env_config.py | 14 ++++++++++++-- skills/lib/env_config.py | 14 ++++++++++++-- 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/skills/detection/yolo-detection-2026/deploy.sh b/skills/detection/yolo-detection-2026/deploy.sh index a1e4771..b9a277d 100755 --- a/skills/detection/yolo-detection-2026/deploy.sh +++ b/skills/detection/yolo-detection-2026/deploy.sh @@ -160,6 +160,14 @@ fi log "Installing dependencies from $REQ_FILE ..." emit "{\"event\": \"progress\", \"stage\": \"install\", \"message\": \"Installing $BACKEND dependencies...\"}" +# ROCm: remove CPU-only onnxruntime if present (it shadows onnxruntime-rocm) +if [ "$BACKEND" = "rocm" ]; then + if "$PIP" show onnxruntime &>/dev/null 2>&1; then + log "Removing CPU-only onnxruntime to avoid shadowing onnxruntime-rocm..." + "$PIP" uninstall -y onnxruntime -q 2>&1 || true + fi +fi + "$PIP" install -r "$REQ_FILE" -q 2>&1 | tail -5 >&2 # ─── Step 5: Pre-convert model to optimized format ─────────────────────────── diff --git a/skills/detection/yolo-detection-2026/scripts/env_config.py b/skills/detection/yolo-detection-2026/scripts/env_config.py index 49935b1..7f4baa6 100644 --- a/skills/detection/yolo-detection-2026/scripts/env_config.py +++ b/skills/detection/yolo-detection-2026/scripts/env_config.py @@ -467,13 +467,23 @@ def load_optimized(self, model_name: str, use_optimized: bool = True): # Fallback: use the PT model we already loaded _log("Falling back to PyTorch model") - pt_model.to(self.device) + try: + pt_model.to(self.device) + except Exception as e: + _log(f"Device {self.device} unavailable ({e}), falling back to CPU") + self.device = "cpu" + pt_model.to("cpu") self.load_ms = (time.perf_counter() - t0) * 1000 return pt_model, "pytorch" # No optimization requested or framework missing model = YOLO(f"{model_name}.pt") - model.to(self.device) + try: + model.to(self.device) + except Exception as e: + _log(f"Device {self.device} unavailable ({e}), falling back to CPU") + self.device = "cpu" + model.to("cpu") self.load_ms = (time.perf_counter() - t0) * 1000 return model, "pytorch" diff --git a/skills/lib/env_config.py b/skills/lib/env_config.py index 49935b1..7f4baa6 100644 --- a/skills/lib/env_config.py +++ b/skills/lib/env_config.py @@ -467,13 +467,23 @@ def load_optimized(self, model_name: str, use_optimized: bool = True): # Fallback: use the PT model we already loaded _log("Falling back to PyTorch model") - pt_model.to(self.device) + try: + pt_model.to(self.device) + except Exception as e: + _log(f"Device {self.device} unavailable ({e}), falling back to CPU") + self.device = "cpu" + pt_model.to("cpu") self.load_ms = (time.perf_counter() - t0) * 1000 return pt_model, "pytorch" # No optimization requested or framework missing model = YOLO(f"{model_name}.pt") - model.to(self.device) + try: + model.to(self.device) + except Exception as e: + _log(f"Device {self.device} unavailable ({e}), falling back to CPU") + self.device = "cpu" + model.to("cpu") self.load_ms = (time.perf_counter() - t0) * 1000 return model, "pytorch" From fd87c7f50fb4330e223d646a6946810067e11155 Mon Sep 17 00:00:00 2001 From: Simba Zhang Date: Sun, 8 Mar 2026 19:46:21 -0700 Subject: [PATCH 2/7] fix(env_config): proactive torch.cuda guard for ROCm PyTorch fallback - _try_rocm() checks torch.cuda.is_available() before setting device='cuda' If PyTorch-ROCm is not installed, device stays 'cpu' from the start - load_optimized() fallback pre-checks torch.cuda instead of catching NVIDIA driver exceptions reactively (cleaner logs, no crash) - Added test: no-PyTorch-ROCm falls back to cpu device (15 tests total) --- .../yolo-detection-2026/scripts/env_config.py | 45 +++++++++++++------ skills/lib/env_config.py | 45 +++++++++++++------ skills/lib/test_env_config_rocm.py | 24 +++++++++- 3 files changed, 86 insertions(+), 28 deletions(-) diff --git a/skills/detection/yolo-detection-2026/scripts/env_config.py b/skills/detection/yolo-detection-2026/scripts/env_config.py index 7f4baa6..f7c7ddc 100644 --- a/skills/detection/yolo-detection-2026/scripts/env_config.py +++ b/skills/detection/yolo-detection-2026/scripts/env_config.py @@ -165,7 +165,16 @@ def _try_rocm(self) -> bool: return False self.backend = "rocm" - self.device = "cuda" # ROCm exposes as CUDA in PyTorch + # ROCm exposes as CUDA in PyTorch — but only if PyTorch-ROCm is installed + try: + import torch + if torch.cuda.is_available(): + self.device = "cuda" + else: + self.device = "cpu" + _log("PyTorch CUDA/ROCm not available, using CPU for PyTorch fallback") + except ImportError: + self.device = "cpu" # Strategy 1: amd-smi static --json (ROCm 6.3+/7.x, richest output) if has_amd_smi: @@ -467,23 +476,33 @@ def load_optimized(self, model_name: str, use_optimized: bool = True): # Fallback: use the PT model we already loaded _log("Falling back to PyTorch model") - try: - pt_model.to(self.device) - except Exception as e: - _log(f"Device {self.device} unavailable ({e}), falling back to CPU") - self.device = "cpu" - pt_model.to("cpu") + fallback_device = self.device + if fallback_device == "cuda": + try: + import torch + if not torch.cuda.is_available(): + fallback_device = "cpu" + _log("torch.cuda not available, falling back to CPU") + except ImportError: + fallback_device = "cpu" + pt_model.to(fallback_device) + self.device = fallback_device self.load_ms = (time.perf_counter() - t0) * 1000 return pt_model, "pytorch" # No optimization requested or framework missing model = YOLO(f"{model_name}.pt") - try: - model.to(self.device) - except Exception as e: - _log(f"Device {self.device} unavailable ({e}), falling back to CPU") - self.device = "cpu" - model.to("cpu") + fallback_device = self.device + if fallback_device == "cuda": + try: + import torch + if not torch.cuda.is_available(): + fallback_device = "cpu" + _log("torch.cuda not available, falling back to CPU") + except ImportError: + fallback_device = "cpu" + model.to(fallback_device) + self.device = fallback_device self.load_ms = (time.perf_counter() - t0) * 1000 return model, "pytorch" diff --git a/skills/lib/env_config.py b/skills/lib/env_config.py index 7f4baa6..f7c7ddc 100644 --- a/skills/lib/env_config.py +++ b/skills/lib/env_config.py @@ -165,7 +165,16 @@ def _try_rocm(self) -> bool: return False self.backend = "rocm" - self.device = "cuda" # ROCm exposes as CUDA in PyTorch + # ROCm exposes as CUDA in PyTorch — but only if PyTorch-ROCm is installed + try: + import torch + if torch.cuda.is_available(): + self.device = "cuda" + else: + self.device = "cpu" + _log("PyTorch CUDA/ROCm not available, using CPU for PyTorch fallback") + except ImportError: + self.device = "cpu" # Strategy 1: amd-smi static --json (ROCm 6.3+/7.x, richest output) if has_amd_smi: @@ -467,23 +476,33 @@ def load_optimized(self, model_name: str, use_optimized: bool = True): # Fallback: use the PT model we already loaded _log("Falling back to PyTorch model") - try: - pt_model.to(self.device) - except Exception as e: - _log(f"Device {self.device} unavailable ({e}), falling back to CPU") - self.device = "cpu" - pt_model.to("cpu") + fallback_device = self.device + if fallback_device == "cuda": + try: + import torch + if not torch.cuda.is_available(): + fallback_device = "cpu" + _log("torch.cuda not available, falling back to CPU") + except ImportError: + fallback_device = "cpu" + pt_model.to(fallback_device) + self.device = fallback_device self.load_ms = (time.perf_counter() - t0) * 1000 return pt_model, "pytorch" # No optimization requested or framework missing model = YOLO(f"{model_name}.pt") - try: - model.to(self.device) - except Exception as e: - _log(f"Device {self.device} unavailable ({e}), falling back to CPU") - self.device = "cpu" - model.to("cpu") + fallback_device = self.device + if fallback_device == "cuda": + try: + import torch + if not torch.cuda.is_available(): + fallback_device = "cpu" + _log("torch.cuda not available, falling back to CPU") + except ImportError: + fallback_device = "cpu" + model.to(fallback_device) + self.device = fallback_device self.load_ms = (time.perf_counter() - t0) * 1000 return model, "pytorch" diff --git a/skills/lib/test_env_config_rocm.py b/skills/lib/test_env_config_rocm.py index 76021c3..b17d92a 100644 --- a/skills/lib/test_env_config_rocm.py +++ b/skills/lib/test_env_config_rocm.py @@ -93,8 +93,11 @@ def test_dual_gpu_picks_discrete(self, mock_run, _mock_dir): """With 2 GPUs, picks the R9700 (32 GB) over iGPU (2 GB).""" mock_run.return_value = _make_run_result(AMD_SMI_DUAL_GPU) - env = HardwareEnv() - result = env._try_rocm() + mock_torch = mock.MagicMock() + mock_torch.cuda.is_available.return_value = True + with mock.patch.dict("sys.modules", {"torch": mock_torch}): + env = HardwareEnv() + result = env._try_rocm() assert result is True assert env.backend == "rocm" @@ -170,6 +173,23 @@ def test_amd_smi_failure_returns_true_with_defaults(self, mock_run, _mock_dir): assert env.backend == "rocm" assert env.gpu_name == "" # No name parsed, but backend detected + @mock.patch("env_config.shutil.which", _mock_which({"amd-smi"})) + @mock.patch("env_config.Path.is_dir", return_value=False) + @mock.patch("env_config.subprocess.run") + def test_no_pytorch_rocm_falls_back_to_cpu_device(self, mock_run, _mock_dir): + """When torch.cuda.is_available() is False, device stays 'cpu'.""" + mock_run.return_value = _make_run_result(AMD_SMI_SINGLE_GPU) + + mock_torch = mock.MagicMock() + mock_torch.cuda.is_available.return_value = False + with mock.patch.dict("sys.modules", {"torch": mock_torch}): + env = HardwareEnv() + env._try_rocm() + + assert env.backend == "rocm" + assert env.device == "cpu" # No PyTorch-ROCm → CPU fallback + assert env.gpu_name == "AMD Radeon RX 7900 XTX" # GPU still detected + class TestTryRocmFallback: """rocm-smi fallback (amd-smi not available).""" From dacf7cb9c2eb52a9d04f0ea911a15d299a52a1e9 Mon Sep 17 00:00:00 2001 From: Simba Zhang Date: Sun, 8 Mar 2026 20:30:23 -0700 Subject: [PATCH 3/7] fix(deploy): prevent ultralytics from re-installing CPU onnxruntime Root cause: ultralytics AutoUpdate detects onnx/onnxslim/onnxruntime as missing during ONNX export and auto-installs CPU onnxruntime, re-shadowing onnxruntime-rocm. Three-layer defense: - requirements_rocm.txt: pre-install onnx + onnxslim so ultralytics doesn't trigger AutoUpdate for ONNX export deps - deploy.sh: set YOLO_AUTOINSTALL=0 during export step - deploy.sh: post-export cleanup removes CPU onnxruntime if present --- skills/detection/yolo-detection-2026/deploy.sh | 11 +++++++++++ .../yolo-detection-2026/requirements_rocm.txt | 2 ++ 2 files changed, 13 insertions(+) diff --git a/skills/detection/yolo-detection-2026/deploy.sh b/skills/detection/yolo-detection-2026/deploy.sh index b9a277d..5a6ae6d 100755 --- a/skills/detection/yolo-detection-2026/deploy.sh +++ b/skills/detection/yolo-detection-2026/deploy.sh @@ -176,6 +176,9 @@ if [ "$BACKEND" != "cpu" ] || [ -f "$SKILL_DIR/requirements_cpu.txt" ]; then log "Pre-converting model to optimized format for $BACKEND..." emit "{\"event\": \"progress\", \"stage\": \"optimize\", \"message\": \"Converting model for $BACKEND (~30-120s)...\"}" + # Disable ultralytics auto-install (it would re-install CPU onnxruntime) + export YOLO_AUTOINSTALL=0 + "$VENV_DIR/bin/python" -c " import sys sys.path.insert(0, '$ENV_CONFIG_DIR') @@ -202,6 +205,14 @@ else: fi fi +# ROCm: final cleanup — remove CPU onnxruntime if ultralytics re-installed it +if [ "$BACKEND" = "rocm" ]; then + if "$PIP" show onnxruntime 2>/dev/null | grep -q "^Name: onnxruntime$"; then + log "Post-export cleanup: removing CPU onnxruntime (re-installed by ultralytics)..." + "$PIP" uninstall -y onnxruntime -q 2>&1 || true + fi +fi + # ─── Step 6: Verify installation ──────────────────────────────────────────── log "Verifying installation..." diff --git a/skills/detection/yolo-detection-2026/requirements_rocm.txt b/skills/detection/yolo-detection-2026/requirements_rocm.txt index 0d0ca7f..dcb12d3 100644 --- a/skills/detection/yolo-detection-2026/requirements_rocm.txt +++ b/skills/detection/yolo-detection-2026/requirements_rocm.txt @@ -5,6 +5,8 @@ torch>=2.4.0 torchvision>=0.19.0 ultralytics>=8.3.0 onnxruntime-rocm>=1.18 +onnx>=1.12.0,<2.0.0 # pre-install: prevents ultralytics from auto-installing CPU onnxruntime +onnxslim>=0.1.71 # pre-install: same reason numpy>=1.24.0 opencv-python-headless>=4.8.0 Pillow>=10.0.0 From bbd5db61ec923df5049ab47e3560aef57cf345c3 Mon Sep 17 00:00:00 2001 From: Simba Zhang Date: Sun, 8 Mar 2026 20:37:43 -0700 Subject: [PATCH 4/7] =?UTF-8?q?fix(deploy):=20simplify=20ROCm=20install=20?= =?UTF-8?q?=E2=80=94=20correct=20packages=20from=20the=20start?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of installing wrong packages then cleaning up: - Phase 1: PyTorch from ROCm --index-url (forces ROCm build, not CUDA) - Phase 2: remaining packages incl. onnxruntime-rocm, onnx, onnxslim - YOLO_AUTOINSTALL=0 prevents ultralytics from auto-installing CPU onnxruntime Removed: pre-install onnxruntime cleanup, post-export onnxruntime cleanup (no longer needed when packages are installed correctly) --- .../detection/yolo-detection-2026/deploy.sh | 31 ++++++++----------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/skills/detection/yolo-detection-2026/deploy.sh b/skills/detection/yolo-detection-2026/deploy.sh index 5a6ae6d..a3bfd09 100755 --- a/skills/detection/yolo-detection-2026/deploy.sh +++ b/skills/detection/yolo-detection-2026/deploy.sh @@ -160,15 +160,21 @@ fi log "Installing dependencies from $REQ_FILE ..." emit "{\"event\": \"progress\", \"stage\": \"install\", \"message\": \"Installing $BACKEND dependencies...\"}" -# ROCm: remove CPU-only onnxruntime if present (it shadows onnxruntime-rocm) if [ "$BACKEND" = "rocm" ]; then - if "$PIP" show onnxruntime &>/dev/null 2>&1; then - log "Removing CPU-only onnxruntime to avoid shadowing onnxruntime-rocm..." - "$PIP" uninstall -y onnxruntime -q 2>&1 || true - fi -fi + # ROCm: two-phase install to get the correct packages + # Phase 1: PyTorch from ROCm index (--index-url forces ROCm build, not CUDA) + log "Installing PyTorch with ROCm support..." + "$PIP" install torch torchvision --index-url https://download.pytorch.org/whl/rocm6.2 -q 2>&1 | tail -3 >&2 + + # Phase 2: remaining packages (ultralytics, onnxruntime-rocm, etc.) + "$PIP" install ultralytics onnxruntime-rocm 'onnx>=1.12.0,<2.0.0' 'onnxslim>=0.1.71' \ + 'numpy>=1.24.0' 'opencv-python-headless>=4.8.0' 'Pillow>=10.0.0' -q 2>&1 | tail -3 >&2 -"$PIP" install -r "$REQ_FILE" -q 2>&1 | tail -5 >&2 + # Prevent ultralytics from auto-installing CPU onnxruntime during export + export YOLO_AUTOINSTALL=0 +else + "$PIP" install -r "$REQ_FILE" -q 2>&1 | tail -5 >&2 +fi # ─── Step 5: Pre-convert model to optimized format ─────────────────────────── @@ -176,9 +182,6 @@ if [ "$BACKEND" != "cpu" ] || [ -f "$SKILL_DIR/requirements_cpu.txt" ]; then log "Pre-converting model to optimized format for $BACKEND..." emit "{\"event\": \"progress\", \"stage\": \"optimize\", \"message\": \"Converting model for $BACKEND (~30-120s)...\"}" - # Disable ultralytics auto-install (it would re-install CPU onnxruntime) - export YOLO_AUTOINSTALL=0 - "$VENV_DIR/bin/python" -c " import sys sys.path.insert(0, '$ENV_CONFIG_DIR') @@ -205,14 +208,6 @@ else: fi fi -# ROCm: final cleanup — remove CPU onnxruntime if ultralytics re-installed it -if [ "$BACKEND" = "rocm" ]; then - if "$PIP" show onnxruntime 2>/dev/null | grep -q "^Name: onnxruntime$"; then - log "Post-export cleanup: removing CPU onnxruntime (re-installed by ultralytics)..." - "$PIP" uninstall -y onnxruntime -q 2>&1 || true - fi -fi - # ─── Step 6: Verify installation ──────────────────────────────────────────── log "Verifying installation..." From 58f3d54377698ee1176ee4c65e4356d38e7dabf1 Mon Sep 17 00:00:00 2001 From: Simba Zhang Date: Sun, 8 Mar 2026 20:40:02 -0700 Subject: [PATCH 5/7] fix(deploy): auto-detect installed ROCm version for PyTorch index deploy.sh now reads ROCm version from /opt/rocm/.info/version, amd-smi, or rocminfo and constructs the PyTorch index URL dynamically (e.g. rocm7.2 instead of hardcoded rocm6.2). Falls back to 6.2 only if version detection fails. --- skills/detection/yolo-detection-2026/deploy.sh | 17 ++++++++++++++--- .../yolo-detection-2026/requirements_rocm.txt | 4 ++-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/skills/detection/yolo-detection-2026/deploy.sh b/skills/detection/yolo-detection-2026/deploy.sh index a3bfd09..22afb55 100755 --- a/skills/detection/yolo-detection-2026/deploy.sh +++ b/skills/detection/yolo-detection-2026/deploy.sh @@ -161,10 +161,21 @@ log "Installing dependencies from $REQ_FILE ..." emit "{\"event\": \"progress\", \"stage\": \"install\", \"message\": \"Installing $BACKEND dependencies...\"}" if [ "$BACKEND" = "rocm" ]; then - # ROCm: two-phase install to get the correct packages + # ROCm: detect installed version for correct PyTorch index URL + ROCM_VER="" + if [ -f /opt/rocm/.info/version ]; then + ROCM_VER=$(head -1 /opt/rocm/.info/version | grep -oE '[0-9]+\.[0-9]+') + elif command -v amd-smi &>/dev/null; then + ROCM_VER=$(amd-smi version 2>/dev/null | grep -oE '[0-9]+\.[0-9]+' | head -1) + elif command -v rocminfo &>/dev/null; then + ROCM_VER=$(rocminfo 2>/dev/null | grep -i "HSA Runtime" | grep -oE '[0-9]+\.[0-9]+' | head -1) + fi + ROCM_VER="${ROCM_VER:-6.2}" # fallback if detection fails + log "Detected ROCm version: $ROCM_VER" + # Phase 1: PyTorch from ROCm index (--index-url forces ROCm build, not CUDA) - log "Installing PyTorch with ROCm support..." - "$PIP" install torch torchvision --index-url https://download.pytorch.org/whl/rocm6.2 -q 2>&1 | tail -3 >&2 + log "Installing PyTorch with ROCm $ROCM_VER support..." + "$PIP" install torch torchvision --index-url "https://download.pytorch.org/whl/rocm${ROCM_VER}" -q 2>&1 | tail -3 >&2 # Phase 2: remaining packages (ultralytics, onnxruntime-rocm, etc.) "$PIP" install ultralytics onnxruntime-rocm 'onnx>=1.12.0,<2.0.0' 'onnxslim>=0.1.71' \ diff --git a/skills/detection/yolo-detection-2026/requirements_rocm.txt b/skills/detection/yolo-detection-2026/requirements_rocm.txt index dcb12d3..00a2909 100644 --- a/skills/detection/yolo-detection-2026/requirements_rocm.txt +++ b/skills/detection/yolo-detection-2026/requirements_rocm.txt @@ -1,6 +1,6 @@ # YOLO 2026 — ROCm (AMD GPU) requirements -# Installs PyTorch with ROCm 6.2 support ---extra-index-url https://download.pytorch.org/whl/rocm6.2 +# NOTE: deploy.sh auto-detects the installed ROCm version and installs +# PyTorch from the matching index URL. This file is a reference manifest. torch>=2.4.0 torchvision>=0.19.0 ultralytics>=8.3.0 From 2d32d52277074a3dfd9b9e7a1d28e3d08efde54c Mon Sep 17 00:00:00 2001 From: Simba Zhang Date: Sun, 8 Mar 2026 21:02:26 -0700 Subject: [PATCH 6/7] fix(deploy): fallback through ROCm versions for PyTorch wheels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PyTorch only publishes wheels for specific ROCm versions (e.g. 6.2, 7.0, 7.1) — not every point release. For ROCm 7.2, deploy now tries: 7.2 → 7.1 → 7.0 → 6.4 → 6.3 → 6.2 → 6.1 → 6.0 Stops at first successful install. Falls back to PyPI CPU torch if no ROCm wheels found at all. --- .../detection/yolo-detection-2026/deploy.sh | 33 +++++++++++++++++-- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/skills/detection/yolo-detection-2026/deploy.sh b/skills/detection/yolo-detection-2026/deploy.sh index 22afb55..4cd7840 100755 --- a/skills/detection/yolo-detection-2026/deploy.sh +++ b/skills/detection/yolo-detection-2026/deploy.sh @@ -173,9 +173,36 @@ if [ "$BACKEND" = "rocm" ]; then ROCM_VER="${ROCM_VER:-6.2}" # fallback if detection fails log "Detected ROCm version: $ROCM_VER" - # Phase 1: PyTorch from ROCm index (--index-url forces ROCm build, not CUDA) - log "Installing PyTorch with ROCm $ROCM_VER support..." - "$PIP" install torch torchvision --index-url "https://download.pytorch.org/whl/rocm${ROCM_VER}" -q 2>&1 | tail -3 >&2 + # Build list of ROCm versions to try (detected → step down → previous major) + ROCM_MAJOR=$(echo "$ROCM_VER" | cut -d. -f1) + ROCM_MINOR=$(echo "$ROCM_VER" | cut -d. -f2) + ROCM_CANDIDATES="$ROCM_VER" + m=$((ROCM_MINOR - 1)) + while [ "$m" -ge 0 ]; do + ROCM_CANDIDATES="$ROCM_CANDIDATES ${ROCM_MAJOR}.${m}" + m=$((m - 1)) + done + # Also try previous major version (e.g., 6.4, 6.2 if on 7.x) + prev_major=$((ROCM_MAJOR - 1)) + for pm in 4 3 2 1 0; do + ROCM_CANDIDATES="$ROCM_CANDIDATES ${prev_major}.${pm}" + done + + # Phase 1: Try each candidate until PyTorch installs successfully + TORCH_INSTALLED=false + for ver in $ROCM_CANDIDATES; do + log "Trying PyTorch for ROCm $ver ..." + if "$PIP" install torch torchvision --index-url "https://download.pytorch.org/whl/rocm${ver}" -q 2>&1; then + log "Installed PyTorch with ROCm $ver support" + TORCH_INSTALLED=true + break + fi + done + + if [ "$TORCH_INSTALLED" = false ]; then + log "WARNING: No PyTorch ROCm wheels found, installing CPU PyTorch from PyPI" + "$PIP" install torch torchvision -q 2>&1 | tail -3 >&2 + fi # Phase 2: remaining packages (ultralytics, onnxruntime-rocm, etc.) "$PIP" install ultralytics onnxruntime-rocm 'onnx>=1.12.0,<2.0.0' 'onnxslim>=0.1.71' \ From 28aede13af3226d88b27aeacb93bb8127a5f8ac5 Mon Sep 17 00:00:00 2001 From: Simba Zhang Date: Sun, 8 Mar 2026 21:22:06 -0700 Subject: [PATCH 7/7] fix(rocm): use PyTorch+HIP for inference instead of ONNX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ultralytics' ONNX loader only supports CUDAExecutionProvider (NVIDIA). On ROCm, it falls back to CPU even though ROCMExecutionProvider is available. PyTorch + HIP runs natively on AMD GPUs via device='cuda'. - Change ROCm BackendSpec: onnx → pytorch (skip ONNX export entirely) - Set YOLO_AUTOINSTALL=0 in detect.py to prevent ultralytics from auto-installing onnxruntime-gpu (NVIDIA) at runtime --- skills/detection/yolo-detection-2026/scripts/detect.py | 4 ++++ skills/detection/yolo-detection-2026/scripts/env_config.py | 6 +++--- skills/lib/env_config.py | 6 +++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/skills/detection/yolo-detection-2026/scripts/detect.py b/skills/detection/yolo-detection-2026/scripts/detect.py index 595d9dc..d149374 100644 --- a/skills/detection/yolo-detection-2026/scripts/detect.py +++ b/skills/detection/yolo-detection-2026/scripts/detect.py @@ -15,12 +15,16 @@ """ import sys +import os import json import argparse import signal import time from pathlib import Path +# Prevent ultralytics from auto-installing packages (e.g. onnxruntime-gpu on ROCm) +os.environ.setdefault("YOLO_AUTOINSTALL", "0") + # Import env_config — try multiple locations: # 1. Same directory as detect.py (bundled copy) # 2. DeepCamera repo: skills/lib/ diff --git a/skills/detection/yolo-detection-2026/scripts/env_config.py b/skills/detection/yolo-detection-2026/scripts/env_config.py index f7c7ddc..ff42e6f 100644 --- a/skills/detection/yolo-detection-2026/scripts/env_config.py +++ b/skills/detection/yolo-detection-2026/scripts/env_config.py @@ -51,9 +51,9 @@ class BackendSpec: ), "rocm": BackendSpec( name="rocm", - export_format="onnx", - model_suffix=".onnx", - half=False, # ONNX Runtime ROCm handles precision internally + export_format="pytorch", # PyTorch + HIP — ultralytics ONNX doesn't support ROCMExecutionProvider + model_suffix=".pt", + half=False, ), "mps": BackendSpec( name="mps", diff --git a/skills/lib/env_config.py b/skills/lib/env_config.py index f7c7ddc..ff42e6f 100644 --- a/skills/lib/env_config.py +++ b/skills/lib/env_config.py @@ -51,9 +51,9 @@ class BackendSpec: ), "rocm": BackendSpec( name="rocm", - export_format="onnx", - model_suffix=".onnx", - half=False, # ONNX Runtime ROCm handles precision internally + export_format="pytorch", # PyTorch + HIP — ultralytics ONNX doesn't support ROCMExecutionProvider + model_suffix=".pt", + half=False, ), "mps": BackendSpec( name="mps",