From eaefade6a8a31eeb32a51b1ea9c443a3b766e1b0 Mon Sep 17 00:00:00 2001
From: Simba Zhang <solderzzc@gmail.com>
Date: Sun, 8 Mar 2026 15:37:58 -0700
Subject: [PATCH 1/7] fix(env_config): graceful CPU fallback when ROCm PyTorch
 unavailable

- load_optimized() now catches device='cuda' failures on ROCm systems
  where PyTorch-ROCm is not installed and degrades to CPU gracefully
- deploy.sh removes CPU-only onnxruntime before installing onnxruntime-rocm
  to prevent the shadowing bug
---
 skills/detection/yolo-detection-2026/deploy.sh     |  8 ++++++++
 .../yolo-detection-2026/scripts/env_config.py      | 14 ++++++++++++--
 skills/lib/env_config.py                           | 14 ++++++++++++--
 3 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/skills/detection/yolo-detection-2026/deploy.sh b/skills/detection/yolo-detection-2026/deploy.sh
index a1e4771..b9a277d 100755
--- a/skills/detection/yolo-detection-2026/deploy.sh
+++ b/skills/detection/yolo-detection-2026/deploy.sh
@@ -160,6 +160,14 @@ fi
 log "Installing dependencies from $REQ_FILE ..."
 emit "{\"event\": \"progress\", \"stage\": \"install\", \"message\": \"Installing $BACKEND dependencies...\"}"
 
+# ROCm: remove CPU-only onnxruntime if present (it shadows onnxruntime-rocm)
+if [ "$BACKEND" = "rocm" ]; then
+    if "$PIP" show onnxruntime &>/dev/null 2>&1; then
+        log "Removing CPU-only onnxruntime to avoid shadowing onnxruntime-rocm..."
+        "$PIP" uninstall -y onnxruntime -q 2>&1 || true
+    fi
+fi
+
 "$PIP" install -r "$REQ_FILE" -q 2>&1 | tail -5 >&2
 
 # ─── Step 5: Pre-convert model to optimized format ───────────────────────────
diff --git a/skills/detection/yolo-detection-2026/scripts/env_config.py b/skills/detection/yolo-detection-2026/scripts/env_config.py
index 49935b1..7f4baa6 100644
--- a/skills/detection/yolo-detection-2026/scripts/env_config.py
+++ b/skills/detection/yolo-detection-2026/scripts/env_config.py
@@ -467,13 +467,23 @@ def load_optimized(self, model_name: str, use_optimized: bool = True):
 
             # Fallback: use the PT model we already loaded
             _log("Falling back to PyTorch model")
-            pt_model.to(self.device)
+            try:
+                pt_model.to(self.device)
+            except Exception as e:
+                _log(f"Device {self.device} unavailable ({e}), falling back to CPU")
+                self.device = "cpu"
+                pt_model.to("cpu")
             self.load_ms = (time.perf_counter() - t0) * 1000
             return pt_model, "pytorch"
 
         # No optimization requested or framework missing
         model = YOLO(f"{model_name}.pt")
-        model.to(self.device)
+        try:
+            model.to(self.device)
+        except Exception as e:
+            _log(f"Device {self.device} unavailable ({e}), falling back to CPU")
+            self.device = "cpu"
+            model.to("cpu")
         self.load_ms = (time.perf_counter() - t0) * 1000
         return model, "pytorch"
 
diff --git a/skills/lib/env_config.py b/skills/lib/env_config.py
index 49935b1..7f4baa6 100644
--- a/skills/lib/env_config.py
+++ b/skills/lib/env_config.py
@@ -467,13 +467,23 @@ def load_optimized(self, model_name: str, use_optimized: bool = True):
 
             # Fallback: use the PT model we already loaded
             _log("Falling back to PyTorch model")
-            pt_model.to(self.device)
+            try:
+                pt_model.to(self.device)
+            except Exception as e:
+                _log(f"Device {self.device} unavailable ({e}), falling back to CPU")
+                self.device = "cpu"
+                pt_model.to("cpu")
             self.load_ms = (time.perf_counter() - t0) * 1000
             return pt_model, "pytorch"
 
         # No optimization requested or framework missing
         model = YOLO(f"{model_name}.pt")
-        model.to(self.device)
+        try:
+            model.to(self.device)
+        except Exception as e:
+            _log(f"Device {self.device} unavailable ({e}), falling back to CPU")
+            self.device = "cpu"
+            model.to("cpu")
         self.load_ms = (time.perf_counter() - t0) * 1000
         return model, "pytorch"
 

From fd87c7f50fb4330e223d646a6946810067e11155 Mon Sep 17 00:00:00 2001
From: Simba Zhang <solderzzc@gmail.com>
Date: Sun, 8 Mar 2026 19:46:21 -0700
Subject: [PATCH 2/7] fix(env_config): proactive torch.cuda guard for ROCm
 PyTorch fallback

- _try_rocm() checks torch.cuda.is_available() before setting device='cuda';
  if PyTorch-ROCm is not installed, device stays 'cpu' from the start
- load_optimized() fallback pre-checks torch.cuda instead of catching
  NVIDIA driver exceptions reactively (cleaner logs, no crash)
- Added test: no-PyTorch-ROCm falls back to cpu device (15 tests total)
---
 .../yolo-detection-2026/scripts/env_config.py | 45 +++++++++++++------
 skills/lib/env_config.py                      | 45 +++++++++++++------
 skills/lib/test_env_config_rocm.py            | 24 +++++++++-
 3 files changed, 86 insertions(+), 28 deletions(-)

diff --git a/skills/detection/yolo-detection-2026/scripts/env_config.py b/skills/detection/yolo-detection-2026/scripts/env_config.py
index 7f4baa6..f7c7ddc 100644
--- a/skills/detection/yolo-detection-2026/scripts/env_config.py
+++ b/skills/detection/yolo-detection-2026/scripts/env_config.py
@@ -165,7 +165,16 @@ def _try_rocm(self) -> bool:
             return False
 
         self.backend = "rocm"
-        self.device = "cuda"  # ROCm exposes as CUDA in PyTorch
+        # ROCm exposes as CUDA in PyTorch — but only if PyTorch-ROCm is installed
+        try:
+            import torch
+            if torch.cuda.is_available():
+                self.device = "cuda"
+            else:
+                self.device = "cpu"
+                _log("PyTorch CUDA/ROCm not available, using CPU for PyTorch fallback")
+        except ImportError:
+            self.device = "cpu"
 
         # Strategy 1: amd-smi static --json (ROCm 6.3+/7.x, richest output)
         if has_amd_smi:
@@ -467,23 +476,33 @@ def load_optimized(self, model_name: str, use_optimized: bool = True):
 
             # Fallback: use the PT model we already loaded
             _log("Falling back to PyTorch model")
-            try:
-                pt_model.to(self.device)
-            except Exception as e:
-                _log(f"Device {self.device} unavailable ({e}), falling back to CPU")
-                self.device = "cpu"
-                pt_model.to("cpu")
+            fallback_device = self.device
+            if fallback_device == "cuda":
+                try:
+                    import torch
+                    if not torch.cuda.is_available():
+                        fallback_device = "cpu"
+                        _log("torch.cuda not available, falling back to CPU")
+                except ImportError:
+                    fallback_device = "cpu"
+            pt_model.to(fallback_device)
+            self.device = fallback_device
             self.load_ms = (time.perf_counter() - t0) * 1000
             return pt_model, "pytorch"
 
         # No optimization requested or framework missing
         model = YOLO(f"{model_name}.pt")
-        try:
-            model.to(self.device)
-        except Exception as e:
-            _log(f"Device {self.device} unavailable ({e}), falling back to CPU")
-            self.device = "cpu"
-            model.to("cpu")
+        fallback_device = self.device
+        if fallback_device == "cuda":
+            try:
+                import torch
+                if not torch.cuda.is_available():
+                    fallback_device = "cpu"
+                    _log("torch.cuda not available, falling back to CPU")
+            except ImportError:
+                fallback_device = "cpu"
+        model.to(fallback_device)
+        self.device = fallback_device
         self.load_ms = (time.perf_counter() - t0) * 1000
         return model, "pytorch"
 
diff --git a/skills/lib/env_config.py b/skills/lib/env_config.py
index 7f4baa6..f7c7ddc 100644
--- a/skills/lib/env_config.py
+++ b/skills/lib/env_config.py
@@ -165,7 +165,16 @@ def _try_rocm(self) -> bool:
             return False
 
         self.backend = "rocm"
-        self.device = "cuda"  # ROCm exposes as CUDA in PyTorch
+        # ROCm exposes as CUDA in PyTorch — but only if PyTorch-ROCm is installed
+        try:
+            import torch
+            if torch.cuda.is_available():
+                self.device = "cuda"
+            else:
+                self.device = "cpu"
+                _log("PyTorch CUDA/ROCm not available, using CPU for PyTorch fallback")
+        except ImportError:
+            self.device = "cpu"
 
         # Strategy 1: amd-smi static --json (ROCm 6.3+/7.x, richest output)
         if has_amd_smi:
@@ -467,23 +476,33 @@ def load_optimized(self, model_name: str, use_optimized: bool = True):
 
             # Fallback: use the PT model we already loaded
             _log("Falling back to PyTorch model")
-            try:
-                pt_model.to(self.device)
-            except Exception as e:
-                _log(f"Device {self.device} unavailable ({e}), falling back to CPU")
-                self.device = "cpu"
-                pt_model.to("cpu")
+            fallback_device = self.device
+            if fallback_device == "cuda":
+                try:
+                    import torch
+                    if not torch.cuda.is_available():
+                        fallback_device = "cpu"
+                        _log("torch.cuda not available, falling back to CPU")
+                except ImportError:
+                    fallback_device = "cpu"
+            pt_model.to(fallback_device)
+            self.device = fallback_device
             self.load_ms = (time.perf_counter() - t0) * 1000
             return pt_model, "pytorch"
 
         # No optimization requested or framework missing
         model = YOLO(f"{model_name}.pt")
-        try:
-            model.to(self.device)
-        except Exception as e:
-            _log(f"Device {self.device} unavailable ({e}), falling back to CPU")
-            self.device = "cpu"
-            model.to("cpu")
+        fallback_device = self.device
+        if fallback_device == "cuda":
+            try:
+                import torch
+                if not torch.cuda.is_available():
+                    fallback_device = "cpu"
+                    _log("torch.cuda not available, falling back to CPU")
+            except ImportError:
+                fallback_device = "cpu"
+        model.to(fallback_device)
+        self.device = fallback_device
         self.load_ms = (time.perf_counter() - t0) * 1000
         return model, "pytorch"
 
diff --git a/skills/lib/test_env_config_rocm.py b/skills/lib/test_env_config_rocm.py
index 76021c3..b17d92a 100644
--- a/skills/lib/test_env_config_rocm.py
+++ b/skills/lib/test_env_config_rocm.py
@@ -93,8 +93,11 @@ def test_dual_gpu_picks_discrete(self, mock_run, _mock_dir):
         """With 2 GPUs, picks the R9700 (32 GB) over iGPU (2 GB)."""
         mock_run.return_value = _make_run_result(AMD_SMI_DUAL_GPU)
 
-        env = HardwareEnv()
-        result = env._try_rocm()
+        mock_torch = mock.MagicMock()
+        mock_torch.cuda.is_available.return_value = True
+        with mock.patch.dict("sys.modules", {"torch": mock_torch}):
+            env = HardwareEnv()
+            result = env._try_rocm()
 
         assert result is True
         assert env.backend == "rocm"
@@ -170,6 +173,23 @@ def test_amd_smi_failure_returns_true_with_defaults(self, mock_run, _mock_dir):
         assert env.backend == "rocm"
         assert env.gpu_name == ""  # No name parsed, but backend detected
 
+    @mock.patch("env_config.shutil.which", _mock_which({"amd-smi"}))
+    @mock.patch("env_config.Path.is_dir", return_value=False)
+    @mock.patch("env_config.subprocess.run")
+    def test_no_pytorch_rocm_falls_back_to_cpu_device(self, mock_run, _mock_dir):
+        """When torch.cuda.is_available() is False, device stays 'cpu'."""
+        mock_run.return_value = _make_run_result(AMD_SMI_SINGLE_GPU)
+
+        mock_torch = mock.MagicMock()
+        mock_torch.cuda.is_available.return_value = False
+        with mock.patch.dict("sys.modules", {"torch": mock_torch}):
+            env = HardwareEnv()
+            env._try_rocm()
+
+        assert env.backend == "rocm"
+        assert env.device == "cpu"  # No PyTorch-ROCm → CPU fallback
+        assert env.gpu_name == "AMD Radeon RX 7900 XTX"  # GPU still detected
+
 
 class TestTryRocmFallback:
     """rocm-smi fallback (amd-smi not available)."""

From dacf7cb9c2eb52a9d04f0ea911a15d299a52a1e9 Mon Sep 17 00:00:00 2001
From: Simba Zhang <solderzzc@gmail.com>
Date: Sun, 8 Mar 2026 20:30:23 -0700
Subject: [PATCH 3/7] fix(deploy): prevent ultralytics from re-installing CPU
 onnxruntime

Root cause: ultralytics AutoUpdate detects onnx/onnxslim/onnxruntime
as missing during ONNX export and auto-installs CPU onnxruntime,
re-shadowing onnxruntime-rocm.

Three-layer defense:
- requirements_rocm.txt: pre-install onnx + onnxslim so ultralytics
  doesn't trigger AutoUpdate for ONNX export deps
- deploy.sh: set YOLO_AUTOINSTALL=0 during export step
- deploy.sh: post-export cleanup removes CPU onnxruntime if present
---
 skills/detection/yolo-detection-2026/deploy.sh        | 11 +++++++++++
 .../yolo-detection-2026/requirements_rocm.txt         |  2 ++
 2 files changed, 13 insertions(+)

diff --git a/skills/detection/yolo-detection-2026/deploy.sh b/skills/detection/yolo-detection-2026/deploy.sh
index b9a277d..5a6ae6d 100755
--- a/skills/detection/yolo-detection-2026/deploy.sh
+++ b/skills/detection/yolo-detection-2026/deploy.sh
@@ -176,6 +176,9 @@ if [ "$BACKEND" != "cpu" ] || [ -f "$SKILL_DIR/requirements_cpu.txt" ]; then
     log "Pre-converting model to optimized format for $BACKEND..."
     emit "{\"event\": \"progress\", \"stage\": \"optimize\", \"message\": \"Converting model for $BACKEND (~30-120s)...\"}"
 
+    # Disable ultralytics auto-install (it would re-install CPU onnxruntime)
+    export YOLO_AUTOINSTALL=0
+
     "$VENV_DIR/bin/python" -c "
 import sys
 sys.path.insert(0, '$ENV_CONFIG_DIR')
@@ -202,6 +205,14 @@ else:
     fi
 fi
 
+# ROCm: final cleanup — remove CPU onnxruntime if ultralytics re-installed it
+if [ "$BACKEND" = "rocm" ]; then
+    if "$PIP" show onnxruntime 2>/dev/null | grep -q "^Name: onnxruntime$"; then
+        log "Post-export cleanup: removing CPU onnxruntime (re-installed by ultralytics)..."
+        "$PIP" uninstall -y onnxruntime -q 2>&1 || true
+    fi
+fi
+
 # ─── Step 6: Verify installation ────────────────────────────────────────────
 
 log "Verifying installation..."
diff --git a/skills/detection/yolo-detection-2026/requirements_rocm.txt b/skills/detection/yolo-detection-2026/requirements_rocm.txt
index 0d0ca7f..dcb12d3 100644
--- a/skills/detection/yolo-detection-2026/requirements_rocm.txt
+++ b/skills/detection/yolo-detection-2026/requirements_rocm.txt
@@ -5,6 +5,8 @@ torch>=2.4.0
 torchvision>=0.19.0
 ultralytics>=8.3.0
 onnxruntime-rocm>=1.18
+onnx>=1.12.0,<2.0.0        # pre-install: prevents ultralytics from auto-installing CPU onnxruntime
+onnxslim>=0.1.71            # pre-install: same reason
 numpy>=1.24.0
 opencv-python-headless>=4.8.0
 Pillow>=10.0.0

From bbd5db61ec923df5049ab47e3560aef57cf345c3 Mon Sep 17 00:00:00 2001
From: Simba Zhang <solderzzc@gmail.com>
Date: Sun, 8 Mar 2026 20:37:43 -0700
Subject: [PATCH 4/7] =?UTF-8?q?fix(deploy):=20simplify=20ROCm=20install=20?=
 =?UTF-8?q?=E2=80=94=20correct=20packages=20from=20the=20start?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of installing wrong packages then cleaning up:
- Phase 1: PyTorch from ROCm --index-url (forces ROCm build, not CUDA)
- Phase 2: remaining packages incl. onnxruntime-rocm, onnx, onnxslim
- YOLO_AUTOINSTALL=0 prevents ultralytics from auto-installing CPU onnxruntime

Removed: pre-install onnxruntime cleanup, post-export onnxruntime cleanup
(no longer needed when packages are installed correctly)
---
 .../detection/yolo-detection-2026/deploy.sh   | 31 ++++++++-----------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/skills/detection/yolo-detection-2026/deploy.sh b/skills/detection/yolo-detection-2026/deploy.sh
index 5a6ae6d..a3bfd09 100755
--- a/skills/detection/yolo-detection-2026/deploy.sh
+++ b/skills/detection/yolo-detection-2026/deploy.sh
@@ -160,15 +160,21 @@ fi
 log "Installing dependencies from $REQ_FILE ..."
 emit "{\"event\": \"progress\", \"stage\": \"install\", \"message\": \"Installing $BACKEND dependencies...\"}"
 
-# ROCm: remove CPU-only onnxruntime if present (it shadows onnxruntime-rocm)
 if [ "$BACKEND" = "rocm" ]; then
-    if "$PIP" show onnxruntime &>/dev/null 2>&1; then
-        log "Removing CPU-only onnxruntime to avoid shadowing onnxruntime-rocm..."
-        "$PIP" uninstall -y onnxruntime -q 2>&1 || true
-    fi
-fi
+    # ROCm: two-phase install to get the correct packages
+    # Phase 1: PyTorch from ROCm index (--index-url forces ROCm build, not CUDA)
+    log "Installing PyTorch with ROCm support..."
+    "$PIP" install torch torchvision --index-url https://download.pytorch.org/whl/rocm6.2 -q 2>&1 | tail -3 >&2
+
+    # Phase 2: remaining packages (ultralytics, onnxruntime-rocm, etc.)
+    "$PIP" install ultralytics onnxruntime-rocm 'onnx>=1.12.0,<2.0.0' 'onnxslim>=0.1.71' \
+        'numpy>=1.24.0' 'opencv-python-headless>=4.8.0' 'Pillow>=10.0.0' -q 2>&1 | tail -3 >&2
 
-"$PIP" install -r "$REQ_FILE" -q 2>&1 | tail -5 >&2
+    # Prevent ultralytics from auto-installing CPU onnxruntime during export
+    export YOLO_AUTOINSTALL=0
+else
+    "$PIP" install -r "$REQ_FILE" -q 2>&1 | tail -5 >&2
+fi
 
 # ─── Step 5: Pre-convert model to optimized format ───────────────────────────
 
@@ -176,9 +182,6 @@ if [ "$BACKEND" != "cpu" ] || [ -f "$SKILL_DIR/requirements_cpu.txt" ]; then
     log "Pre-converting model to optimized format for $BACKEND..."
     emit "{\"event\": \"progress\", \"stage\": \"optimize\", \"message\": \"Converting model for $BACKEND (~30-120s)...\"}"
 
-    # Disable ultralytics auto-install (it would re-install CPU onnxruntime)
-    export YOLO_AUTOINSTALL=0
-
     "$VENV_DIR/bin/python" -c "
 import sys
 sys.path.insert(0, '$ENV_CONFIG_DIR')
@@ -205,14 +208,6 @@ else:
     fi
 fi
 
-# ROCm: final cleanup — remove CPU onnxruntime if ultralytics re-installed it
-if [ "$BACKEND" = "rocm" ]; then
-    if "$PIP" show onnxruntime 2>/dev/null | grep -q "^Name: onnxruntime$"; then
-        log "Post-export cleanup: removing CPU onnxruntime (re-installed by ultralytics)..."
-        "$PIP" uninstall -y onnxruntime -q 2>&1 || true
-    fi
-fi
-
 # ─── Step 6: Verify installation ────────────────────────────────────────────
 
 log "Verifying installation..."

From 58f3d54377698ee1176ee4c65e4356d38e7dabf1 Mon Sep 17 00:00:00 2001
From: Simba Zhang <solderzzc@gmail.com>
Date: Sun, 8 Mar 2026 20:40:02 -0700
Subject: [PATCH 5/7] fix(deploy): auto-detect installed ROCm version for
 PyTorch index

deploy.sh now reads ROCm version from /opt/rocm/.info/version,
amd-smi, or rocminfo and constructs the PyTorch index URL dynamically
(e.g. rocm7.2 instead of hardcoded rocm6.2). Falls back to 6.2 only
if version detection fails.
---
 skills/detection/yolo-detection-2026/deploy.sh  | 17 ++++++++++++++---
 .../yolo-detection-2026/requirements_rocm.txt   |  4 ++--
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/skills/detection/yolo-detection-2026/deploy.sh b/skills/detection/yolo-detection-2026/deploy.sh
index a3bfd09..22afb55 100755
--- a/skills/detection/yolo-detection-2026/deploy.sh
+++ b/skills/detection/yolo-detection-2026/deploy.sh
@@ -161,10 +161,21 @@ log "Installing dependencies from $REQ_FILE ..."
 emit "{\"event\": \"progress\", \"stage\": \"install\", \"message\": \"Installing $BACKEND dependencies...\"}"
 
 if [ "$BACKEND" = "rocm" ]; then
-    # ROCm: two-phase install to get the correct packages
+    # ROCm: detect installed version for correct PyTorch index URL
+    ROCM_VER=""
+    if [ -f /opt/rocm/.info/version ]; then
+        ROCM_VER=$(head -1 /opt/rocm/.info/version | grep -oE '[0-9]+\.[0-9]+')
+    elif command -v amd-smi &>/dev/null; then
+        ROCM_VER=$(amd-smi version 2>/dev/null | grep -oE '[0-9]+\.[0-9]+' | head -1)
+    elif command -v rocminfo &>/dev/null; then
+        ROCM_VER=$(rocminfo 2>/dev/null | grep -i "HSA Runtime" | grep -oE '[0-9]+\.[0-9]+' | head -1)
+    fi
+    ROCM_VER="${ROCM_VER:-6.2}"  # fallback if detection fails
+    log "Detected ROCm version: $ROCM_VER"
+
     # Phase 1: PyTorch from ROCm index (--index-url forces ROCm build, not CUDA)
-    log "Installing PyTorch with ROCm support..."
-    "$PIP" install torch torchvision --index-url https://download.pytorch.org/whl/rocm6.2 -q 2>&1 | tail -3 >&2
+    log "Installing PyTorch with ROCm $ROCM_VER support..."
+    "$PIP" install torch torchvision --index-url "https://download.pytorch.org/whl/rocm${ROCM_VER}" -q 2>&1 | tail -3 >&2
 
     # Phase 2: remaining packages (ultralytics, onnxruntime-rocm, etc.)
     "$PIP" install ultralytics onnxruntime-rocm 'onnx>=1.12.0,<2.0.0' 'onnxslim>=0.1.71' \
diff --git a/skills/detection/yolo-detection-2026/requirements_rocm.txt b/skills/detection/yolo-detection-2026/requirements_rocm.txt
index dcb12d3..00a2909 100644
--- a/skills/detection/yolo-detection-2026/requirements_rocm.txt
+++ b/skills/detection/yolo-detection-2026/requirements_rocm.txt
@@ -1,6 +1,6 @@
 # YOLO 2026 — ROCm (AMD GPU) requirements
-# Installs PyTorch with ROCm 6.2 support
---extra-index-url https://download.pytorch.org/whl/rocm6.2
+# NOTE: deploy.sh auto-detects the installed ROCm version and installs
+# PyTorch from the matching index URL. This file is a reference manifest.
 torch>=2.4.0
 torchvision>=0.19.0
 ultralytics>=8.3.0

From 2d32d52277074a3dfd9b9e7a1d28e3d08efde54c Mon Sep 17 00:00:00 2001
From: Simba Zhang <solderzzc@gmail.com>
Date: Sun, 8 Mar 2026 21:02:26 -0700
Subject: [PATCH 6/7] fix(deploy): fallback through ROCm versions for PyTorch
 wheels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PyTorch only publishes wheels for specific ROCm versions (e.g. 6.2,
7.0, 7.1) — not every point release. For ROCm 7.2, deploy now tries:
7.2 → 7.1 → 7.0 → 6.4 → 6.3 → 6.2 → 6.1 → 6.0
Stops at first successful install. Falls back to PyPI CPU torch if
no ROCm wheels found at all.
---
 .../detection/yolo-detection-2026/deploy.sh   | 33 +++++++++++++++++--
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/skills/detection/yolo-detection-2026/deploy.sh b/skills/detection/yolo-detection-2026/deploy.sh
index 22afb55..4cd7840 100755
--- a/skills/detection/yolo-detection-2026/deploy.sh
+++ b/skills/detection/yolo-detection-2026/deploy.sh
@@ -173,9 +173,36 @@ if [ "$BACKEND" = "rocm" ]; then
     ROCM_VER="${ROCM_VER:-6.2}"  # fallback if detection fails
     log "Detected ROCm version: $ROCM_VER"
 
-    # Phase 1: PyTorch from ROCm index (--index-url forces ROCm build, not CUDA)
-    log "Installing PyTorch with ROCm $ROCM_VER support..."
-    "$PIP" install torch torchvision --index-url "https://download.pytorch.org/whl/rocm${ROCM_VER}" -q 2>&1 | tail -3 >&2
+    # Build list of ROCm versions to try (detected → step down → previous major)
+    ROCM_MAJOR=$(echo "$ROCM_VER" | cut -d. -f1)
+    ROCM_MINOR=$(echo "$ROCM_VER" | cut -d. -f2)
+    ROCM_CANDIDATES="$ROCM_VER"
+    m=$((ROCM_MINOR - 1))
+    while [ "$m" -ge 0 ]; do
+        ROCM_CANDIDATES="$ROCM_CANDIDATES ${ROCM_MAJOR}.${m}"
+        m=$((m - 1))
+    done
+    # Also try previous major version (e.g., 6.4, 6.2 if on 7.x)
+    prev_major=$((ROCM_MAJOR - 1))
+    for pm in 4 3 2 1 0; do
+        ROCM_CANDIDATES="$ROCM_CANDIDATES ${prev_major}.${pm}"
+    done
+
+    # Phase 1: Try each candidate until PyTorch installs successfully
+    TORCH_INSTALLED=false
+    for ver in $ROCM_CANDIDATES; do
+        log "Trying PyTorch for ROCm $ver ..."
+        if "$PIP" install torch torchvision --index-url "https://download.pytorch.org/whl/rocm${ver}" -q 2>&1; then
+            log "Installed PyTorch with ROCm $ver support"
+            TORCH_INSTALLED=true
+            break
+        fi
+    done
+
+    if [ "$TORCH_INSTALLED" = false ]; then
+        log "WARNING: No PyTorch ROCm wheels found, installing CPU PyTorch from PyPI"
+        "$PIP" install torch torchvision -q 2>&1 | tail -3 >&2
+    fi
 
     # Phase 2: remaining packages (ultralytics, onnxruntime-rocm, etc.)
     "$PIP" install ultralytics onnxruntime-rocm 'onnx>=1.12.0,<2.0.0' 'onnxslim>=0.1.71' \

From 28aede13af3226d88b27aeacb93bb8127a5f8ac5 Mon Sep 17 00:00:00 2001
From: Simba Zhang <solderzzc@gmail.com>
Date: Sun, 8 Mar 2026 21:22:06 -0700
Subject: [PATCH 7/7] fix(rocm): use PyTorch+HIP for inference instead of ONNX
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ultralytics' ONNX loader only supports CUDAExecutionProvider (NVIDIA).
On ROCm, it falls back to CPU even though ROCMExecutionProvider is
available. PyTorch + HIP runs natively on AMD GPUs via device='cuda'.

- Change ROCm BackendSpec: onnx → pytorch (skip ONNX export entirely)
- Set YOLO_AUTOINSTALL=0 in detect.py to prevent ultralytics from
  auto-installing onnxruntime-gpu (NVIDIA) at runtime
---
 skills/detection/yolo-detection-2026/scripts/detect.py     | 4 ++++
 skills/detection/yolo-detection-2026/scripts/env_config.py | 6 +++---
 skills/lib/env_config.py                                   | 6 +++---
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/skills/detection/yolo-detection-2026/scripts/detect.py b/skills/detection/yolo-detection-2026/scripts/detect.py
index 595d9dc..d149374 100644
--- a/skills/detection/yolo-detection-2026/scripts/detect.py
+++ b/skills/detection/yolo-detection-2026/scripts/detect.py
@@ -15,12 +15,16 @@
 """
 
 import sys
+import os
 import json
 import argparse
 import signal
 import time
 from pathlib import Path
 
+# Prevent ultralytics from auto-installing packages (e.g. onnxruntime-gpu on ROCm)
+os.environ.setdefault("YOLO_AUTOINSTALL", "0")
+
 # Import env_config — try multiple locations:
 # 1. Same directory as detect.py (bundled copy)
 # 2. DeepCamera repo: skills/lib/
diff --git a/skills/detection/yolo-detection-2026/scripts/env_config.py b/skills/detection/yolo-detection-2026/scripts/env_config.py
index f7c7ddc..ff42e6f 100644
--- a/skills/detection/yolo-detection-2026/scripts/env_config.py
+++ b/skills/detection/yolo-detection-2026/scripts/env_config.py
@@ -51,9 +51,9 @@ class BackendSpec:
     ),
     "rocm": BackendSpec(
         name="rocm",
-        export_format="onnx",
-        model_suffix=".onnx",
-        half=False,  # ONNX Runtime ROCm handles precision internally
+        export_format="pytorch",     # PyTorch + HIP — ultralytics ONNX doesn't support ROCMExecutionProvider
+        model_suffix=".pt",
+        half=False,
     ),
     "mps": BackendSpec(
         name="mps",
diff --git a/skills/lib/env_config.py b/skills/lib/env_config.py
index f7c7ddc..ff42e6f 100644
--- a/skills/lib/env_config.py
+++ b/skills/lib/env_config.py
@@ -51,9 +51,9 @@ class BackendSpec:
     ),
     "rocm": BackendSpec(
         name="rocm",
-        export_format="onnx",
-        model_suffix=".onnx",
-        half=False,  # ONNX Runtime ROCm handles precision internally
+        export_format="pytorch",     # PyTorch + HIP — ultralytics ONNX doesn't support ROCMExecutionProvider
+        model_suffix=".pt",
+        half=False,
     ),
     "mps": BackendSpec(
         name="mps",