diff --git a/skills/detection/yolo-detection-2026/deploy.sh b/skills/detection/yolo-detection-2026/deploy.sh
index a1e4771..4cd7840 100755
--- a/skills/detection/yolo-detection-2026/deploy.sh
+++ b/skills/detection/yolo-detection-2026/deploy.sh
@@ -160,7 +160,63 @@ fi
 
 log "Installing dependencies from $REQ_FILE ..."
 emit "{\"event\": \"progress\", \"stage\": \"install\", \"message\": \"Installing $BACKEND dependencies...\"}"
-"$PIP" install -r "$REQ_FILE" -q 2>&1 | tail -5 >&2
+if [ "$BACKEND" = "rocm" ]; then
+    # ROCm: detect installed version for correct PyTorch index URL.
+    # Each probe keeps only the first MAJOR.MINOR match and ends in "|| true"
+    # so a failed probe falls through to the default instead of aborting.
+    ROCM_VER=""
+    if [ -f /opt/rocm/.info/version ]; then
+        ROCM_VER=$(head -1 /opt/rocm/.info/version | grep -oE '[0-9]+\.[0-9]+' | head -1) || true
+    elif command -v amd-smi &>/dev/null; then
+        ROCM_VER=$(amd-smi version 2>/dev/null | grep -oE '[0-9]+\.[0-9]+' | head -1) || true
+    elif command -v rocminfo &>/dev/null; then
+        ROCM_VER=$(rocminfo 2>/dev/null | grep -i "HSA Runtime" | grep -oE '[0-9]+\.[0-9]+' | head -1) || true
+    fi
+    ROCM_VER="${ROCM_VER:-6.2}"  # fallback if detection fails
+    log "Detected ROCm version: $ROCM_VER"
+
+    # Build list of ROCm versions to try (detected → step down → previous major)
+    ROCM_MAJOR=$(echo "$ROCM_VER" | cut -d. -f1)
+    ROCM_MINOR=$(echo "$ROCM_VER" | cut -d. -f2)
+    ROCM_CANDIDATES="$ROCM_VER"
+    m=$((ROCM_MINOR - 1))
+    while [ "$m" -ge 0 ]; do
+        ROCM_CANDIDATES="$ROCM_CANDIDATES ${ROCM_MAJOR}.${m}"
+        m=$((m - 1))
+    done
+    # Also try previous major version (e.g., 6.4, 6.2 if on 7.x)
+    prev_major=$((ROCM_MAJOR - 1))
+    for pm in 4 3 2 1 0; do
+        ROCM_CANDIDATES="$ROCM_CANDIDATES ${prev_major}.${pm}"
+    done
+
+    # Phase 1: Try each candidate until PyTorch installs successfully.
+    # pip output is sent to stderr (like every other pip call here) so the
+    # expected failures of non-matching indexes never reach stdout.
+    TORCH_INSTALLED=false
+    for ver in $ROCM_CANDIDATES; do
+        log "Trying PyTorch for ROCm $ver ..."
+        if "$PIP" install torch torchvision --index-url "https://download.pytorch.org/whl/rocm${ver}" -q >&2; then
+            log "Installed PyTorch with ROCm $ver support"
+            TORCH_INSTALLED=true
+            break
+        fi
+    done
+
+    if [ "$TORCH_INSTALLED" = false ]; then
+        log "WARNING: No PyTorch ROCm wheels found, installing CPU PyTorch from PyPI"
+        "$PIP" install torch torchvision -q 2>&1 | tail -3 >&2
+    fi
+
+    # Phase 2: remaining packages, with the same minimum versions as requirements_rocm.txt
+    "$PIP" install 'ultralytics>=8.3.0' 'onnxruntime-rocm>=1.18' 'onnx>=1.12.0,<2.0.0' 'onnxslim>=0.1.71' \
+        'numpy>=1.24.0' 'opencv-python-headless>=4.8.0' 'Pillow>=10.0.0' -q 2>&1 | tail -3 >&2
+
+    # Prevent ultralytics from auto-installing CPU onnxruntime during export
+    export YOLO_AUTOINSTALL=0
+else
+    "$PIP" install -r "$REQ_FILE" -q 2>&1 | tail -5 >&2
+fi
 
 # ─── Step 5: Pre-convert model to optimized format ───────────────────────────
 
diff --git a/skills/detection/yolo-detection-2026/requirements_rocm.txt b/skills/detection/yolo-detection-2026/requirements_rocm.txt
index 0d0ca7f..00a2909 100644
--- a/skills/detection/yolo-detection-2026/requirements_rocm.txt
+++ b/skills/detection/yolo-detection-2026/requirements_rocm.txt
@@ -1,10 +1,12 @@
 # YOLO 2026 — ROCm (AMD GPU) requirements
-# Installs PyTorch with ROCm 6.2 support
---extra-index-url https://download.pytorch.org/whl/rocm6.2
+# NOTE: deploy.sh auto-detects the installed ROCm version and installs
+# PyTorch from the matching index URL. This file is a reference manifest.
 torch>=2.4.0
 torchvision>=0.19.0
 ultralytics>=8.3.0
 onnxruntime-rocm>=1.18
+onnx>=1.12.0,<2.0.0  # pre-install: prevents ultralytics from auto-installing CPU onnxruntime
+onnxslim>=0.1.71  # pre-install: same reason
 numpy>=1.24.0
 opencv-python-headless>=4.8.0
 Pillow>=10.0.0
diff --git a/skills/detection/yolo-detection-2026/scripts/detect.py b/skills/detection/yolo-detection-2026/scripts/detect.py
index 595d9dc..d149374 100644
--- a/skills/detection/yolo-detection-2026/scripts/detect.py
+++ b/skills/detection/yolo-detection-2026/scripts/detect.py
@@ -15,12 +15,16 @@
 """
 
 import sys
+import os
 import json
 import argparse
 import signal
 import time
 from pathlib import Path
 
+# Prevent ultralytics from auto-installing packages (e.g. onnxruntime-gpu on ROCm)
+os.environ.setdefault("YOLO_AUTOINSTALL", "0")
+
 # Import env_config — try multiple locations:
 # 1. Same directory as detect.py (bundled copy)
 # 2. DeepCamera repo: skills/lib/
diff --git a/skills/detection/yolo-detection-2026/scripts/env_config.py b/skills/detection/yolo-detection-2026/scripts/env_config.py
index 49935b1..ff42e6f 100644
--- a/skills/detection/yolo-detection-2026/scripts/env_config.py
+++ b/skills/detection/yolo-detection-2026/scripts/env_config.py
@@ -51,9 +51,9 @@ class BackendSpec:
     ),
     "rocm": BackendSpec(
         name="rocm",
-        export_format="onnx",
-        model_suffix=".onnx",
-        half=False,  # ONNX Runtime ROCm handles precision internally
+        export_format="pytorch",  # PyTorch + HIP — ultralytics ONNX doesn't support ROCMExecutionProvider
+        model_suffix=".pt",
+        half=False,
     ),
     "mps": BackendSpec(
         name="mps",
@@ -165,7 +165,16 @@ def _try_rocm(self) -> bool:
             return False
 
         self.backend = "rocm"
-        self.device = "cuda"  # ROCm exposes as CUDA in PyTorch
+        # ROCm exposes as CUDA in PyTorch — but only if PyTorch-ROCm is installed
+        try:
+            import torch
+            if torch.cuda.is_available():
+                self.device = "cuda"
+            else:
+                self.device = "cpu"
+                _log("PyTorch CUDA/ROCm not available, using CPU for PyTorch fallback")
+        except ImportError:
+            self.device = "cpu"
 
         # Strategy 1: amd-smi static --json (ROCm 6.3+/7.x, richest output)
         if has_amd_smi:
@@ -467,13 +476,33 @@ def load_optimized(self, model_name: str, use_optimized: bool = True):
 
             # Fallback: use the PT model we already loaded
             _log("Falling back to PyTorch model")
-            pt_model.to(self.device)
+            fallback_device = self.device
+            if fallback_device == "cuda":
+                try:
+                    import torch
+                    if not torch.cuda.is_available():
+                        fallback_device = "cpu"
+                        _log("torch.cuda not available, falling back to CPU")
+                except ImportError:
+                    fallback_device = "cpu"
+            pt_model.to(fallback_device)
+            self.device = fallback_device
             self.load_ms = (time.perf_counter() - t0) * 1000
             return pt_model, "pytorch"
 
         # No optimization requested or framework missing
         model = YOLO(f"{model_name}.pt")
-        model.to(self.device)
+        fallback_device = self.device
+        if fallback_device == "cuda":
+            try:
+                import torch
+                if not torch.cuda.is_available():
+                    fallback_device = "cpu"
+                    _log("torch.cuda not available, falling back to CPU")
+            except ImportError:
+                fallback_device = "cpu"
+        model.to(fallback_device)
+        self.device = fallback_device
         self.load_ms = (time.perf_counter() - t0) * 1000
         return model, "pytorch"
 
diff --git a/skills/lib/env_config.py b/skills/lib/env_config.py
index 49935b1..ff42e6f 100644
--- a/skills/lib/env_config.py
+++ b/skills/lib/env_config.py
@@ -51,9 +51,9 @@ class BackendSpec:
     ),
     "rocm": BackendSpec(
         name="rocm",
-        export_format="onnx",
-        model_suffix=".onnx",
-        half=False,  # ONNX Runtime ROCm handles precision internally
+        export_format="pytorch",  # PyTorch + HIP — ultralytics ONNX doesn't support ROCMExecutionProvider
+        model_suffix=".pt",
+        half=False,
     ),
     "mps": BackendSpec(
         name="mps",
@@ -165,7 +165,16 @@ def _try_rocm(self) -> bool:
             return False
 
         self.backend = "rocm"
-        self.device = "cuda"  # ROCm exposes as CUDA in PyTorch
+        # ROCm exposes as CUDA in PyTorch — but only if PyTorch-ROCm is installed
+        try:
+            import torch
+            if torch.cuda.is_available():
+                self.device = "cuda"
+            else:
+                self.device = "cpu"
+                _log("PyTorch CUDA/ROCm not available, using CPU for PyTorch fallback")
+        except ImportError:
+            self.device = "cpu"
 
         # Strategy 1: amd-smi static --json (ROCm 6.3+/7.x, richest output)
         if has_amd_smi:
@@ -467,13 +476,33 @@ def load_optimized(self, model_name: str, use_optimized: bool = True):
 
             # Fallback: use the PT model we already loaded
             _log("Falling back to PyTorch model")
-            pt_model.to(self.device)
+            fallback_device = self.device
+            if fallback_device == "cuda":
+                try:
+                    import torch
+                    if not torch.cuda.is_available():
+                        fallback_device = "cpu"
+                        _log("torch.cuda not available, falling back to CPU")
+                except ImportError:
+                    fallback_device = "cpu"
+            pt_model.to(fallback_device)
+            self.device = fallback_device
             self.load_ms = (time.perf_counter() - t0) * 1000
             return pt_model, "pytorch"
 
         # No optimization requested or framework missing
         model = YOLO(f"{model_name}.pt")
-        model.to(self.device)
+        fallback_device = self.device
+        if fallback_device == "cuda":
+            try:
+                import torch
+                if not torch.cuda.is_available():
+                    fallback_device = "cpu"
+                    _log("torch.cuda not available, falling back to CPU")
+            except ImportError:
+                fallback_device = "cpu"
+        model.to(fallback_device)
+        self.device = fallback_device
         self.load_ms = (time.perf_counter() - t0) * 1000
         return model, "pytorch"
 
diff --git a/skills/lib/test_env_config_rocm.py b/skills/lib/test_env_config_rocm.py
index 76021c3..b17d92a 100644
--- a/skills/lib/test_env_config_rocm.py
+++ b/skills/lib/test_env_config_rocm.py
@@ -93,8 +93,11 @@ def test_dual_gpu_picks_discrete(self, mock_run, _mock_dir):
         """With 2 GPUs, picks the R9700 (32 GB) over iGPU (2 GB)."""
         mock_run.return_value = _make_run_result(AMD_SMI_DUAL_GPU)
 
-        env = HardwareEnv()
-        result = env._try_rocm()
+        mock_torch = mock.MagicMock()
+        mock_torch.cuda.is_available.return_value = True
+        with mock.patch.dict("sys.modules", {"torch": mock_torch}):
+            env = HardwareEnv()
+            result = env._try_rocm()
 
         assert result is True
         assert env.backend == "rocm"
@@ -170,6 +173,23 @@ def test_amd_smi_failure_returns_true_with_defaults(self, mock_run, _mock_dir):
         assert env.backend == "rocm"
         assert env.gpu_name == ""  # No name parsed, but backend detected
 
+    @mock.patch("env_config.shutil.which", _mock_which({"amd-smi"}))
+    @mock.patch("env_config.Path.is_dir", return_value=False)
+    @mock.patch("env_config.subprocess.run")
+    def test_no_pytorch_rocm_falls_back_to_cpu_device(self, mock_run, _mock_dir):
+        """When torch.cuda.is_available() is False, device stays 'cpu'."""
+        mock_run.return_value = _make_run_result(AMD_SMI_SINGLE_GPU)
+
+        mock_torch = mock.MagicMock()
+        mock_torch.cuda.is_available.return_value = False
+        with mock.patch.dict("sys.modules", {"torch": mock_torch}):
+            env = HardwareEnv()
+            env._try_rocm()
+
+        assert env.backend == "rocm"
+        assert env.device == "cpu"  # No PyTorch-ROCm → CPU fallback
+        assert env.gpu_name == "AMD Radeon RX 7900 XTX"  # GPU still detected
+
 
 class TestTryRocmFallback:
     """rocm-smi fallback (amd-smi not available)."""