From 64b610ef142f9c865c582631a7736b9b22364fb7 Mon Sep 17 00:00:00 2001 From: Cryptopoly <31970407+cryptopoly@users.noreply.github.com> Date: Sun, 17 May 2026 10:47:48 +0100 Subject: [PATCH 01/15] feat: accelerator capability flags + probes (FU-056 Phase 1) Foundation for in-app install UX. Lazy importability + version probes for nunchaku / sageattention / dflash-mlx / dflash-cuda / triattention / kvpress, plus a Windows-only wsl2 detector that seeds the upcoming vLLM-via-WSL bridge. Thirteen new fields on BackendCapabilities surface through /api/health; the placeholder probe primes them on first paint so the UI never flashes Install for a package that is actually present. Probes resilient to the half-baked-install failure mode we hit on Windows (torch directory present but Python source missing): find_spec swallows ValueError, version reads swallow ImportError and missing __version__. DFlash MLX vs CUDA flags delegate to the existing dflash.is_mlx_available / dflash.is_vllm_available helpers so the upstream package-layout dance stays in one place. Tests: 25 in tests/test_accelerator_capabilities.py covering present / absent / broken-install / WSL-status branches. --- CLAUDE.md | 1 + backend_service/inference/accelerators.py | 201 ++++++++++++++++++ backend_service/inference/base.py | 33 +++ backend_service/inference/capabilities.py | 46 ++++ tests/test_accelerator_capabilities.py | 248 ++++++++++++++++++++++ 5 files changed, 529 insertions(+) create mode 100644 backend_service/inference/accelerators.py create mode 100644 tests/test_accelerator_capabilities.py diff --git a/CLAUDE.md b/CLAUDE.md index f72565d..660faff 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -173,6 +173,7 @@ no longer relevant.
| FU-053 | Library status false-positive: distill / sibling variants marked installed when only base repo is on disk | **Shipped 2026-05-17.** Surfaced live: the Video Models tab showed `Wan 2.2 I2V A14B · Distill 4-step (BF16)` + `(FP8)` rows with a green "installed" tick + 117.5 GB sized, but `du -sh ~/.cache/huggingface/hub/models--Wan-AI--Wan2.2-I2V-A14B-Diffusers-distill-{bf16,fp8}` returned nothing — neither distill repo was actually present. Root cause: both catalog variants share `repo: "Wan-AI/Wan2.2-I2V-A14B-Diffusers"` (the BASE, non-distill repo) and route their distinguishing weights via the separate `distillTransformerRepo: "lightx2v/Wan2.2-Distill-Models"` field (FU-019 pattern). The validator at [_video_variant_validation_error](backend_service/helpers/video.py:226) only checked the *base* repo via `_video_download_validation_error` + `_video_full_precision_weights_validation_error` — both passed because the base repo IS on disk. The distill-specific files (`distillTransformerHighNoiseFile` + `distillTransformerLowNoiseFile`) were never checked, so any variant that pinned distill weights via the FU-019 swap mechanism was unconditionally marked installed once the base was downloaded. Fix: new `_distill_transformer_validation_error` helper that, when a variant declares `distillTransformerRepo`, requires (a) that repo's HF snapshot dir exists and (b) both the high-noise + low-noise filenames are present inside it. Wired into `_video_variant_validation_error` so distill variants now flip to "Not installed" when the distill weights are missing. New unit test in [tests/test_video.py](tests/test_video.py) pins both the false-positive regression (base-only → not installed) and the happy path (base + distill snapshot → installed). Live verification: after the fix landed on this M4 Max, the two distill rows correctly dropped out of the "installed" filter. | Trigger / Condition — closed inline. 
| | FU-054 | Same-repo variants: show actual on-disk size + shared-repo badge | **Shipped 2026-05-17.** Wan 2.2 TI2V 5B GGUF (Q4_K_M + Q6_K + Q8_0) renders as three rows in the Video Models tab, each labelled "31.9 GB". On disk those three share ONE `models--QuantStack--Wan2.2-TI2V-5B-GGUF/` dir totalling 12 GB (Q4_K_M=3.2 GB + Q6_K=3.9 GB + Q8_0=5.0 GB), not 95.7 GB as the per-row "31.9 GB" repetition implies. Two surgical changes: (1) Backend `_video_variant_for_payload` now stat-sizes the specific `ggufFile` (not the whole snapshot dir) for variants that pin a single GGUF, exposing `ggufFileBytes` alongside the existing `onDiskBytes`. (2) UI shows the live on-disk byte size next to the catalog estimate when present, and adds a "shares repo with N other variants" badge when ≥2 catalog variants resolve to the same on-disk repo dir. Avoided a full table restructure — the badge gives the user the "deleting this row does/doesn't affect siblings" signal without dragging in expandable parent rows. Frontend changes confined to [VideoModelsTab.tsx](src/features/video/VideoModelsTab.tsx); CSS additions land in [styles.css](src/styles.css) under `.shared-repo-badge`. | Trigger / Condition — closed inline. | | FU-055 | Storage explorer panel in Diagnostics tab — surface top disk consumers in-app | **Shipped 2026-05-17.** Complements Stuff Diver's blind spot on HF cache layout: blobs live at `~/.cache/huggingface/hub/models--*/blobs/` (each a single 5–30 GB safetensors / GGUF shard), but `snapshots//` are symlinks — third-party scanners that don't follow symlinks miss the real bytes. 
New endpoint `GET /api/diagnostics/storage-top?limit=20` walks every directory under `state.settings.modelDirectories` (HF cache + AI_Models + ~/Models + user dirs), sums per-repo via `du`-equivalent path walk with cycle protection (reuses `_path_size_bytes` from [discovery.py](backend_service/helpers/discovery.py:49)), and returns sorted `[{path, repoLabel, sizeBytes, lastModified, sourceKind}]`. Frontend renders the top-N table in a new "Disk usage" subsection of the Diagnostics tab with a "Reveal in Finder" + "Delete repo" action per row. Cycle protection prevents the `mlx-video-wan` converted-output dirs (which contain symlinks back to HF cache blobs in some configs) from double-counting. Trip-wire numbers from the M4 Max box on first-run: total `~/.cache/huggingface/hub` = 997 GB, top-3 = LTX-2 dev / LTX-2 distilled / LTX-2.3 distilled at ~87 / 87 / 81 GB respectively. | Trigger / Condition — closed inline. | +| FU-056 | In-app accelerator install UX (Nunchaku / SageAttention / DFlash CUDA / TriAttention / kvpress + vLLM-via-WSL bridge) | Active. Phase 1 shipped on `feature/accelerator-install-ux`; phases 2-9 pending. | The plan: bring every CUDA-side accelerator install in-band so users never need to drop to PowerShell to type `pip install ` — the install affordance lives next to the thing it accelerates (FLUX cards in Image Studio Discover, Wan cards in Video Studio Discover, the chat composer for spec-dec, a one-stop "Boost Pack" panel in Diagnostics for the completionist). The backend pipeline (`POST /api/setup/install-package` → background or sync install → capability re-probe → UI refresh) is already proven by FU-008 / FU-016 / FU-019 / FU-023 / FU-025 — what's missing is per-accelerator capability flags + the contextual badges/buttons on each feature surface. 
**Phase 1 (foundation, shipped 2026-05-17):** new probe module [backend_service/inference/accelerators.py](backend_service/inference/accelerators.py) with lazy importability + version helpers for nunchaku, sageattention, dflash-mlx, dflash-cuda, triattention, kvpress, plus a Windows-only `wsl2_available()` shell probe for Phase 8. 13 new fields on `BackendCapabilities` ([base.py](backend_service/inference/base.py)) + matching serialization in `to_dict`. Probes wired into both the cheap placeholder probe and the full `_probe_native_backends` ([capabilities.py](backend_service/inference/capabilities.py)) so the frontend gets accurate "Install" / "Installed" state on first paint. 25 unit tests in [tests/test_accelerator_capabilities.py](tests/test_accelerator_capabilities.py) pin the present / absent / broken-install matrix. **Phases 2-9 planned:** (2) reusable `` component, (3) Image Studio Discover/Models badges + post-generation suggestion toast, (4) Video Studio Discover/Models badges + LongLive bundle rename, (5) Chat composer hint when CUDA + DRAFT_MODEL_MAP hit, (6) Diagnostics "Boost Pack" panel (one-stop view), (7) per-variant `recommendedAccelerators` catalog metadata + i18n, (8) Windows vLLM-via-WSL2 bridge (WSL detector + isolated venv install + remote subprocess engine), (9) cache-strategy-matrix runner + pre-build gate integration. End-state UX: fresh user installs ChaosEngineAI → downloads FLUX → sees "🚀 Nunchaku +3× available [Install]" pill on the catalog card → one click → 90s later first generation runs at SVDQuant speed, no terminal required. | | FU-049 | Python 3.14 support gate | Re-evaluate quarterly. Trigger to bump `requires-python` floor: torch ≥2.6 publishes stable cp314 wheels for darwin-arm64 + win-amd64 + linux-x86_64 (CUDA + CPU) **AND** mlx-lm + mlx-vlm + mlx-video publish cp314 wheels **AND** Astral `python-build-standalone` ships a 3.14 portable build for the Tauri sidecar.
| Today `pyproject.toml` declares `requires-python = ">=3.10"` and the test matrix runs 3.11/3.12 ([scripts/e2e_test_suite.py](scripts/e2e_test_suite.py), Windows test guide, CI). Stay on 3.11/3.12 for ship + test until cp314 wheel coverage closes. **Why 3.14 is on radar:** (a) we already renamed `compression/` → `cache_compression/` to avoid shadowing Python 3.14's new stdlib `compression` namespace pkg — pre-emptive fix landed in v0.8.0. (b) 3.14 ships PEP 779 free-threaded build as stable (still opt-in via `python3.14t` builds), interesting for the FastAPI parent process but irrelevant for subprocess-isolated MLX / sd-cli / longlive workers. (c) GIL-default 3.14 still gives modest perf wins from tail-call interpreter (~5% on CPython benchmarks) + new sub-interpreter API. **3.14 blockers as of 2026-05-17:** (1) **PyTorch** — torch 2.5 stable + 2.6 nightly do not yet publish cp314 wheels on the full darwin-arm64 + win-amd64 + linux-x86_64 × {CPU, CUDA 12.4, ROCm 6.2, MPS} matrix; most painful single dep since image + video runtimes pin torch. (2) **MLX stack** — `mlx`, `mlx-lm`, `mlx-vlm`, `mlx-video` currently ship cp310–cp313 wheels; Apple Silicon adoption typically lags CPython release by 1–3 months. (3) **CUDA-compiled deps** — `bitsandbytes`, `flash-attn`, `sageattention`, `nunchaku`, `triattention`, `vllm-swift`, `dflash-mlx` (git+url, builds from source — needs cp314 cython/setuptools chain too). (4) **Tauri sidecar** — desktop release builds embed Python via Astral's `python-build-standalone`; need their 3.14 portable build before bundling. (5) **3.14 stdlib breakage** — deprecation removals (e.g. `typing.io`, `typing.re`, `asyncio.coroutine` shim, `pkg_resources` consequences); needs an audit pass via `python -W error::DeprecationWarning -m pytest tests/` on a 3.14 venv. 
(6) **3.14 `compression` namespace pkg** — already mitigated, but a regression probe should land in `pre-build-check` once we run CI on 3.14 (assert `from cache_compression import registry` works on cp314). **Plan when gate opens:** (a) bump `requires-python` to `>=3.11` first as an intermediate step (drops 3.10, which has no cp314 wheels anyway and lets us drop a few back-compat code paths); (b) add cp314 to GitHub Actions CI matrix alongside cp311 + cp312; (c) add a `python -X importtime` regression probe to `scripts/perf-baseline.py` (3.14's tail-call interpreter should improve cold-start by ~3–5%, want to measure); (d) bump `requires-python` to `>=3.11` floor + `<3.15` ceiling once green; (e) the Tauri sidecar Python pin advances independently — driven by `python-build-standalone` releases not pyproject. | --- diff --git a/backend_service/inference/accelerators.py b/backend_service/inference/accelerators.py new file mode 100644 index 0000000..5b73f23 --- /dev/null +++ b/backend_service/inference/accelerators.py @@ -0,0 +1,201 @@ +"""Probe helpers for CUDA-side accelerator packages (FU-056 Phase 1). + +Lazy importability + version probes for the five accelerators the +Setup tab + per-feature install panels expose: + +- **nunchaku** — SVDQuant 4-bit transformers for FLUX / SD3.5 / Qwen-Image + (FU-023). Pulled in by ``ImageStudio`` when a DiT pipeline loads with + ``nunchakuRepo`` pinned. CUDA-only at runtime, but the import itself + succeeds on any platform so the capability flag tracks "package usable" + rather than "package will accelerate this machine". +- **sageattention** — fast attention kernels for DiT pipelines on CUDA + (FU-016). Stacks multiplicatively with FBCache / Nunchaku. No-op on + Apple Silicon and on UNet pipelines. +- **dflash CUDA** — PyTorch/CUDA half of the speculative decoding family + (FU-031, FU-048). 
``dflash.is_vllm_available()`` already exists in the + local ``dflash/__init__.py`` wrapper and inspects the ``dflash.model`` + submodule, so we delegate to it rather than re-detecting here. +- **triattention** — vLLM compressor used by FU-003 LongLive on CUDA + and FU-002 on Apple Silicon. The pip name + import name agree + (``triattention``). +- **kvpress** — NVIDIA KV cache compression toolkit (FU-027). Already + registered in ``_INSTALLABLE_PIP_PACKAGES`` but had no capability flag + before this phase; integration code arrives in a later phase, but the + install button needs the flag to gate "Installed ✓" state. + +Plus a Windows-specific ``wsl2_available()`` helper used by the future +Phase 8 vLLM-via-WSL bridge. On macOS/Linux it's always ``False`` — the +flag only carries weight on Windows where ``vllm`` has no native wheels. + +Probes are deliberately lazy: every ``import`` lives inside a function +body so ``python -X importtime backend_service.app`` stays under the 2 s +cold-start budget (per CLAUDE.md performance guidelines). The companion +``_version`` helpers return ``None`` if the package isn't installed — +callers don't need a separate availability check before reading them. +""" + +from __future__ import annotations + +import importlib +import importlib.util +import subprocess +import sys + + +def _spec_exists(module_name: str) -> bool: + """``importlib.util.find_spec`` wrapper that swallows ImportError / ValueError. + + ``find_spec`` can raise on partially-broken installs (e.g. a torch + directory that exists on disk but has no ``__init__.py``) — see the + Windows torch install bug investigated 2026-05-17. We treat those raises + as "not available" so the capability resolver never crashes on a half- + installed package. + """ + try: + return importlib.util.find_spec(module_name) is not None + except (ImportError, ValueError): + return False + + +def _safe_version(module_name: str) -> str | None: + """Read ``__version__`` without crashing on broken installs.
+ + Mirrors the half-broken-install resilience of ``_spec_exists``: a + package that registers an import spec but has no Python source (the + Windows ``torch/`` failure mode) raises on attribute access, not on + ``find_spec``. Catching here keeps the capability payload honest. + """ + if not _spec_exists(module_name): + return None + try: + module = importlib.import_module(module_name) + except Exception: + return None + version = getattr(module, "__version__", None) + return str(version) if version is not None else None + + +# --------------------------------------------------------------------------- +# Nunchaku — FU-023 +# --------------------------------------------------------------------------- + +def nunchaku_available() -> bool: + return _spec_exists("nunchaku") + + +def nunchaku_version() -> str | None: + return _safe_version("nunchaku") + + +# --------------------------------------------------------------------------- +# SageAttention — FU-016 +# --------------------------------------------------------------------------- + +def sageattention_available() -> bool: + return _spec_exists("sageattention") + + +def sageattention_version() -> str | None: + return _safe_version("sageattention") + + +# --------------------------------------------------------------------------- +# DFlash — FU-031 (MLX side) + FU-048 (CUDA side) +# +# Two flags here because the two backends live in two separate pip +# packages with two import names (``dflash_mlx`` for Apple Silicon, +# ``dflash.model`` for CUDA). The shared ``dflash`` integration module +# already exposes detection helpers; reuse them so the wrapping stays +# in one place if the upstream package layout changes. 
+# --------------------------------------------------------------------------- + +def dflash_mlx_available() -> bool: + """``dflash_mlx`` (Apple Silicon) — the MLX-native draft runner.""" + try: + from dflash import is_mlx_available + except ImportError: + return False + try: + return bool(is_mlx_available()) + except Exception: + return False + + +def dflash_cuda_available() -> bool: + """``dflash`` PyPI package (CUDA) — the PyTorch/CUDA draft runner. + + Uses the integration module's existing helper, which checks for the + ``dflash.model`` submodule specifically (the local ``dflash/`` wrapper + in this repo shadows the bare ``dflash`` import, so the submodule + check is what disambiguates "real upstream package" from "our shim"). + """ + try: + from dflash import is_vllm_available + except ImportError: + return False + try: + return bool(is_vllm_available()) + except Exception: + return False + + +def dflash_mlx_version() -> str | None: + return _safe_version("dflash_mlx") + + +def dflash_cuda_version() -> str | None: + """The CUDA wheel exposes its version via ``dflash.model.__version__`` + when installed, but our local wrapper ``dflash/__init__.py`` shadows + the bare name. Probe the submodule path the upstream package owns. 
+ """ + if not dflash_cuda_available(): + return None + return _safe_version("dflash.model") + + +# --------------------------------------------------------------------------- +# TriAttention — FU-002 (MLX) + FU-003 LongLive (CUDA) +# --------------------------------------------------------------------------- + +def triattention_available() -> bool: + return _spec_exists("triattention") + + +def triattention_version() -> str | None: + return _safe_version("triattention") + + +# --------------------------------------------------------------------------- +# kvpress — FU-027 (capability flag now; integration in a later phase) +# --------------------------------------------------------------------------- + +def kvpress_available() -> bool: + return _spec_exists("kvpress") + + +def kvpress_version() -> str | None: + return _safe_version("kvpress") + + +# --------------------------------------------------------------------------- +# WSL2 — Windows-only bridge for vLLM (FU-056 Phase 8) +# +# Pure no-op on macOS / Linux. On Windows we shell ``wsl --status`` with +# a tight timeout. The two-second timeout covers cold WSL service starts +# without hanging the capability probe — repeated calls are throttled by +# the capability cache, so a slow first probe doesn't compound. +# --------------------------------------------------------------------------- + +def wsl2_available() -> bool: + if sys.platform != "win32": + return False + try: + result = subprocess.run( + ["wsl", "--status"], + capture_output=True, + timeout=2.0, + check=False, + ) + except (FileNotFoundError, subprocess.TimeoutExpired, OSError): + return False + return result.returncode == 0 diff --git a/backend_service/inference/base.py b/backend_service/inference/base.py index c1782f2..47119ac 100644 --- a/backend_service/inference/base.py +++ b/backend_service/inference/base.py @@ -97,6 +97,26 @@ class BackendCapabilities: # help text. 
The UI keys an MTP affordance for GGUF models off this # alongside mtplxAvailable for MLX models. ggufMtpAvailable: bool = False + # FU-056 Phase 1: CUDA-side accelerator capability flags. The Setup + # tab + per-feature install panels gate "Install" vs "Installed" UI + # off these. ``dflashMlxAvailable`` and ``dflashCudaAvailable`` are + # separate because the two backends live in two pip packages, even + # though the user-facing "DFlash" affordance is the same feature on + # both platforms. ``wsl2Available`` is Windows-only and seeds the + # Phase 8 vLLM-via-WSL bridge — always ``False`` on macOS / Linux. + nunchakuAvailable: bool = False + nunchakuVersion: str | None = None + sageattentionAvailable: bool = False + sageattentionVersion: str | None = None + dflashMlxAvailable: bool = False + dflashMlxVersion: str | None = None + dflashCudaAvailable: bool = False + dflashCudaVersion: str | None = None + triattentionAvailable: bool = False + triattentionVersion: str | None = None + kvpressAvailable: bool = False + kvpressVersion: str | None = None + wsl2Available: bool = False probing: bool = False def to_dict(self) -> dict[str, Any]: @@ -118,6 +138,19 @@ def to_dict(self) -> dict[str, Any]: "mtplxAvailable": self.mtplxAvailable, "mtplxPythonPath": self.mtplxPythonPath, "ggufMtpAvailable": self.ggufMtpAvailable, + "nunchakuAvailable": self.nunchakuAvailable, + "nunchakuVersion": self.nunchakuVersion, + "sageattentionAvailable": self.sageattentionAvailable, + "sageattentionVersion": self.sageattentionVersion, + "dflashMlxAvailable": self.dflashMlxAvailable, + "dflashMlxVersion": self.dflashMlxVersion, + "dflashCudaAvailable": self.dflashCudaAvailable, + "dflashCudaVersion": self.dflashCudaVersion, + "triattentionAvailable": self.triattentionAvailable, + "triattentionVersion": self.triattentionVersion, + "kvpressAvailable": self.kvpressAvailable, + "kvpressVersion": self.kvpressVersion, + "wsl2Available": self.wsl2Available, "probing": self.probing, } diff --git 
a/backend_service/inference/capabilities.py b/backend_service/inference/capabilities.py index 0f3a0c5..ca82854 100644 --- a/backend_service/inference/capabilities.py +++ b/backend_service/inference/capabilities.py @@ -16,6 +16,21 @@ from pathlib import Path from backend_service.inference._constants import CAPABILITY_CACHE_TTL_SECONDS +from backend_service.inference.accelerators import ( + dflash_cuda_available, + dflash_cuda_version, + dflash_mlx_available, + dflash_mlx_version, + kvpress_available, + kvpress_version, + nunchaku_available, + nunchaku_version, + sageattention_available, + sageattention_version, + triattention_available, + triattention_version, + wsl2_available, +) from backend_service.inference.base import BackendCapabilities from backend_service.inference.binaries import ( _json_subprocess, @@ -59,6 +74,10 @@ def _initial_backend_capabilities() -> BackendCapabilities: llama_server_turbo_path = _resolve_llama_server_turbo() llama_cli_path = _resolve_llama_cli() mtplx_available, mtplx_python = _detect_mtplx() + # FU-056 Phase 1: prime accelerator flags during the placeholder phase + # too, so first render shows "Install" vs "Installed" state without + # waiting for the full MLX subprocess probe. Not free: ``*_version`` and + # DFlash probes import modules; the WSL2 probe runs ``wsl --status``.
return BackendCapabilities( pythonExecutable=python_executable, mlxAvailable=False, @@ -74,6 +93,19 @@ def _initial_backend_capabilities() -> BackendCapabilities: vllmVersion=None, mtplxAvailable=mtplx_available, mtplxPythonPath=mtplx_python, + nunchakuAvailable=nunchaku_available(), + nunchakuVersion=nunchaku_version(), + sageattentionAvailable=sageattention_available(), + sageattentionVersion=sageattention_version(), + dflashMlxAvailable=dflash_mlx_available(), + dflashMlxVersion=dflash_mlx_version(), + dflashCudaAvailable=dflash_cuda_available(), + dflashCudaVersion=dflash_cuda_version(), + triattentionAvailable=triattention_available(), + triattentionVersion=triattention_version(), + kvpressAvailable=kvpress_available(), + kvpressVersion=kvpress_version(), + wsl2Available=wsl2_available(), probing=True, ) @@ -133,6 +165,20 @@ def _probe_native_backends() -> BackendCapabilities: mtplxAvailable=mtplx_available, mtplxPythonPath=mtplx_python, ggufMtpAvailable=gguf_mtp_available, + # FU-056 Phase 1: per-accelerator import + version probes. + nunchakuAvailable=nunchaku_available(), + nunchakuVersion=nunchaku_version(), + sageattentionAvailable=sageattention_available(), + sageattentionVersion=sageattention_version(), + dflashMlxAvailable=dflash_mlx_available(), + dflashMlxVersion=dflash_mlx_version(), + dflashCudaAvailable=dflash_cuda_available(), + dflashCudaVersion=dflash_cuda_version(), + triattentionAvailable=triattention_available(), + triattentionVersion=triattention_version(), + kvpressAvailable=kvpress_available(), + kvpressVersion=kvpress_version(), + wsl2Available=wsl2_available(), ) diff --git a/tests/test_accelerator_capabilities.py b/tests/test_accelerator_capabilities.py new file mode 100644 index 0000000..a9c00dc --- /dev/null +++ b/tests/test_accelerator_capabilities.py @@ -0,0 +1,248 @@ +"""Tests for FU-056 Phase 1 accelerator capability probes. 
+ +Covers ``backend_service/inference/accelerators.py`` and its wiring +into ``BackendCapabilities.to_dict``. The probes are intentionally +boring — we're mostly pinning the "package present / package absent / +package broken" matrix so future regressions can't silently flip the +UI gating that downstream phases depend on. +""" + +from __future__ import annotations + +import sys +import unittest +from unittest.mock import MagicMock, patch + +from backend_service.inference import accelerators +from backend_service.inference.base import BackendCapabilities + + +class SpecExistsTests(unittest.TestCase): + def test_returns_true_when_module_resolvable(self): + # ``json`` is in the stdlib — always findable. + self.assertTrue(accelerators._spec_exists("json")) + + def test_returns_false_when_module_absent(self): + self.assertFalse(accelerators._spec_exists("nunchaku_fake_module_xyz")) + + def test_swallows_partial_install_raise(self): + with patch( + "backend_service.inference.accelerators.importlib.util.find_spec", + side_effect=ValueError("broken __spec__"), + ): + self.assertFalse(accelerators._spec_exists("anything")) + + +class SafeVersionTests(unittest.TestCase): + def test_returns_none_when_module_absent(self): + self.assertIsNone(accelerators._safe_version("nunchaku_fake_module_xyz")) + + def test_returns_version_string_when_present(self): + fake_module = MagicMock(__version__="1.2.3") + with patch.object(accelerators, "_spec_exists", return_value=True): + with patch.object( + accelerators.importlib, + "import_module", + return_value=fake_module, + ): + self.assertEqual(accelerators._safe_version("anything"), "1.2.3") + + def test_returns_none_when_module_lacks_version(self): + fake_module = MagicMock(spec=[]) # no __version__ attribute + with patch.object(accelerators, "_spec_exists", return_value=True): + with patch.object( + accelerators.importlib, + "import_module", + return_value=fake_module, + ): + self.assertIsNone(accelerators._safe_version("anything")) + 
+ def test_swallows_import_failure(self): + with patch.object(accelerators, "_spec_exists", return_value=True): + with patch.object( + accelerators.importlib, + "import_module", + side_effect=ImportError("broken native ext"), + ): + self.assertIsNone(accelerators._safe_version("anything")) + + +class PerAcceleratorAvailabilityTests(unittest.TestCase): + """Each accelerator's ``*_available()`` helper must flip cleanly on + ``find_spec`` answers. Patching ``_spec_exists`` rather than + ``find_spec`` keeps the test independent of how the real probes + are implemented underneath.""" + + def test_nunchaku_available_true(self): + with patch.object(accelerators, "_spec_exists", return_value=True): + self.assertTrue(accelerators.nunchaku_available()) + + def test_nunchaku_available_false(self): + with patch.object(accelerators, "_spec_exists", return_value=False): + self.assertFalse(accelerators.nunchaku_available()) + + def test_sageattention_available_true(self): + with patch.object(accelerators, "_spec_exists", return_value=True): + self.assertTrue(accelerators.sageattention_available()) + + def test_triattention_available_true(self): + with patch.object(accelerators, "_spec_exists", return_value=True): + self.assertTrue(accelerators.triattention_available()) + + def test_kvpress_available_true(self): + with patch.object(accelerators, "_spec_exists", return_value=True): + self.assertTrue(accelerators.kvpress_available()) + + +class DflashAvailabilityTests(unittest.TestCase): + """DFlash MLX / CUDA flags delegate to ``dflash.is_mlx_available`` and + ``dflash.is_vllm_available``. 
Patch those to drive the branch matrix.""" + + def test_mlx_available_when_helper_returns_true(self): + with patch("dflash.is_mlx_available", return_value=True, create=True): + self.assertTrue(accelerators.dflash_mlx_available()) + + def test_mlx_unavailable_when_helper_returns_false(self): + with patch("dflash.is_mlx_available", return_value=False, create=True): + self.assertFalse(accelerators.dflash_mlx_available()) + + def test_mlx_unavailable_when_helper_raises(self): + with patch("dflash.is_mlx_available", side_effect=RuntimeError("boom"), create=True): + self.assertFalse(accelerators.dflash_mlx_available()) + + def test_cuda_available_when_helper_returns_true(self): + with patch("dflash.is_vllm_available", return_value=True, create=True): + self.assertTrue(accelerators.dflash_cuda_available()) + + def test_cuda_unavailable_when_helper_returns_false(self): + with patch("dflash.is_vllm_available", return_value=False, create=True): + self.assertFalse(accelerators.dflash_cuda_available()) + + def test_cuda_version_returns_none_when_unavailable(self): + with patch("dflash.is_vllm_available", return_value=False, create=True): + self.assertIsNone(accelerators.dflash_cuda_version()) + + +class Wsl2AvailableTests(unittest.TestCase): + def test_returns_false_off_windows(self): + with patch.object(accelerators.sys, "platform", "linux"): + self.assertFalse(accelerators.wsl2_available()) + with patch.object(accelerators.sys, "platform", "darwin"): + self.assertFalse(accelerators.wsl2_available()) + + def test_returns_true_when_wsl_status_succeeds(self): + fake_result = MagicMock(returncode=0) + with patch.object(accelerators.sys, "platform", "win32"): + with patch.object( + accelerators.subprocess, + "run", + return_value=fake_result, + ) as run_mock: + self.assertTrue(accelerators.wsl2_available()) + run_mock.assert_called_once() + self.assertEqual(run_mock.call_args.args[0][0], "wsl") + self.assertEqual(run_mock.call_args.args[0][1], "--status") + + def 
test_returns_false_when_wsl_status_fails(self): + fake_result = MagicMock(returncode=1) + with patch.object(accelerators.sys, "platform", "win32"): + with patch.object(accelerators.subprocess, "run", return_value=fake_result): + self.assertFalse(accelerators.wsl2_available()) + + def test_returns_false_when_wsl_not_installed(self): + with patch.object(accelerators.sys, "platform", "win32"): + with patch.object( + accelerators.subprocess, + "run", + side_effect=FileNotFoundError(), + ): + self.assertFalse(accelerators.wsl2_available()) + + def test_returns_false_on_subprocess_timeout(self): + with patch.object(accelerators.sys, "platform", "win32"): + with patch.object( + accelerators.subprocess, + "run", + side_effect=accelerators.subprocess.TimeoutExpired(cmd="wsl", timeout=2.0), + ): + self.assertFalse(accelerators.wsl2_available()) + + +class BackendCapabilitiesToDictTests(unittest.TestCase): + """The frontend reads accelerator flags via ``/api/health``. Pin + the serialized payload so a future field rename (or a forgetful + ``to_dict`` update) gets caught here rather than in a vague UI bug.""" + + def test_to_dict_includes_every_accelerator_field(self): + caps = BackendCapabilities( + pythonExecutable="/x/python", + mlxAvailable=False, + mlxLmAvailable=False, + mlxUsable=False, + nunchakuAvailable=True, + nunchakuVersion="1.2.1", + sageattentionAvailable=True, + sageattentionVersion="2.2.0", + dflashMlxAvailable=False, + dflashMlxVersion=None, + dflashCudaAvailable=True, + dflashCudaVersion="0.1.0", + triattentionAvailable=True, + triattentionVersion="0.2.0", + kvpressAvailable=False, + kvpressVersion=None, + wsl2Available=True, + ) + payload = caps.to_dict() + for key in ( + "nunchakuAvailable", + "nunchakuVersion", + "sageattentionAvailable", + "sageattentionVersion", + "dflashMlxAvailable", + "dflashMlxVersion", + "dflashCudaAvailable", + "dflashCudaVersion", + "triattentionAvailable", + "triattentionVersion", + "kvpressAvailable", + "kvpressVersion", + 
"wsl2Available", + ): + self.assertIn(key, payload, f"{key} missing from to_dict payload") + self.assertTrue(payload["nunchakuAvailable"]) + self.assertEqual(payload["sageattentionVersion"], "2.2.0") + self.assertFalse(payload["dflashMlxAvailable"]) + self.assertTrue(payload["wsl2Available"]) + + def test_defaults_render_as_false_and_none(self): + caps = BackendCapabilities( + pythonExecutable="/x/python", + mlxAvailable=False, + mlxLmAvailable=False, + mlxUsable=False, + ) + payload = caps.to_dict() + for flag in ( + "nunchakuAvailable", + "sageattentionAvailable", + "dflashMlxAvailable", + "dflashCudaAvailable", + "triattentionAvailable", + "kvpressAvailable", + "wsl2Available", + ): + self.assertFalse(payload[flag], f"{flag} should default False") + for version in ( + "nunchakuVersion", + "sageattentionVersion", + "dflashMlxVersion", + "dflashCudaVersion", + "triattentionVersion", + "kvpressVersion", + ): + self.assertIsNone(payload[version], f"{version} should default None") + + +if __name__ == "__main__": + unittest.main() From 36ecee94b84dd219387be841f448f72c85573fea Mon Sep 17 00:00:00 2001 From: Cryptopoly <31970407+cryptopoly@users.noreply.github.com> Date: Sun, 17 May 2026 10:55:19 +0100 Subject: [PATCH 02/15] test: pytest auto-loads installed app extras + runners point at app Tests should exercise the same install users have, not a parallel .venv install. New tests/conftest.py calls ensure_extras_on_sys_path at collection time, so pytest tests/ resolves torch / diffusers / mlx / nunchaku / sageattention / triattention / vllm against the persistent extras dir at: Windows: %LOCALAPPDATA%\ChaosEngineAI\extras\cp{XY}\site-packages macOS: ~/Library/Application Support/ChaosEngineAI/extras/cp{XY}/site-packages Linux: ${XDG_DATA_HOME}/ChaosEngineAI/extras/cp{XY}/site-packages A torch upgrade landing via the in-app installer is reflected in the next pytest run automatically; no pip install dance in .venv. 
On a fresh CI box without the extras dir the conftest is a silent no-op, so existing test boxes keep working. Set CHAOSENGINE_TEST_TRACE_EXTRAS=1 to log which extras path got loaded for a given run. Runners (e2e_test_suite.py, cache-strategy-matrix.py) now print an actionable hint when the backend is not reachable: open the ChaosEngineAI app, rather than just backend not reachable; aborting. Both still exit 2/3 respectively so CI gates stay reliable. Docs (testing/overview.md, testing/e2e-testing.md) updated with the canonical open-the-app-then-run-tests flow, with the headless dev backend kept as an advanced option for contributors. --- docs/testing/e2e-testing.md | 22 ++++++++++++ docs/testing/overview.md | 56 +++++++++++++++++++++++++++--- scripts/cache-strategy-matrix.py | 15 +++++++++ scripts/e2e_test_suite.py | 23 ++++++++++++- tests/conftest.py | 58 ++++++++++++++++++++++++++++++++ 5 files changed, 168 insertions(+), 6 deletions(-) create mode 100644 tests/conftest.py diff --git a/docs/testing/e2e-testing.md b/docs/testing/e2e-testing.md index 20c67b3..5344c86 100644 --- a/docs/testing/e2e-testing.md +++ b/docs/testing/e2e-testing.md @@ -60,6 +60,24 @@ alarm. ### Full sweep (every phase, every check) +The canonical run path is **against the installed app**, so the suite +exercises the same embedded runtime + extras dir that users have: + +```bash +# 1. Open ChaosEngineAI (Tauri shell launches the backend on 8876) +# 2. From any shell: +.venv/bin/python scripts/e2e_test_suite.py +``` + +The runner errors with an actionable hint if the backend isn't +reachable (exit code 2). It will not silently fall back to a custom +dev backend. + +#### Headless dev backend (advanced) + +For contributors iterating on the suite itself or running it in CI +without the desktop shell: + ```bash # In one shell — keep the backend running for the entire suite ./scripts/chaosengine-cli serve @@ -68,6 +86,10 @@ alarm. 
./scripts/e2e_test_suite.py ``` +This works but doesn't exercise the `python-build-standalone` Python +that ships in the desktop bundle — for release validation, prefer the +installed-app path. + Wall time depends on hardware and which models are on disk. M-series with 27B MLX models on hand: 10–25 minutes. Add another 10–20 if Phase 4 / 5 actually run generation (depends on installed image/video pipelines). diff --git a/docs/testing/overview.md b/docs/testing/overview.md index 0aa4810..f18ae38 100644 --- a/docs/testing/overview.md +++ b/docs/testing/overview.md @@ -26,21 +26,67 @@ release. ## Required commands +ChaosEngineAI tests run against **the installed app's runtime** — the +same torch / diffusers / mlx / nunchaku / etc. wheels users have +installed via the in-app "Install GPU runtime" + per-feature install +buttons. No custom dev setup. The flow is: + +1. Open the ChaosEngineAI app (the Tauri shell launches the backend + on port 8876 and adds the persistent extras dir to its `PYTHONPATH`). +2. From any shell, run the test suites below. + ```bash -# Python tests +# Python tests — auto-loads the app's extras dir via tests/conftest.py .venv/bin/python -m pytest tests/ -q -# TypeScript tests +# TypeScript tests — no backend dependency npm test # Type-check npx tsc --noEmit -# E2E smoke -./scripts/chaosengine-cli serve & # one shell -./scripts/e2e_test_suite.py --smoke # another shell +# E2E smoke — talks to the running app on 127.0.0.1:8876 +.venv/bin/python scripts/e2e_test_suite.py --smoke +``` + +### Why the app's extras, not the dev venv? + +The dev `.venv` ships with FastAPI + pytest + huggingface-hub but +deliberately **without** torch / diffusers / mlx / nunchaku / +sageattention / triattention / vllm. 
Those heavy packages live in the +persistent extras directory at: + +- Windows: `%LOCALAPPDATA%\ChaosEngineAI\extras\cp{XY}\site-packages` +- macOS: `~/Library/Application Support/ChaosEngineAI/extras/cp{XY}/site-packages` +- Linux: `${XDG_DATA_HOME}/ChaosEngineAI/extras/cp{XY}/site-packages` + +`tests/conftest.py` auto-discovers that path at pytest collection time +and adds it to `sys.path` (via [`ensure_extras_on_sys_path`](https://github.com/cryptopoly/ChaosEngineAI/blob/staging/backend_service/runtime_paths.py)), +so `import torch` in a test resolves against the same wheel a user +runs. A torch upgrade landing via the in-app installer is reflected in +the next `pytest` run automatically — no `pip install` dance required. + +Set `CHAOSENGINE_TEST_TRACE_EXTRAS=1` to log which extras path got +added to `sys.path` for a given run (useful when debugging "is this test +hitting the install I think it is?"). + +### Headless dev backend (advanced) + +Contributors who want to run the suite without the Tauri shell open +can stand up the backend headlessly: + +```bash +# One shell — runs the FastAPI app under the dev venv +.venv/bin/python -m backend_service.app --port 8876 + +# OR (gets the embedded runtime via Tauri's stage script) +npm run tauri:dev ``` + +This works, but won't exercise the exact `python-build-standalone` +binary the desktop bundle ships — for release-blocking validation, +prefer the production-app path above. + ## Where the tests live | Path | What's tested | diff --git a/scripts/cache-strategy-matrix.py b/scripts/cache-strategy-matrix.py index f5e2c7d..a0f1ab0 100755 --- a/scripts/cache-strategy-matrix.py +++ b/scripts/cache-strategy-matrix.py @@ -473,7 +473,22 @@ def main() -> int: try: caps = probe_backend(args.port) except ConnectionError as exc: + # The matrix runner is meant to exercise the installed app's + # runtime, the same way ``e2e_test_suite.py`` does. 
A failure to + # reach the backend almost always means "the app isn't open" — + # surface that clearly instead of just echoing the ConnectionError. print(f" ! {exc}", file=sys.stderr) + print("", file=sys.stderr) + print( + "Open the ChaosEngineAI app and re-run this command — the matrix " + "is designed to exercise the production embedded runtime + extras.", + file=sys.stderr, + ) + print( + f"(advanced: `npm run tauri:dev` or `python -m backend_service.app " + f"--port {args.port}` works for dev runs, but won't match the user-install path)", + file=sys.stderr, + ) return 3 print(f" available strategies: {sorted(caps.available_strategies)}") print(f" dflash={caps.dflash_available} ddtree={caps.ddtree_available} turbo-binary={caps.has_turbo_binary}") diff --git a/scripts/e2e_test_suite.py b/scripts/e2e_test_suite.py index 5c77be8..5c302f1 100755 --- a/scripts/e2e_test_suite.py +++ b/scripts/e2e_test_suite.py @@ -844,7 +844,28 @@ def main(argv: list[str] | None = None) -> int: phase0 = phase_0(cap) phases.append(phase0) _write_reports(Path(args.report_dir), started, ended, phases, cap) - print("[e2e] backend not reachable; aborting", file=sys.stderr, flush=True) + # Comprehensive E2E runs against the installed ChaosEngineAI app, + # not a custom dev backend — so the actionable hint always points + # at "open the app". The headless dev path is mentioned as a + # fallback for contributors who already know it exists. 
+ print("", file=sys.stderr, flush=True) + print( + f"[e2e] backend not reachable at http://{_HOST}:{_PORT}/api/health.", + file=sys.stderr, + flush=True, + ) + print( + "[e2e] open the ChaosEngineAI app and re-run this command — the suite " + "exercises the production embedded runtime.", + file=sys.stderr, + flush=True, + ) + print( + "[e2e] (advanced: `npm run tauri:dev` or `python -m backend_service.app " + f"--port {_PORT}` from .venv works too, but won't match the user-install path)", + file=sys.stderr, + flush=True, + ) return 2 phases: list[PhaseResult] = [] diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..a117529 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,58 @@ +"""pytest collection-time hook to make tests resolve against the +installed ChaosEngineAI app's extras dir. + +The dev ``.venv`` deliberately ships **without** torch / diffusers / +mlx / vllm / nunchaku / sageattention / triattention. Those packages +live in the persistent extras directory the desktop app populates via +``/api/setup/install-gpu-bundle`` and friends — the same path the +production embedded runtime puts on ``PYTHONPATH`` at backend launch. + +Importing torch in the dev venv would fork the install state from +what real users run. So instead of asking developers to ``pip install`` +into ``.venv``, this conftest reuses the production extras dir at +collection time so: + + - Tests that touch ``torch`` / ``diffusers`` etc. resolve them + against the same wheels the user's actual app uses. + - "No custom test setup" — open the app, run ``pytest``, you're + testing the production install. + - A torch upgrade landing via the in-app installer is reflected in + the next pytest run automatically. + - CI boxes without the extras dir get a silent no-op: tests that + require torch will still fail in the same place they did before + (the import line), but tests that don't need it run normally. 
+ +The append-vs-prepend decision is delegated to +``ensure_extras_on_sys_path`` — repo-local shims (notably the +``turboquant_mlx`` adapter that wraps the upstream +``turboquant-mlx-full`` install) must keep import authority over the +raw upstream packages, so the helper appends rather than prepends. + +This is a pytest-native conftest, not a fixture. The side effect runs +once when pytest collects ``tests/``, before any test module imports. +""" + +from __future__ import annotations + +import os +import sys + +# We import the helper through ``backend_service`` so the editable +# install of this repo (``pip install -e .``) is what provides the +# import path. No special bootstrap needed — pytest's rootdir handling +# already finds ``backend_service`` via the installed package. +from backend_service.runtime_paths import ensure_extras_on_sys_path + + +_INSERTED = ensure_extras_on_sys_path() + + +# Surface what we wired in via ``-v -s`` so CI logs and local +# debugging make it obvious which extras dir the run pulled from. +# Silent in the default ``-q`` output so it doesn't add noise. +if _INSERTED and os.environ.get("CHAOSENGINE_TEST_TRACE_EXTRAS"): + print( + f"[conftest] appended extras to sys.path: {[str(p) for p in _INSERTED]}", + file=sys.stderr, + flush=True, + ) From 1cbb9b14fd606bbe87ff3b97bfe6207945c5fe52 Mon Sep 17 00:00:00 2001 From: Cryptopoly <31970407+cryptopoly@users.noreply.github.com> Date: Sun, 17 May 2026 11:20:37 +0100 Subject: [PATCH 03/15] feat: AcceleratorCard component + catalog (FU-056 Phase 2) Reusable card for the six CUDA-side accelerators (nunchaku, sageattention, dflash-mlx, dflash-cuda, triattention, kvpress). Three placement variants share one component so the per-feature surfaces in Phases 3-6 stay in sync without re-implementing the four states (idle / installing / installed / failed) per surface: - card: full banner with title, claim, applies-to, size pill, primary action. 
Lands in the Image / Video Studio runtime banners and the Diagnostics Boost Pack. - pill: compact horizontal chip with 4-bit-style copy. Lands on catalog variant cards in the Discover / Models tabs. - row: table form for Diagnostics Boost Pack's scannable view. State ownership: parent owns the install lifecycle (which package is in flight, success/failure, captured pip output). The card only owns the log-expanded toggle. Mirrors the CudaTorchLogPanel contract so the card is cheap to render in many places without duplicating polling work. New catalog (src/components/acceleratorCatalog.ts) is the single source of truth for each accelerator's pip name, capability flag, speedup claim, size, install mode, and platform gate. Adding a seventh accelerator is one entry here, one Phase 1 capability flag, and one row in the backend's _INSTALLABLE_PIP_PACKAGES. NativeBackendStatus (src/types/server.ts) extended with the 13 FU-056 Phase 1 fields plus the older vllm/mtplx/ggufMtp fields that were already on the wire but missing from the TS interface. All fields optional so a backend running an older build than the frontend doesn't break the type contract. Tests (28 new): catalog shape pinning + getAccelerator lookup + isPlatformCompatible matrix + readInstalled / readVersion / platformLabel / actionLabelFor branch coverage. Vitest harness stays at pure-function level - no React Testing Library yet, per the existing src/components/__tests__/ convention. CSS: .accelerator-card / -pill / -row variants in styles.css, matching the existing .torch-upgrade-pill colour vocabulary (rgba(80, 140, 220, ...) for the not-installed accent, rgba(80, 180, 100, ...) for installed, --border + --surface tokens for the chrome). 
--- src/components/AcceleratorCard.tsx | 323 ++++++++++++++++++ .../__tests__/AcceleratorCard.test.tsx | 119 +++++++ .../__tests__/acceleratorCatalog.test.ts | 100 ++++++ src/components/acceleratorCatalog.ts | 203 +++++++++++ src/styles.css | 201 +++++++++++ src/types/server.ts | 23 ++ 6 files changed, 969 insertions(+) create mode 100644 src/components/AcceleratorCard.tsx create mode 100644 src/components/__tests__/AcceleratorCard.test.tsx create mode 100644 src/components/__tests__/acceleratorCatalog.test.ts create mode 100644 src/components/acceleratorCatalog.ts diff --git a/src/components/AcceleratorCard.tsx b/src/components/AcceleratorCard.tsx new file mode 100644 index 0000000..f76e875 --- /dev/null +++ b/src/components/AcceleratorCard.tsx @@ -0,0 +1,323 @@ +import { useState } from "react"; + +import type { NativeBackendStatus } from "../types/server"; +import { + type AcceleratorMeta, + isPlatformCompatible, +} from "./acceleratorCatalog"; + +/** + * Reusable card for the six CUDA-side accelerators (FU-056 Phase 2). + * + * Three placement variants share one component so the per-feature + * surfaces in Phases 3–6 stay in sync without re-implementing the + * four states (idle / installing / installed / failed) per surface: + * + * - ``card`` (default) — full-width banner with title, speedup + * claim, applies-to copy, size pill, primary action. Lives in the + * Diagnostics Boost Pack and the Image / Video Studio runtime + * banners. + * - ``pill`` — compact horizontal chip with "🚀 Label +Nx [Install]" + * copy. Lives on catalog variant cards in the Discover / Models + * tabs. + * - ``row`` — table-friendly form with name + applies-to + status + + * action laid out as columns. Used by the Diagnostics Boost Pack + * to render all six accelerators in one scannable view. + * + * State ownership: the *parent* owns the install lifecycle (which + * package is in flight, success / failure of the most recent attempt, + * output captured from the install pipe). 
The card itself only owns + * the "log expanded?" toggle. This mirrors the + * ``CudaTorchLogPanel`` / ``TorchUpgradePill`` contract — keeps the + * card cheap to render in many places without each instance + * duplicating polling work. + */ + +export interface AcceleratorCardProps { + /** Catalog row for the accelerator this card represents. */ + meta: AcceleratorMeta; + /** Live capability snapshot. Used to read ``meta.capabilityField`` + * + ``meta.versionField`` for installed-state display. */ + capabilities: NativeBackendStatus | null; + /** Card layout density. Defaults to ``"card"``. */ + variant?: "card" | "pill" | "row"; + /** True while *this specific* accelerator's install is in flight. + * The parent owns this state; the card just renders accordingly. */ + installing?: boolean; + /** Last error message from a failed install attempt. ``null`` after + * a successful install or before any attempt. */ + installError?: string | null; + /** Captured pip output from the last install (success or fail). + * Surfaced inside a collapsible ``
`` so success runs stay + * compact but failures expose the diagnostic. */ + installOutput?: string | null; + /** Fired when the user clicks Install / Retry. Parent should call + * ``installPipPackage(meta.pipPackage)`` then ``refreshWorkspace()``. */ + onInstall: (pipPackage: string) => void; + /** Optional click handler for the platform-mismatch tooltip — lets + * the parent surface a "this won't run on your hardware" toast. */ + onPlatformMismatch?: (meta: AcceleratorMeta) => void; + /** Force-show the card even when ``platformGate`` says it's + * incompatible. The Diagnostics Boost Pack uses this so users can + * see every accelerator; per-feature surfaces leave it false. */ + showIncompatible?: boolean; +} + +/** Exported for unit-test reach: ``true`` iff capabilities reports + * this accelerator's flag as ``=== true``. Older backends without + * FU-056 fields read as ``false`` (the fields are optional on the + * shared TS interface). Never throws. */ +export function readInstalled( + meta: AcceleratorMeta, + capabilities: NativeBackendStatus | null, +): boolean { + if (!capabilities) return false; + const value = capabilities[meta.capabilityField]; + return value === true; +} + +/** Exported for unit-test reach: returns the version string when the + * backend exposed it, else ``null``. ``"0.0.0"`` and other zero-prefix + * versions count as present — we don't filter on semver shape. */ +export function readVersion( + meta: AcceleratorMeta, + capabilities: NativeBackendStatus | null, +): string | null { + if (!capabilities) return null; + const value = capabilities[meta.versionField]; + return typeof value === "string" && value.length > 0 ? value : null; +} + +/** Exported for unit-test reach: human-readable platform requirement. 
*/ +export function platformLabel(gate: AcceleratorMeta["platformGate"]): string { + switch (gate) { + case "cuda": + return "CUDA only"; + case "apple-silicon": + return "Apple Silicon only"; + case "any": + return "Cross-platform"; + } +} + +/** Exported for unit-test reach: maps the (installed / installing / + * failed / idle, sync / async) matrix onto the button copy. Returns + * ``null`` when no action button should render (i.e. the install is + * already complete). */ +export function actionLabelFor(args: { + installed: boolean; + installing: boolean; + hasError: boolean; + installMode: AcceleratorMeta["installMode"]; +}): string | null { + if (args.installed) return null; + if (args.installing) return "Installing…"; + if (args.hasError) return "Retry"; + return args.installMode === "async" ? "Install (background)" : "Install"; +} + +export function AcceleratorCard(props: AcceleratorCardProps) { + const { + meta, + capabilities, + variant = "card", + installing = false, + installError = null, + installOutput = null, + onInstall, + onPlatformMismatch, + showIncompatible = false, + } = props; + + const installed = readInstalled(meta, capabilities); + const version = readVersion(meta, capabilities); + const compatible = capabilities ? isPlatformCompatible(meta, capabilities) : true; + const [logOpen, setLogOpen] = useState(Boolean(installError)); + + // When the affordance is shown on a platform that physically can't + // run the accelerator and the surface isn't a "show everything" + // diagnostic — hide it. Cleaner than rendering a disabled card the + // user can't act on. + if (!compatible && !showIncompatible) { + return null; + } + + const handleInstall = () => { + if (!compatible) { + onPlatformMismatch?.(meta); + return; + } + onInstall(meta.pipPackage); + }; + + const statusBadge = (() => { + if (installed) { + return ( + + {version ? 
`✓ v${version}` : "✓ Installed"} + + ); + } + if (installing) { + return ( + + Installing… + + ); + } + if (installError) { + return ( + + Install failed + + ); + } + return null; + })(); + + const actionLabel = actionLabelFor({ + installed, + installing, + hasError: Boolean(installError), + installMode: meta.installMode, + }); + + if (variant === "pill") { + return ( + + + {installed ? "✓ " : "🚀 "} + {meta.shortLabel} + + {!installed && ( + + )} + + ); + } + + if (variant === "row") { + return ( + + + {meta.label} + {meta.appliesTo} + + {meta.sizeOnDiskLabel} + {platformLabel(meta.platformGate)} + {statusBadge} + + {actionLabel && ( + + )} + + + ); + } + + // Default: full card. + return ( +
+
+

+ {installed ? "✓ " : "🚀 "} + {meta.label} +

+ {statusBadge} +
+ +

{meta.speedupClaim}

+

+ Applies to:{" "} + {meta.appliesTo} +

+ +
+ {meta.sizeOnDiskLabel} + {platformLabel(meta.platformGate)} + + {meta.installMode === "async" ? "Background install" : "Quick install"} + + + {meta.followUp} + +
+ + {actionLabel && ( +
+ +
+ )} + + {installError && ( +
setLogOpen((event.target as HTMLDetailsElement).open)} + > + + Install failure — show output + +

{installError}

+ {installOutput && ( +
{installOutput}
+ )} +
+ )} + + {installed && installOutput && !installError && ( +
+ + Install output + +
{installOutput}
+
+ )} +
+ ); +} diff --git a/src/components/__tests__/AcceleratorCard.test.tsx b/src/components/__tests__/AcceleratorCard.test.tsx new file mode 100644 index 0000000..f7da04c --- /dev/null +++ b/src/components/__tests__/AcceleratorCard.test.tsx @@ -0,0 +1,119 @@ +import { describe, expect, it } from "vitest"; + +import type { NativeBackendStatus } from "../../types/server"; +import { + actionLabelFor, + platformLabel, + readInstalled, + readVersion, +} from "../AcceleratorCard"; +import { ACCELERATOR_CATALOG, getAccelerator } from "../acceleratorCatalog"; + +/** + * No JSX render harness in the repo today (per + * src/components/__tests__/ErrorBoundary.test.ts comment). We pin the + * card's *pure-function* contract instead — the same helpers the + * component body calls, exported for direct test reach. + */ + +function makeCaps(overrides: Partial = {}): NativeBackendStatus { + return { + pythonExecutable: "/x/python", + mlxAvailable: false, + mlxLmAvailable: false, + mlxUsable: false, + ggufAvailable: false, + converterAvailable: false, + ...overrides, + }; +} + +describe("readInstalled", () => { + const nunchaku = getAccelerator("nunchaku")!; + + it("returns false when capabilities is null", () => { + expect(readInstalled(nunchaku, null)).toBe(false); + }); + + it("returns false when the field is missing (older backend)", () => { + expect(readInstalled(nunchaku, makeCaps())).toBe(false); + }); + + it("returns true when the capability field is true", () => { + expect(readInstalled(nunchaku, makeCaps({ nunchakuAvailable: true }))).toBe(true); + }); + + it("returns false when the capability field is explicitly false", () => { + expect(readInstalled(nunchaku, makeCaps({ nunchakuAvailable: false }))).toBe(false); + }); +}); + +describe("readVersion", () => { + const nunchaku = getAccelerator("nunchaku")!; + + it("returns null when capabilities is null", () => { + expect(readVersion(nunchaku, null)).toBeNull(); + }); + + it("returns null when the version field is missing or 
empty", () => { + expect(readVersion(nunchaku, makeCaps())).toBeNull(); + expect(readVersion(nunchaku, makeCaps({ nunchakuVersion: "" }))).toBeNull(); + }); + + it("returns the version string when present", () => { + expect(readVersion(nunchaku, makeCaps({ nunchakuVersion: "1.2.1" }))).toBe("1.2.1"); + }); + + it("returns null when the version is explicitly null", () => { + expect(readVersion(nunchaku, makeCaps({ nunchakuVersion: null }))).toBeNull(); + }); +}); + +describe("platformLabel", () => { + it("maps every gate to a human-readable string", () => { + expect(platformLabel("cuda")).toBe("CUDA only"); + expect(platformLabel("apple-silicon")).toBe("Apple Silicon only"); + expect(platformLabel("any")).toBe("Cross-platform"); + }); + + it("covers every catalog platformGate value", () => { + // Pins that every catalog entry uses a gate platformLabel knows + // how to render — a new gate value would force this test to fail. + for (const entry of ACCELERATOR_CATALOG) { + const label = platformLabel(entry.platformGate); + expect(label.length).toBeGreaterThan(0); + } + }); +}); + +describe("actionLabelFor", () => { + it("returns null when already installed (no button rendered)", () => { + expect( + actionLabelFor({ installed: true, installing: false, hasError: false, installMode: "sync" }), + ).toBeNull(); + }); + + it("returns ``Installing…`` mid-flight (overrides error)", () => { + expect( + actionLabelFor({ installed: false, installing: true, hasError: true, installMode: "sync" }), + ).toBe("Installing…"); + }); + + it("returns ``Retry`` after a failed attempt", () => { + expect( + actionLabelFor({ installed: false, installing: false, hasError: true, installMode: "sync" }), + ).toBe("Retry"); + }); + + it("returns ``Install`` for fresh sync installs", () => { + expect( + actionLabelFor({ installed: false, installing: false, hasError: false, installMode: "sync" }), + ).toBe("Install"); + }); + + it("returns ``Install (background)`` for async installs", () => { + 
expect( + actionLabelFor({ installed: false, installing: false, hasError: false, installMode: "async" }), + ).toBe("Install (background)"); + }); +}); diff --git a/src/components/__tests__/acceleratorCatalog.test.ts b/src/components/__tests__/acceleratorCatalog.test.ts new file mode 100644 index 0000000..bdfa5be --- /dev/null +++ b/src/components/__tests__/acceleratorCatalog.test.ts @@ -0,0 +1,100 @@ +import { describe, expect, it } from "vitest"; + +import type { NativeBackendStatus } from "../../types/server"; +import { + ACCELERATOR_CATALOG, + type AcceleratorMeta, + getAccelerator, + isPlatformCompatible, +} from "../acceleratorCatalog"; + +/** + * The catalog is the source of truth for "which accelerators exist". + * Tests pin its shape so a typo in a pip-package name, a missing + * capability field, or a stale entry can't ship silently — every + * downstream surface (Phase 3-6) reads this registry verbatim. + */ + +describe("ACCELERATOR_CATALOG", () => { + it("ships exactly the six accelerators FU-056 Phase 1 wired probes for", () => { + const ids = ACCELERATOR_CATALOG.map((entry) => entry.id).sort(); + expect(ids).toEqual([ + "dflash-cuda", + "dflash-mlx", + "kvpress", + "nunchaku", + "sageattention", + "triattention", + ]); + }); + + it.each(ACCELERATOR_CATALOG.map((entry) => [entry.id, entry]))( + "%s catalog entry has all required fields", + (_id, entry) => { + expect(entry.label.length).toBeGreaterThan(0); + expect(entry.shortLabel.length).toBeGreaterThan(0); + expect(entry.pipPackage.length).toBeGreaterThan(0); + expect(entry.capabilityField.length).toBeGreaterThan(0); + expect(entry.versionField.length).toBeGreaterThan(0); + expect(entry.speedupClaim.length).toBeGreaterThan(0); + expect(entry.appliesTo.length).toBeGreaterThan(0); + expect(entry.sizeOnDiskLabel.length).toBeGreaterThan(0); + expect(["sync", "async"]).toContain(entry.installMode); + expect(["cuda", "apple-silicon", "any"]).toContain(entry.platformGate); + // FU row reference must look 
like "FU-NNN" (followUp string can + // pair multiple FUs separated by "/", e.g. "FU-003 / FU-002"). + expect(entry.followUp).toMatch(/FU-\d{3}/); + }, + ); + + it("capability field names follow the Phase 1 ``*Available`` convention", () => { + for (const entry of ACCELERATOR_CATALOG) { + expect(entry.capabilityField).toMatch(/Available$/); + expect(entry.versionField).toMatch(/Version$/); + } + }); + + it("getAccelerator resolves known ids", () => { + expect(getAccelerator("nunchaku")?.label).toBe("Nunchaku"); + expect(getAccelerator("sageattention")?.label).toBe("SageAttention"); + expect(getAccelerator("dflash-cuda")?.platformGate).toBe("cuda"); + expect(getAccelerator("dflash-mlx")?.platformGate).toBe("apple-silicon"); + }); + + it("getAccelerator returns undefined for unknown ids", () => { + expect(getAccelerator("flash-attn-3")).toBeUndefined(); + expect(getAccelerator("")).toBeUndefined(); + }); +}); + +describe("isPlatformCompatible", () => { + const cudaCaps = { mlxAvailable: false } as Pick< + NativeBackendStatus, + "mlxAvailable" + >; + const mlxCaps = { mlxAvailable: true } as Pick< + NativeBackendStatus, + "mlxAvailable" + >; + + it("``any`` platform-gated entries are always compatible", () => { + const fake: AcceleratorMeta = { + ...ACCELERATOR_CATALOG[0], + platformGate: "any", + }; + expect(isPlatformCompatible(fake, cudaCaps)).toBe(true); + expect(isPlatformCompatible(fake, mlxCaps)).toBe(true); + }); + + it("``cuda`` entries match when mlx is unavailable", () => { + const nunchaku = ACCELERATOR_CATALOG.find((e) => e.id === "nunchaku")!; + expect(isPlatformCompatible(nunchaku, cudaCaps)).toBe(true); + expect(isPlatformCompatible(nunchaku, mlxCaps)).toBe(false); + }); + + it("``apple-silicon`` entries match when mlx is available", () => { + const dflashMlx = ACCELERATOR_CATALOG.find((e) => e.id === "dflash-mlx")!; + expect(isPlatformCompatible(dflashMlx, cudaCaps)).toBe(false); + expect(isPlatformCompatible(dflashMlx, mlxCaps)).toBe(true); + }); 
+}); diff --git a/src/components/acceleratorCatalog.ts b/src/components/acceleratorCatalog.ts new file mode 100644 index 0000000..1e35dde --- /dev/null +++ b/src/components/acceleratorCatalog.ts @@ -0,0 +1,203 @@ +/** + * Accelerator registry (FU-056 Phase 2). + * + * Source of truth for the six CUDA-side accelerators the in-app install + * UX surfaces. Each entry pairs a stable ``id`` with the metadata that + * downstream components need to render a "Recommended" badge, an + * Install button, an "Installed ✓" pill, or a Boost Pack row: + * + * - ``pipPackage`` — argument to ``POST /api/setup/install-package``. + * Must match a key in the backend's ``_INSTALLABLE_PIP_PACKAGES`` + * allow-list ([backend_service/routes/setup/__init__.py]). + * - ``capabilityField`` / ``versionField`` — the ``NativeBackendStatus`` + * keys to read for installed state + display version. Wired in + * FU-056 Phase 1 on the backend. + * - ``speedupClaim`` / ``appliesTo`` — copy for the "🚀 Nunchaku +3× + * available" pill. Marketing-honest: never claim more than the + * model card / upstream benchmark reports for the *typical* case + * a user will hit. + * - ``sizeOnDiskLabel`` — rough human-readable on-disk footprint + * (compressed download + extracted wheel). Sourced from the + * CLAUDE.md FU rows that registered each package. + * - ``installMode`` — ``"sync"`` for ~5 min installs that we can hold + * a single HTTP call open for; ``"async"`` for the >5 min builds + * (triattention compiles flash-attn from source; vLLM ships a + * ~2 GB wheel) that need the background-job + poll-status shape. + * - ``platformGate`` — when set, the affordance hides on platforms + * where the accelerator can't run at all (e.g. dflash-mlx on + * Windows, vLLM native on macOS). Diagnostic surfaces that show + * "everything" can override this to render a disabled row with + * an explanation. 
+ * + * Adding a 7th accelerator is one entry here + one Phase 1 capability + * flag + one row in ``_INSTALLABLE_PIP_PACKAGES``. No component edit. + */ + +import type { NativeBackendStatus } from "../types/server"; + +export type AcceleratorId = + | "nunchaku" + | "sageattention" + | "dflash-mlx" + | "dflash-cuda" + | "triattention" + | "kvpress"; + +export type PlatformGate = "cuda" | "apple-silicon" | "any"; + +export interface AcceleratorMeta { + id: AcceleratorId; + /** Human-readable label shown in cards + Boost Pack rows. */ + label: string; + /** Short noun phrase suitable for a pill: "4-bit FLUX/SD3" not "Adds 4-bit support". */ + shortLabel: string; + /** Pip name as it appears in ``_INSTALLABLE_PIP_PACKAGES``. */ + pipPackage: string; + /** Capability flag on ``NativeBackendStatus`` (FU-056 Phase 1). */ + capabilityField: keyof NativeBackendStatus; + /** Version string field (may be ``null`` when installed without a __version__). */ + versionField: keyof NativeBackendStatus; + /** One-line copy explaining the speedup. Used in the "🚀 X available" pill. */ + speedupClaim: string; + /** Models / pipelines this accelerator applies to. Free-text — humans read this. */ + appliesTo: string; + /** Rough on-disk footprint label, e.g. "~50 MB". */ + sizeOnDiskLabel: string; + /** ``sync`` = one HTTP call held open; ``async`` = background job + status poll. */ + installMode: "sync" | "async"; + /** Platforms where this can actually run. Affordances hide on the wrong platform. */ + platformGate: PlatformGate; + /** FU row in CLAUDE.md that registered or owns this accelerator. For provenance. */ + followUp: string; + /** Optional doc link slug under ``docs/features/`` for a "Learn more" affordance. 
*/ + docsSlug?: string; +} + +export const ACCELERATOR_CATALOG: ReadonlyArray<AcceleratorMeta> = [ + { + id: "nunchaku", + label: "Nunchaku", + shortLabel: "SVDQuant 4-bit", + pipPackage: "nunchaku", + capabilityField: "nunchakuAvailable", + versionField: "nunchakuVersion", + speedupClaim: "≈3× faster FLUX/SD3.5/Qwen-Image on CUDA", + appliesTo: "FLUX.1, SD3.5, Qwen-Image, SANA, PixArt-Σ", + sizeOnDiskLabel: "~50 MB", + installMode: "sync", + platformGate: "cuda", + followUp: "FU-023", + }, + { + id: "sageattention", + label: "SageAttention", + shortLabel: "Fast attention DiT", + pipPackage: "sageattention", + capabilityField: "sageattentionAvailable", + versionField: "sageattentionVersion", + speedupClaim: "Stacks with FBCache for ~1.4× extra on DiT pipelines", + appliesTo: "Any CUDA DiT image / video pipeline", + sizeOnDiskLabel: "~30 MB", + installMode: "sync", + platformGate: "cuda", + followUp: "FU-016", + }, + { + id: "dflash-mlx", + label: "DFlash (MLX)", + shortLabel: "Speculative decoding", + pipPackage: "dflash-mlx", + capabilityField: "dflashMlxAvailable", + versionField: "dflashMlxVersion", + speedupClaim: "≈1.5-2× tokens/sec on Qwen3.x and DeepSeek chat models", + appliesTo: "Apple Silicon — any LLM with a registered draft model", + sizeOnDiskLabel: "~80 MB", + installMode: "sync", + platformGate: "apple-silicon", + followUp: "FU-031", + docsSlug: "dflash", + }, + { + id: "dflash-cuda", + label: "DFlash (CUDA)", + shortLabel: "Speculative decoding", + pipPackage: "dflash", + capabilityField: "dflashCudaAvailable", + versionField: "dflashCudaVersion", + speedupClaim: "≈1.5-2× tokens/sec on Qwen3.x and DeepSeek chat models", + appliesTo: "CUDA — any LLM with a registered draft model", + sizeOnDiskLabel: "~80 MB", + installMode: "sync", + platformGate: "cuda", + followUp: "FU-048", + docsSlug: "dflash", + }, + { + id: "triattention", + label: "TriAttention", + shortLabel: "KV compressor + LongLive", + // The full pip git+url is resolved server-side by the install-package
+ // registry — the client only needs the package name as the registry + // key. Keeps the catalog readable + avoids leaking the upstream pin + // into the frontend bundle. + pipPackage: "triattention", + capabilityField: "triattentionAvailable", + versionField: "triattentionVersion", + speedupClaim: "Real-time long Wan video + 2-3× KV compression on long-context LLMs", + appliesTo: "Wan 2.1 1.3B (LongLive), long-context chat models", + sizeOnDiskLabel: "~2 GB (pulls vllm)", + installMode: "async", + platformGate: "cuda", + followUp: "FU-003 / FU-002", + }, + { + id: "kvpress", + label: "kvpress", + shortLabel: "KV cache compression", + pipPackage: "kvpress", + capabilityField: "kvpressAvailable", + versionField: "kvpressVersion", + speedupClaim: "8-32× KV-cache compression on long-context CUDA inference", + appliesTo: "CUDA — any HF transformer with KV cache", + sizeOnDiskLabel: "~40 MB", + installMode: "sync", + platformGate: "cuda", + followUp: "FU-027", + }, +]; + +/** Lookup an entry by id. Returns ``undefined`` for unknown ids so the + * caller can render a "missing catalog row" diagnostic rather than + * crashing — relevant for forward-compat when a backend probe lists a + * new accelerator the frontend doesn't know about yet. */ +export function getAccelerator(id: string): AcceleratorMeta | undefined { + return ACCELERATOR_CATALOG.find((entry) => entry.id === id); +} + +/** True when this accelerator's ``platformGate`` is satisfied by the + * current ``NativeBackendStatus``. The caller can use this to hide + * irrelevant cards (e.g. dflash-mlx on Windows) or to dim them with an + * explanation tooltip. ``any`` always satisfies. */ +export function isPlatformCompatible( + meta: AcceleratorMeta, + capabilities: Pick<NativeBackendStatus, "mlxAvailable">, +): boolean { + switch (meta.platformGate) { + case "any": + return true; + case "apple-silicon": + // ``mlxAvailable`` is the strongest signal we have for "this is an + // Apple Silicon box where MLX worker subprocesses can spawn".
+ // ``platform.system() === "Darwin"`` would catch Intel Macs too, but + // none of the MLX-side accelerators run on Intel anyway, so MLX + // availability is the better gate. + return Boolean(capabilities.mlxAvailable); + case "cuda": + // We don't have a single ``cudaAvailable`` capability flag today + // (the vllm probe carries it implicitly). For Phase 2 we approximate + // "this is a CUDA box" with "MLX is NOT available" — i.e. not an + // Apple Silicon box. A more precise probe lands in Phase 8 alongside + // the WSL bridge work, when we surface ``cudaAvailable`` explicitly. + return !capabilities.mlxAvailable; + } +} diff --git a/src/styles.css b/src/styles.css index 5cc83bc..10b6647 100644 --- a/src/styles.css +++ b/src/styles.css @@ -7782,6 +7782,207 @@ select.text-input { overflow: auto; } +/* -- FU-056 accelerator card --------------------------------------- */ +.accelerator-card { + display: flex; + flex-direction: column; + gap: 8px; + padding: 12px 14px; + background: var(--surface); + border: 1px solid var(--border); + border-radius: var(--radius-md); + font-size: 0.88rem; +} +.accelerator-card.accelerator-card-installed { + background: rgba(80, 180, 100, 0.06); + border-color: rgba(80, 180, 100, 0.32); +} +.accelerator-card.accelerator-card-incompatible { + opacity: 0.55; +} +.accelerator-card-header { + display: flex; + align-items: center; + justify-content: space-between; + gap: 10px; +} +.accelerator-card-title { + margin: 0; + font-size: 0.95rem; + font-weight: 600; +} +.accelerator-card-claim { + margin: 0; + color: var(--muted-strong); + font-size: 0.86rem; +} +.accelerator-card-applies { + margin: 0; + color: var(--muted); + font-size: 0.82rem; +} +.accelerator-card-applies-label { + color: var(--muted-strong); + font-weight: 500; +} +.accelerator-card-meta { + display: flex; + flex-wrap: wrap; + gap: 6px 10px; + font-size: 0.78rem; + color: var(--muted); + margin-top: 2px; +} +.accelerator-card-meta-item { + display: inline-flex; + 
align-items: center; + padding: 2px 7px; + background: rgba(255, 255, 255, 0.04); + border: 1px solid rgba(255, 255, 255, 0.06); + border-radius: 999px; + font-size: 0.74rem; +} +.accelerator-card-meta-follow-up { + font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; + color: var(--accent); +} +.accelerator-card-status { + display: inline-block; + padding: 2px 8px; + font-size: 0.72rem; + font-weight: 600; + border-radius: 10px; + letter-spacing: 0.02em; +} +.accelerator-card-status-installed { + background: rgba(80, 180, 100, 0.22); + color: #8fd99e; +} +.accelerator-card-status-installing { + background: rgba(220, 170, 80, 0.22); + color: #e7c382; +} +.accelerator-card-status-failed { + background: rgba(220, 100, 100, 0.25); + color: #ec9c9c; +} +.accelerator-card-actions { + display: flex; + flex-wrap: wrap; + gap: 8px; + align-items: center; + margin-top: 4px; +} +.accelerator-card-action { + appearance: none; + border: 1px solid rgba(143, 180, 255, 0.45); + background: rgba(80, 140, 220, 0.18); + color: var(--accent-strong); + border-radius: 8px; + padding: 5px 12px; + font-size: 0.84rem; + font-weight: 500; + cursor: pointer; + transition: background 0.12s ease; +} +.accelerator-card-action:hover:not(:disabled) { + background: rgba(80, 140, 220, 0.30); +} +.accelerator-card-action:disabled { + opacity: 0.55; + cursor: not-allowed; +} +.accelerator-card-action-primary { + font-weight: 600; +} +.accelerator-card-log { + margin-top: 4px; + font-size: 0.82rem; +} +.accelerator-card-log-summary { + cursor: pointer; + color: var(--muted-strong); +} +.accelerator-card-log-error { + margin: 6px 0 4px 0; + color: #ec9c9c; + font-size: 0.82rem; +} +.accelerator-card-log-output { + margin: 0; + padding: 8px 10px; + background: rgba(0, 0, 0, 0.35); + color: rgba(226, 232, 240, 0.92); + border-radius: 6px; + font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; + font-size: 0.74rem; + white-space: pre-wrap; + overflow-wrap: anywhere; + 
max-height: 240px; + overflow: auto; +} + +/* Compact pill variant — for catalog variant cards (Phase 3 / 4). */ +.accelerator-card-pill { + display: inline-flex; + align-items: center; + gap: 6px; + padding: 3px 8px 3px 9px; + background: rgba(80, 140, 220, 0.12); + border: 1px solid rgba(80, 140, 220, 0.32); + border-radius: 999px; + font-size: 0.78rem; + flex-direction: row; +} +.accelerator-card-pill.accelerator-card-installed { + background: rgba(80, 180, 100, 0.10); + border-color: rgba(80, 180, 100, 0.32); +} +.accelerator-card-pill-label { + font-weight: 500; + color: var(--text); +} +.accelerator-card-action-pill { + padding: 1px 8px; + font-size: 0.72rem; + border-radius: 999px; +} + +/* Table-row variant — for the Diagnostics Boost Pack (Phase 6). */ +tr.accelerator-card-row { + border-bottom: 1px solid rgba(255, 255, 255, 0.04); +} +tr.accelerator-card-row.accelerator-card-installed { + background: rgba(80, 180, 100, 0.04); +} +tr.accelerator-card-row.accelerator-card-incompatible { + opacity: 0.55; +} +.accelerator-card-row-label { + padding: 8px 10px; + display: flex; + flex-direction: column; + gap: 2px; +} +.accelerator-card-row-label strong { + font-weight: 600; +} +.accelerator-card-row-applies { + color: var(--muted); + font-size: 0.78rem; +} +.accelerator-card-row-size, +.accelerator-card-row-platform { + padding: 8px 10px; + color: var(--muted); + font-size: 0.82rem; +} +.accelerator-card-row-status, +.accelerator-card-row-action { + padding: 8px 10px; + text-align: right; +} + /* -- Diagnostics panel --------------------------------------------- */ .diagnostics-body { display: flex; diff --git a/src/types/server.ts b/src/types/server.ts index d5009d4..3d1c1e5 100644 --- a/src/types/server.ts +++ b/src/types/server.ts @@ -122,5 +122,28 @@ export interface NativeBackendStatus { llamaServerPath?: string | null; llamaServerTurboPath?: string | null; converterAvailable: boolean; + // FU-047 + downstream — already on the wire, kept optional so older 
+ // backends without these fields don't break the TS contract. + vllmAvailable?: boolean; + vllmVersion?: string | null; + mtplxAvailable?: boolean; + mtplxPythonPath?: string | null; + ggufMtpAvailable?: boolean; + // FU-056 Phase 1 — per-accelerator import probes. Optional so a + // backend running an older build than the frontend doesn't crash the + // capability-readers; consumers should treat missing as ``false``. + nunchakuAvailable?: boolean; + nunchakuVersion?: string | null; + sageattentionAvailable?: boolean; + sageattentionVersion?: string | null; + dflashMlxAvailable?: boolean; + dflashMlxVersion?: string | null; + dflashCudaAvailable?: boolean; + dflashCudaVersion?: string | null; + triattentionAvailable?: boolean; + triattentionVersion?: string | null; + kvpressAvailable?: boolean; + kvpressVersion?: string | null; + wsl2Available?: boolean; probing?: boolean; } From b00b40991fcfdd8164125912c1565ac49dd74270 Mon Sep 17 00:00:00 2001 From: Cryptopoly <31970407+cryptopoly@users.noreply.github.com> Date: Sun, 17 May 2026 11:42:34 +0100 Subject: [PATCH 04/15] feat: Diagnostics Boost Pack panel (FU-056 Phase 6) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First end-to-end UX slice for FU-056. The Diagnostics tab gains a Boost Pack section listing all six CUDA-side accelerators (nunchaku, sageattention, dflash-mlx, dflash-cuda, triattention, kvpress) as a single scannable table. Status pill + Install / Retry button per row; click installs via the existing POST /api/setup/install-package endpoint, output captured into a collapsible details, then capabilities re-probe so the "Installed v1.2.1" pill flips without a parent refetch. Self-probes capabilities on mount via refreshCapabilities() so the panel works standalone — DiagnosticsPanel only passes backendOnline. 
Per-accelerator install state lives in a record keyed by pip name, so multiple installs can run concurrently if the user is impatient (the backend serialises pip writes at the OS-FS layer). Renders every catalog row with showIncompatible=true: this is the "see everything" surface, not a per-feature gate. Apple-Silicon and CUDA accelerators both list; the platform column tells the user which apply to their box, and disabled state + tooltip blocks an ill-fitting install. Phases 3-5 will filter per surface. Closes the first observable loop: Phase 1 probe → Phase 2 card (row variant) → install → re-probe → installed state. Same Component renders pill + card + row, so the per-feature surfaces in Phases 3-5 ride the same diff. No new tests — the pure logic (readInstalled, readVersion, actionLabelFor, platformLabel, isPlatformCompatible) is already pinned by Phase 2's 28 unit tests. The Boost Pack itself is wiring: fetch capabilities, dispatch install, re-fetch on success. Mirrors the existing CudaTorchLogPanel pattern. --- .../settings/AcceleratorsBoostPack.tsx | 197 ++++++++++++++++++ src/features/settings/DiagnosticsPanel.tsx | 2 + 2 files changed, 199 insertions(+) create mode 100644 src/features/settings/AcceleratorsBoostPack.tsx diff --git a/src/features/settings/AcceleratorsBoostPack.tsx b/src/features/settings/AcceleratorsBoostPack.tsx new file mode 100644 index 0000000..daaaea5 --- /dev/null +++ b/src/features/settings/AcceleratorsBoostPack.tsx @@ -0,0 +1,197 @@ +import { useCallback, useEffect, useState } from "react"; + +import { AcceleratorCard } from "../../components/AcceleratorCard"; +import { ACCELERATOR_CATALOG } from "../../components/acceleratorCatalog"; +import { installPipPackage, refreshCapabilities } from "../../api"; +import type { NativeBackendStatus } from "../../types/server"; + +/** + * The Diagnostics tab's "Boost Pack" section (FU-056 Phase 6). 
+ * + * Single panel listing every CUDA-side accelerator the catalog + * registers, with current install state + one-click install. The + * "everything in one place" surface for users who want to see the + * full accelerator landscape; per-feature surfaces (Phases 3-5) + * inherit the same ``AcceleratorCard`` component but show only the + * accelerators relevant to that tab. + * + * State ownership + * --------------- + * This panel self-probes capabilities on mount (``refreshCapabilities`` + * hits ``/api/setup/refresh-capabilities``) and re-probes after each + * successful install so the Installed pills flip without a parent + * refetch. Per-accelerator install state lives in ``installStates`` + * keyed by ``pipPackage`` — the card itself stays stateless beyond + * its "log expanded" toggle. + * + * The panel intentionally renders **every** entry in + * ``ACCELERATOR_CATALOG`` regardless of platform (``showIncompatible`` + * is true). The user-experience choice here: this is the diagnostics + * surface, the user wants visibility into what exists across the + * ecosystem, not just what their current box can run. Per-feature + * surfaces will gate by platform so wrong-platform affordances don't + * appear next to a FLUX model card. + */ + +export interface AcceleratorsBoostPackProps { + /** Set false until the backend health check has cleared. + * Capabilities fetch needs the backend up. 
*/ + backendOnline: boolean; +} + +interface InstallState { + installing: boolean; + error: string | null; + output: string | null; +} + +const EMPTY_INSTALL_STATE: InstallState = { + installing: false, + error: null, + output: null, +}; + +export function AcceleratorsBoostPack({ backendOnline }: AcceleratorsBoostPackProps) { + const [capabilities, setCapabilities] = useState<NativeBackendStatus | null>(null); + const [capError, setCapError] = useState<string | null>(null); + const [installStates, setInstallStates] = useState<Record<string, InstallState>>({}); + + const probe = useCallback(async () => { + if (!backendOnline) return; + try { + const next = await refreshCapabilities(); + // ``refreshCapabilities`` returns a generic ``Record`` because + // it serves several consumers; the FU-056 Phase 1 fields are + // optional on ``NativeBackendStatus`` so this cast is safe even + // when the backend is older than the frontend. + setCapabilities(next as unknown as NativeBackendStatus); + setCapError(null); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + setCapError(message); + } + }, [backendOnline]); + + useEffect(() => { + if (backendOnline) { + void probe(); + } + }, [backendOnline, probe]); + + const handleInstall = useCallback( + async (pipPackage: string) => { + // Guard against double-clicks on the same accelerator. Other + // accelerators can still install concurrently — the backend's + // ``/api/setup/install-package`` endpoint serialises pip writes + // for us at the OS-FS layer. + const existing = installStates[pipPackage]; + if (existing?.installing) return; + + setInstallStates((prev) => ({ + ...prev, + [pipPackage]: { installing: true, error: null, output: null }, + })); + + try { + const result = await installPipPackage(pipPackage); + if (result.ok) { + setInstallStates((prev) => ({ + ...prev, + [pipPackage]: { + installing: false, + error: null, + output: result.output ??
null, + }, + })); + await probe(); + } else { + setInstallStates((prev) => ({ + ...prev, + [pipPackage]: { + installing: false, + error: "Install command exited non-zero.", + output: result.output ?? null, + }, + })); + } + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + setInstallStates((prev) => ({ + ...prev, + [pipPackage]: { + installing: false, + error: message, + output: null, + }, + })); + } + }, + [installStates, probe], + ); + + // Rows render in catalog order with no grouping by platform — all + // accelerators are surfaced together. The user can scan the Platform + // column to decide what their box supports. Keep the catalog order + // verbatim so the table is stable across runs. + const rows = ACCELERATOR_CATALOG; + + return ( +
+
+

+ Boost Pack +

+

+ Optional accelerators for image, video, and chat inference. Each is a + single pip install away — click Install on the rows your hardware + supports. +

+
+ + {capError ? ( +

+ Could not read accelerator capabilities: {capError} +

+ ) : null} + + {!backendOnline ? ( +

+ Backend offline — start the sidecar to read accelerator state. +

+ ) : null} + + + + + + + + + + + + + {rows.map((meta) => { + const state = installStates[meta.pipPackage] ?? EMPTY_INSTALL_STATE; + return ( + + ); + })} + +
AcceleratorSizePlatformStatusAction
+
+ ); +} diff --git a/src/features/settings/DiagnosticsPanel.tsx b/src/features/settings/DiagnosticsPanel.tsx index 88d09f3..41da875 100644 --- a/src/features/settings/DiagnosticsPanel.tsx +++ b/src/features/settings/DiagnosticsPanel.tsx @@ -12,6 +12,7 @@ import { type InstallResult, type StorageTopResponse, } from "../../api"; +import { AcceleratorsBoostPack } from "./AcceleratorsBoostPack"; // In-app troubleshooting panel. Surfaces OS, hardware, runtime paths, // GPU state, env vars, and the backend log tail without asking users to @@ -413,6 +414,7 @@ export function DiagnosticsPanel({ backendOnline, onRestartServer, busyAction }: ) : null} + ); From 233b4d7e125f8ff4b46e243eb5adfa26680e1cb4 Mon Sep 17 00:00:00 2001 From: Cryptopoly <31970407+cryptopoly@users.noreply.github.com> Date: Sun, 17 May 2026 11:57:08 +0100 Subject: [PATCH 05/15] feat: Image Studio accelerator surfaces (FU-056 Phase 3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires accelerator install affordances into the three Image surfaces users actually look at when picking + running a model: 1. Image Models tab — every installed FLUX / SD3.5 / Qwen-Image / SANA / PixArt row gets read-only pills next to the style tags: "🚀 SVDQuant 4-bit" + "🚀 Fast attention DiT" when the accelerator is missing, "✓ ..." when present. UNet pipelines (SD1.5 / SDXL) show no pills — neither nunchaku nor sageattention applies. 2. Image Discover tab — same pills on catalog variant cards in the same position. Lets users see acceleration potential before committing to a download. 3. Image Studio runtime banner — new "Performance boosters" section between the torch-upgrade pill and the model-load summary. Card variants of the same accelerators with full Install / Retry buttons. Self-contained install state: clicks POST /api/setup/install-package, capture the response capabilities, and overlay them onto the parent-provided snapshot so the card flips to "✓ Installed v..." 
without waiting for the next workspace refetch. The pills on the Models / Discover tabs are deliberately read-only — the install action lives in Studio's runtime banner so install state stays concentrated. A new optional onInstall prop on AcceleratorCard drives this: when omitted, the card renders as passive info. New helper getApplicableAccelerators(repo) maps a model repo to the accelerator IDs that apply. Pattern-matches on the family slug (FLUX.1, sd3.5, qwen-image, sana, pixart-sigma) so we don't have to edit catalog/image_models.py to land this — the catalog-side recommendedAccelerators metadata pattern is reserved for Phase 7 when the i18n + per-variant overrides land together. 7 new unit tests pin the matrix (FLUX, SD3.5, Qwen-Image, SANA, PixArt for nunchaku+sageattention; Wan / HunyuanVideo / LTX / CogVideoX / Mochi for sageattention-only; Wan2.1-T2V-1.3B for the triattention LongLive bonus; SDXL / SD1.5 return empty). NativeBackendStatus threads from App.tsx → ImageModelsTab, ImageDiscoverTab, ImageStudioTab → ImageStudioRuntimeBanner → ImageStudioBoosters. The prop is optional everywhere so older backends without FU-056 Phase 1 fields collapse pills to their "available" state rather than crashing the tab. Deferred to a follow-up commit: the post-generation suggestion toast (fires when a non-Nunchaku FLUX gen takes >12s on CUDA). The discovery + install surfaces in this commit already give users a clean path to install accelerators contextually; the toast adds a nudge but the install affordance is reachable without it. 
--- src/App.tsx | 3 + src/components/AcceleratorCard.tsx | 23 ++- .../__tests__/acceleratorCatalog.test.ts | 83 ++++++++++ src/components/acceleratorCatalog.ts | 78 +++++++++ src/features/images/ImageDiscoverTab.tsx | 27 ++++ src/features/images/ImageModelsTab.tsx | 28 ++++ src/features/images/ImageStudioBoosters.tsx | 152 ++++++++++++++++++ .../images/ImageStudioRuntimeBanner.tsx | 18 +++ src/features/images/ImageStudioTab.tsx | 7 + 9 files changed, 414 insertions(+), 5 deletions(-) create mode 100644 src/features/images/ImageStudioBoosters.tsx diff --git a/src/App.tsx b/src/App.tsx index c215221..9bb8e5e 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -1228,6 +1228,7 @@ export default function App() { activeImageDownloads={imgState.activeImageDownloads} selectedImageVariant={imgState.selectedImageVariant} fileRevealLabel={fileRevealLabel} + nativeBackends={nativeBackends} onActiveTabChange={setActiveTab} onOpenImageStudio={imgState.openImageStudio} onImageDownload={(repo) => void imgState.handleImageDownload(repo)} @@ -1244,6 +1245,7 @@ export default function App() { imageCatalog={imgState.imageCatalog} activeImageDownloads={imgState.activeImageDownloads} fileRevealLabel={fileRevealLabel} + nativeBackends={nativeBackends} onActiveTabChange={setActiveTab} onOpenImageStudio={imgState.openImageStudio} onImageDownload={(repo) => void imgState.handleImageDownload(repo)} @@ -1272,6 +1274,7 @@ export default function App() { imageBusy={imgState.imageBusy} imageBusyLabel={imgState.imageBusyLabel} backendOnline={backendOnline} + nativeBackends={nativeBackends} activeImageDownloads={imgState.activeImageDownloads} imagePrompt={imgState.imagePrompt} onImagePromptChange={imgState.setImagePrompt} diff --git a/src/components/AcceleratorCard.tsx b/src/components/AcceleratorCard.tsx index f76e875..ee05839 100644 --- a/src/components/AcceleratorCard.tsx +++ b/src/components/AcceleratorCard.tsx @@ -52,8 +52,14 @@ export interface AcceleratorCardProps { * compact but failures expose 
the diagnostic. */ installOutput?: string | null; /** Fired when the user clicks Install / Retry. Parent should call - * ``installPipPackage(meta.pipPackage)`` then ``refreshWorkspace()``. */ - onInstall: (pipPackage: string) => void; + * ``installPipPackage(meta.pipPackage)`` then ``refreshWorkspace()``. + * + * Optional: when omitted, the card renders **read-only** — status + * pill + meta only, no action button. Used by discovery surfaces + * (the Image Models / Discover tabs) where the install action lives + * in a sibling surface (the Image Studio runtime banner) so the + * install state stays in one place rather than scattered. */ + onInstall?: (pipPackage: string) => void; /** Optional click handler for the platform-mismatch tooltip — lets * the parent surface a "this won't run on your hardware" toast. */ onPlatformMismatch?: (meta: AcceleratorMeta) => void; @@ -142,7 +148,14 @@ export function AcceleratorCard(props: AcceleratorCardProps) { return null; } + // Read-only mode: when no ``onInstall`` is wired we render the card + // as a passive informational element — no Install button, no Retry, + // no platform-mismatch toast. The discovery surfaces use this so + // they don't accidentally become install dispatchers. + const readOnly = onInstall === undefined; + const handleInstall = () => { + if (readOnly) return; if (!compatible) { onPlatformMismatch?.(meta); return; @@ -196,7 +209,7 @@ export function AcceleratorCard(props: AcceleratorCardProps) { {installed ? "✓ " : "🚀 "} {meta.shortLabel} - {!installed && ( + {!installed && !readOnly && ( - ) : null} + {!dflashInstalled && !isGgufBackend && canInstallDflashForModel && onInstallPackage ? (() => { + // FU-056 Phase 5: pick the right pip package by backend. + // MLX backend → ``dflash-mlx`` (Apple Silicon git+url); + // vLLM backend → ``dflash`` (PyPI CUDA wheel). Previously + // hard-coded to ``dflash-mlx`` which silently installed + // the wrong package on Windows / Linux CUDA boxes. 
+ const pkg = dflashPackageFor(selectedBackend); + const inFlight = installingPackage === pkg; + return ( + + ); + })() : null}