diff --git a/README.md b/README.md index b821942..81ba882 100644 --- a/README.md +++ b/README.md @@ -40,10 +40,9 @@ Each skill is a self-contained module with its own model, parameters, and [commu | Category | Skill | What It Does | Status | |----------|-------|--------------|:------:| | **Detection** | [`yolo-detection-2026`](skills/detection/yolo-detection-2026/) | Real-time 80+ class detection β€” auto-accelerated via TensorRT / CoreML / OpenVINO / ONNX | βœ…| -| | [`dinov3-grounding`](skills/detection/dinov3-grounding/) | Open-vocabulary detection β€” describe what to find | πŸ“ | -| | [`person-recognition`](skills/detection/person-recognition/) | Re-identify individuals across cameras | πŸ“ | -| **Analysis** | [`home-security-benchmark`](skills/analysis/home-security-benchmark/) | [131-test evaluation suite](#-homesec-bench--how-secure-is-your-local-ai) for LLM & VLM security performance | βœ… | -| | [`vlm-scene-analysis`](skills/analysis/vlm-scene-analysis/) | Describe what happened in recorded clips | πŸ“ | +| **Analysis** | [`home-security-benchmark`](skills/analysis/home-security-benchmark/) | [143-test evaluation suite](#-homesec-bench--how-secure-is-your-local-ai) for LLM & VLM security performance | βœ… | +| | [`smarthome-bench`](skills/analysis/smarthome-bench/) | Video anomaly detection benchmark β€” 105 clips across 7 smart home categories | βœ… | +| | [`homesafe-bench`](skills/analysis/homesafe-bench/) | Indoor safety hazard detection β€” 40 tests across 5 categories | βœ… | | | [`sam2-segmentation`](skills/analysis/sam2-segmentation/) | Click-to-segment with pixel-perfect masks | πŸ“ | | **Transformation** | [`depth-estimation`](skills/transformation/depth-estimation/) | Monocular depth maps with Depth Anything v2 | πŸ“ | | **Annotation** | [`dataset-annotation`](skills/annotation/dataset-annotation/) | AI-assisted labeling β†’ COCO export | πŸ“ | @@ -140,7 +139,7 @@ Camera β†’ Frame Governor β†’ detect.py (JSONL) β†’ Aegis IPC β†’ Live 
Overlay ## πŸ“Š HomeSec-Bench β€” How Secure Is Your Local AI? -**HomeSec-Bench** is a 131-test security benchmark that measures how well your local AI performs as a security guard. It tests what matters: Can it detect a person in fog? Classify a break-in vs. a delivery? Resist prompt injection? Route alerts correctly at 3 AM? +**HomeSec-Bench** is a 143-test security benchmark that measures how well your local AI performs as a security guard. It tests what matters: Can it detect a person in fog? Classify a break-in vs. a delivery? Resist prompt injection? Route alerts correctly at 3 AM? Run it on your own hardware to know exactly where your setup stands. diff --git a/skills.json b/skills.json index 50f50d6..11ab31a 100644 --- a/skills.json +++ b/skills.json @@ -96,6 +96,75 @@ "medium", "large" ] + }, + { + "id": "smarthome-bench", + "name": "SmartHome Video Anomaly Benchmark", + "description": "VLM evaluation suite for video anomaly detection in smart home camera footage β€” 7 categories, 105 curated clips from SmartHome-Bench.", + "version": "1.0.0", + "category": "analysis", + "path": "skills/analysis/smarthome-bench", + "tags": [ + "benchmark", + "vlm", + "video", + "anomaly-detection", + "smart-home" + ], + "platforms": [ + "linux-x64", + "linux-arm64", + "darwin-arm64", + "darwin-x64", + "win-x64" + ], + "requirements": { + "node": ">=18", + "ram_gb": 2, + "system_deps": [ + "yt-dlp", + "ffmpeg" + ] + }, + "capabilities": [ + "benchmark", + "report_generation" + ], + "ui_unlocks": [ + "benchmark_report" + ] + }, + { + "id": "homesafe-bench", + "name": "HomeSafe Indoor Safety Benchmark", + "description": "VLM evaluation suite for indoor home safety hazard detection β€” 40 tests across 5 categories: fire/smoke, electrical, trip/fall, child safety, falling objects.", + "version": "1.0.0", + "category": "analysis", + "path": "skills/analysis/homesafe-bench", + "tags": [ + "benchmark", + "vlm", + "safety", + "hazard", + "indoor" + ], + "platforms": [ + 
"linux-x64", + "linux-arm64", + "darwin-arm64", + "darwin-x64", + "win-x64" + ], + "requirements": { + "node": ">=18", + "ram_gb": 2 + }, + "capabilities": [ + "benchmark" + ], + "ui_unlocks": [ + "benchmark_report" + ] } ] } \ No newline at end of file diff --git a/skills/analysis/home-security-benchmark/SKILL.md b/skills/analysis/home-security-benchmark/SKILL.md index f4fb0b6..03ccafb 100644 --- a/skills/analysis/home-security-benchmark/SKILL.md +++ b/skills/analysis/home-security-benchmark/SKILL.md @@ -1,16 +1,21 @@ --- name: Home Security AI Benchmark description: LLM & VLM evaluation suite for home security AI applications -version: 2.0.0 +version: 2.1.0 category: analysis runtime: node entry: scripts/run-benchmark.cjs install: npm + +requirements: + node: ">=18" + npm_install: true + platforms: ["linux", "macos", "windows"] --- # Home Security AI Benchmark -Comprehensive benchmark suite evaluating LLM and VLM models on **131 tests** across **16 suites** β€” context preprocessing, tool use, security classification, prompt injection resistance, alert routing, knowledge injection, VLM-to-alert triage, and scene analysis. +Comprehensive benchmark suite evaluating LLM and VLM models on **143 tests** across **16 suites** β€” context preprocessing, tool use, security classification, prompt injection resistance, alert routing, knowledge injection, VLM-to-alert triage, and scene analysis. 
## Setup @@ -71,7 +76,7 @@ This skill includes a [`config.yaml`](config.yaml) that defines user-configurabl | Parameter | Type | Default | Description | |-----------|------|---------|-------------| -| `mode` | select | `llm` | Which suites to run: `llm` (96 tests), `vlm` (35 tests), or `full` (131 tests) | +| `mode` | select | `llm` | Which suites to run: `llm` (96 tests), `vlm` (47 tests), or `full` (143 tests) | | `noOpen` | boolean | `false` | Skip auto-opening the HTML report in browser | Platform parameters like `AEGIS_GATEWAY_URL` and `AEGIS_VLM_URL` are auto-injected by Aegis β€” they are **not** in `config.yaml`. See [Aegis Skill Platform Parameters](../../../docs/skill-params.md) for the full platform contract. @@ -107,7 +112,7 @@ AEGIS_SKILL_PARAMS={} Human-readable output goes to **stderr** (visible in Aegis console tab). -## Test Suites (131 Tests) +## Test Suites (143 Tests) | Suite | Tests | Domain | |-------|-------|--------| @@ -126,7 +131,7 @@ Human-readable output goes to **stderr** (visible in Aegis console tab). | Alert Routing & Subscription | 5 | Channel targeting, schedule CRUD | | Knowledge Injection to Dialog | 5 | KI-personalized responses | | VLM-to-Alert Triage | 5 | Urgency classification from VLM | -| VLM Scene Analysis | 35 | Frame entity detection & description | +| VLM Scene Analysis | 47 | Frame entity detection & description (outdoor + indoor safety) | ## Results @@ -137,4 +142,4 @@ Results are saved to `~/.aegis-ai/benchmarks/` as JSON. 
An HTML report with cros - Node.js β‰₯ 18 - `npm install` (for `openai` SDK dependency) - Running LLM server (llama-server, OpenAI API, or any OpenAI-compatible endpoint) -- Optional: Running VLM server for scene analysis tests (35 tests) +- Optional: Running VLM server for scene analysis tests (47 tests) diff --git a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_blocked_exit.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_blocked_exit.png new file mode 100644 index 0000000..fdca045 Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_blocked_exit.png differ diff --git a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_child_cabinet.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_child_cabinet.png new file mode 100644 index 0000000..3a4e1e6 Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_child_cabinet.png differ diff --git a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_elec_cord.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_elec_cord.png new file mode 100644 index 0000000..4c7a552 Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_elec_cord.png differ diff --git a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_elec_powerstrip.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_elec_powerstrip.png new file mode 100644 index 0000000..666c48a Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_elec_powerstrip.png differ diff --git a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fall_person.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fall_person.png new file mode 100644 index 0000000..c074920 Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fall_person.png differ diff --git 
a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fall_shelf.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fall_shelf.png new file mode 100644 index 0000000..582ad2c Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fall_shelf.png differ diff --git a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_candle.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_candle.png new file mode 100644 index 0000000..e200ca0 Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_candle.png differ diff --git a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_heater.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_heater.png new file mode 100644 index 0000000..5f2845a Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_heater.png differ diff --git a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_iron.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_iron.png new file mode 100644 index 0000000..f3f7c78 Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_iron.png differ diff --git a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_stove.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_stove.png new file mode 100644 index 0000000..de54fbe Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_stove.png differ diff --git a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_trip_stairs.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_trip_stairs.png new file mode 100644 index 0000000..736c3d1 Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_trip_stairs.png differ diff --git 
a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_trip_wetfloor.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_trip_wetfloor.png new file mode 100644 index 0000000..7ac3309 Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_trip_wetfloor.png differ diff --git a/skills/analysis/home-security-benchmark/scripts/run-benchmark.cjs b/skills/analysis/home-security-benchmark/scripts/run-benchmark.cjs index b45306c..96dd71c 100644 --- a/skills/analysis/home-security-benchmark/scripts/run-benchmark.cjs +++ b/skills/analysis/home-security-benchmark/scripts/run-benchmark.cjs @@ -1704,6 +1704,68 @@ suite('πŸ“Έ VLM Scene Analysis', async () => { prompt: 'Describe this outdoor area. Are there any people present? What objects are visible?', expect: ['patio', 'furniture', 'table', 'chair', 'grill', 'empty', 'no one', 'no people'] }, + + // Category E: Indoor Safety Hazards (12) + { + name: 'Stove smoke β†’ kitchen fire hazard', file: 'indoor_fire_stove.png', + prompt: 'Describe this indoor security camera scene. Are there any fire or smoke hazards visible?', + expect: ['smoke', 'fire', 'stove', 'kitchen', 'cook', 'pot', 'steam'] + }, + { + name: 'Candle near curtain β†’ fire risk', file: 'indoor_fire_candle.png', + prompt: 'Describe this indoor scene. Is there any fire risk from open flames near flammable materials?', + expect: ['candle', 'fire', 'curtain', 'flame', 'drape', 'fabric', 'risk'] + }, + { + name: 'Overloaded power strip β†’ electrical hazard', file: 'indoor_elec_powerstrip.png', + prompt: 'Describe what you see. Are there any electrical safety hazards?', + expect: ['overload', 'power', 'electrical', 'plug', 'strip', 'cable', 'cord', 'outlet'] + }, + { + name: 'Frayed cord β†’ electrical fire risk', file: 'indoor_elec_cord.png', + prompt: 'Describe the condition of the electrical cord. 
Is there any damage that could be dangerous?', + expect: ['fray', 'cord', 'damage', 'wire', 'worn', 'exposed', 'cable'] + }, + { + name: 'Toys on stairs β†’ trip hazard', file: 'indoor_trip_stairs.png', + prompt: 'Describe this scene. Are there any trip or fall hazards on the staircase?', + expect: ['toy', 'stair', 'trip', 'hazard', 'ball', 'fall', 'step'] + }, + { + name: 'Wet floor β†’ slip hazard', file: 'indoor_trip_wetfloor.png', + prompt: 'Describe the floor condition in this scene. Is there any slip hazard?', + expect: ['wet', 'slip', 'water', 'floor', 'puddle', 'spill'] + }, + { + name: 'Person on floor β†’ medical emergency', file: 'indoor_fall_person.png', + prompt: 'Describe what you see. Is anyone in distress or in need of medical help?', + expect: ['person', 'fall', 'lying', 'floor', 'down', 'help', 'cane', 'elder'] + }, + { + name: 'Open cabinet chemicals β†’ child safety', file: 'indoor_child_cabinet.png', + prompt: 'Describe this kitchen scene. Are there any child safety concerns with accessible chemicals?', + expect: ['cabinet', 'chemical', 'clean', 'open', 'bottle', 'danger', 'safety'] + }, + { + name: 'Cluttered exit β†’ blocked fire exit', file: 'indoor_blocked_exit.png', + prompt: 'Describe this scene. Is the exit or doorway clear or obstructed?', + expect: ['block', 'exit', 'clutter', 'door', 'box', 'obstruct', 'furniture'] + }, + { + name: 'Space heater near drape β†’ fire ignition risk', file: 'indoor_fire_heater.png', + prompt: 'Describe this bedroom scene. Is the space heater positioned safely?', + expect: ['heater', 'drape', 'fire', 'curtain', 'close', 'fabric', 'risk'] + }, + { + name: 'Items on high shelf β†’ falling object risk', file: 'indoor_fall_shelf.png', + prompt: 'Describe the shelf and items on it. 
Are there any falling object hazards?', + expect: ['shelf', 'fall', 'heavy', 'unstable', 'box', 'stack', 'top'] + }, + { + name: 'Iron left face-down β†’ burn/fire risk', file: 'indoor_fire_iron.png', + prompt: 'Describe this laundry scene. Is the iron being used safely?', + expect: ['iron', 'burn', 'fire', 'left', 'hot', 'steam', 'unattended', 'board'] + }, ]; // ─── Run all VLM tests ────────────────────────────────────────────── diff --git a/skills/analysis/homesafe-bench/SKILL.md b/skills/analysis/homesafe-bench/SKILL.md new file mode 100644 index 0000000..8dd6bcf --- /dev/null +++ b/skills/analysis/homesafe-bench/SKILL.md @@ -0,0 +1,123 @@ +--- +name: HomeSafe-Bench +description: VLM indoor safety hazard detection benchmark inspired by HomeSafeBench (arXiv 2509.23690) +version: 1.0.0 +category: analysis +runtime: node +entry: scripts/run-benchmark.cjs +install: npm + +requirements: + node: ">=18" + npm_install: true + platforms: ["linux", "macos", "windows"] +--- + +# HomeSafe-Bench β€” Indoor Safety Hazard Detection + +VLM benchmark evaluating indoor home safety hazard detection from static camera frames. Inspired by [HomeSafeBench (arXiv 2509.23690)](https://arxiv.org/abs/2509.23690), adapted for **fixed indoor security cameras** with AI-generated fixture images. + +**40 tests** across **5 hazard categories**: + +| Category | Tests | Example Hazards | +|----------|-------|-----------------| +| πŸ”₯ Fire / Smoke | 8 | Stove smoke, candle near curtain, iron left face-down | +| ⚑ Electrical | 8 | Overloaded power strip, exposed wire, frayed cord | +| 🚢 Trip / Fall | 8 | Toys on staircase, wet floor, loose rug edge | +| πŸ‘Ά Child Safety | 8 | Open cabinet with chemicals, uncovered outlet, blind cord | +| πŸ“¦ Falling Objects | 8 | Heavy items on high shelf, leaning bookshelf, loose ceiling fan | + +## Setup + +**Requires `npm install`.** This skill has a `package.json` with dependencies (e.g. `openai` SDK). 
The deployment agent should check for `package.json` and run `npm install` in the skill directory before launching. + +Entry script: `scripts/run-benchmark.cjs` + +### Verification + +```bash +node scripts/run-benchmark.cjs --help +``` + +## Quick Start + +### As an Aegis Skill (automatic) + +When spawned by Aegis, all configuration is injected via environment variables. The benchmark discovers your VLM server automatically, generates an HTML report, and opens it when complete. + +### Standalone + +```bash +# Run all 40 tests +node scripts/run-benchmark.cjs --vlm http://localhost:5405 + +# Quick mode (2 tests per category = 10 total) +node scripts/run-benchmark.cjs --vlm http://localhost:5405 --mode quick + +# Skip report auto-open +node scripts/run-benchmark.cjs --vlm http://localhost:5405 --no-open +``` + +## Configuration + +### Environment Variables (set by Aegis) + +| Variable | Default | Description | +|----------|---------|-------------| +| `AEGIS_VLM_URL` | *(required)* | VLM server base URL | +| `AEGIS_VLM_MODEL` | β€” | Loaded VLM model ID | +| `AEGIS_SKILL_ID` | β€” | Skill identifier (enables skill mode) | +| `AEGIS_SKILL_PARAMS` | `{}` | JSON params from skill config | + +> **Note**: URLs should be base URLs (e.g. `http://localhost:5405`). The benchmark appends `/v1/chat/completions` automatically. 
+ +### User Configuration (config.yaml) + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `mode` | select | `full` | Which mode: `full` (40 tests) or `quick` (10 tests β€” 2 per category) | +| `noOpen` | boolean | `false` | Skip auto-opening the HTML report in browser | + +### CLI Arguments (standalone fallback) + +| Argument | Default | Description | +|----------|---------|-------------| +| `--vlm URL` | *(required)* | VLM server base URL | +| `--mode MODE` | `full` | Test mode: `full` or `quick` | +| `--out DIR` | `~/.aegis-ai/homesafe-benchmarks` | Results directory | +| `--no-open` | β€” | Don't auto-open report in browser | + +## Protocol + +### Aegis β†’ Skill (env vars) +``` +AEGIS_VLM_URL=http://localhost:5405 +AEGIS_SKILL_ID=homesafe-bench +AEGIS_SKILL_PARAMS={} +``` + +### Skill β†’ Aegis (stdout, JSON lines) +```jsonl +{"event": "ready", "vlm": "SmolVLM-500M", "system": "Apple M3"} +{"event": "suite_start", "suite": "πŸ”₯ Fire / Smoke"} +{"event": "test_result", "suite": "...", "test": "...", "status": "pass", "timeMs": 4500} +{"event": "suite_end", "suite": "...", "passed": 7, "failed": 1} +{"event": "complete", "passed": 36, "total": 40, "timeMs": 180000, "reportPath": "/path/to/report.html"} +``` + +Human-readable output goes to **stderr** (visible in Aegis console tab). + +## Citation + +This benchmark is inspired by: + +> **HomeSafeBench: Towards Measuring the Proficiency of Home Safety for Embodied AI Agents** +> arXiv:2509.23690 +> +> Unlike the academic benchmark (embodied agent + navigation in simulated 3D environments), our version uses **static indoor camera frames** β€” matching real-world indoor security camera deployment (fixed wall/ceiling mount). All fixture images are **AI-generated** consistent with DeepCamera's privacy-first approach. 
+ +## Requirements + +- Node.js β‰₯ 18 +- `npm install` (for `openai` SDK dependency) +- Running VLM server (llama-server with vision model, or OpenAI-compatible VLM endpoint) diff --git a/skills/analysis/homesafe-bench/config.yaml b/skills/analysis/homesafe-bench/config.yaml new file mode 100644 index 0000000..cc01d76 --- /dev/null +++ b/skills/analysis/homesafe-bench/config.yaml @@ -0,0 +1,13 @@ +params: + - key: mode + label: Test Mode + type: select + options: [full, quick] + default: full + description: "Which test mode: full (40 tests) or quick (10 tests β€” 2 per category)" + + - key: noOpen + label: Don't auto-open report + type: boolean + default: false + description: Skip opening the HTML report in browser after completion diff --git a/skills/analysis/homesafe-bench/deploy.sh b/skills/analysis/homesafe-bench/deploy.sh new file mode 100755 index 0000000..05d27e3 --- /dev/null +++ b/skills/analysis/homesafe-bench/deploy.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# HomeSafe-Bench deployment script +# Runs npm install to fetch openai SDK dependency + +set -e +cd "$(dirname "$0")" +npm install +echo "βœ… HomeSafe-Bench dependencies installed" diff --git a/skills/analysis/homesafe-bench/fixtures/frames/child_01_chemicals_cabinet.png b/skills/analysis/homesafe-bench/fixtures/frames/child_01_chemicals_cabinet.png new file mode 100644 index 0000000..2e6d077 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/child_01_chemicals_cabinet.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/child_02_uncovered_outlet.png b/skills/analysis/homesafe-bench/fixtures/frames/child_02_uncovered_outlet.png new file mode 100644 index 0000000..787a0ba Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/child_02_uncovered_outlet.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/child_03_blind_cord.png b/skills/analysis/homesafe-bench/fixtures/frames/child_03_blind_cord.png new file mode 100644 index 
0000000..13c0335 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/child_03_blind_cord.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/child_04_sharp_corner.png b/skills/analysis/homesafe-bench/fixtures/frames/child_04_sharp_corner.png new file mode 100644 index 0000000..d919a09 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/child_04_sharp_corner.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/child_05_choking_hazard.png b/skills/analysis/homesafe-bench/fixtures/frames/child_05_choking_hazard.png new file mode 100644 index 0000000..c2b346e Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/child_05_choking_hazard.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/child_06_unsecured_bookshelf.png b/skills/analysis/homesafe-bench/fixtures/frames/child_06_unsecured_bookshelf.png new file mode 100644 index 0000000..516b035 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/child_06_unsecured_bookshelf.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/child_07_stove_knobs.png b/skills/analysis/homesafe-bench/fixtures/frames/child_07_stove_knobs.png new file mode 100644 index 0000000..c0fb60f Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/child_07_stove_knobs.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/child_08_open_window.png b/skills/analysis/homesafe-bench/fixtures/frames/child_08_open_window.png new file mode 100644 index 0000000..cc14e6b Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/child_08_open_window.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/electrical_01_overloaded_strip.png b/skills/analysis/homesafe-bench/fixtures/frames/electrical_01_overloaded_strip.png new file mode 100644 index 0000000..1269551 Binary files /dev/null and 
b/skills/analysis/homesafe-bench/fixtures/frames/electrical_01_overloaded_strip.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/electrical_02_exposed_wire_sink.png b/skills/analysis/homesafe-bench/fixtures/frames/electrical_02_exposed_wire_sink.png new file mode 100644 index 0000000..d1139ad Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/electrical_02_exposed_wire_sink.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/electrical_03_frayed_cord.png b/skills/analysis/homesafe-bench/fixtures/frames/electrical_03_frayed_cord.png new file mode 100644 index 0000000..e433bec Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/electrical_03_frayed_cord.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/electrical_04_cord_under_rug.png b/skills/analysis/homesafe-bench/fixtures/frames/electrical_04_cord_under_rug.png new file mode 100644 index 0000000..cdeee7d Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/electrical_04_cord_under_rug.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/electrical_05_wet_hands_outlet.png b/skills/analysis/homesafe-bench/fixtures/frames/electrical_05_wet_hands_outlet.png new file mode 100644 index 0000000..8fc0c0e Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/electrical_05_wet_hands_outlet.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/electrical_06_ungrounded_adapter.png b/skills/analysis/homesafe-bench/fixtures/frames/electrical_06_ungrounded_adapter.png new file mode 100644 index 0000000..34e27bd Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/electrical_06_ungrounded_adapter.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/electrical_07_damaged_outlet.png b/skills/analysis/homesafe-bench/fixtures/frames/electrical_07_damaged_outlet.png new file mode 100644 index 
0000000..4a555dc Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/electrical_07_damaged_outlet.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/electrical_08_tangled_cords.png b/skills/analysis/homesafe-bench/fixtures/frames/electrical_08_tangled_cords.png new file mode 100644 index 0000000..2c45ac3 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/electrical_08_tangled_cords.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/falling_01_heavy_high_shelf.png b/skills/analysis/homesafe-bench/fixtures/frames/falling_01_heavy_high_shelf.png new file mode 100644 index 0000000..7291d2c Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/falling_01_heavy_high_shelf.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/falling_02_stacked_boxes.png b/skills/analysis/homesafe-bench/fixtures/frames/falling_02_stacked_boxes.png new file mode 100644 index 0000000..579c59a Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/falling_02_stacked_boxes.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/falling_03_leaning_bookshelf.png b/skills/analysis/homesafe-bench/fixtures/frames/falling_03_leaning_bookshelf.png new file mode 100644 index 0000000..9c1d8d3 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/falling_03_leaning_bookshelf.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/falling_04_fridge_top.png b/skills/analysis/homesafe-bench/fixtures/frames/falling_04_fridge_top.png new file mode 100644 index 0000000..5e65828 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/falling_04_fridge_top.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/falling_05_overhead_tools.png b/skills/analysis/homesafe-bench/fixtures/frames/falling_05_overhead_tools.png new file mode 100644 index 0000000..b223a1d Binary files 
/dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/falling_05_overhead_tools.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/falling_06_unsecured_tv.png b/skills/analysis/homesafe-bench/fixtures/frames/falling_06_unsecured_tv.png new file mode 100644 index 0000000..f1f3fef Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/falling_06_unsecured_tv.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/falling_07_overloaded_coatrack.png b/skills/analysis/homesafe-bench/fixtures/frames/falling_07_overloaded_coatrack.png new file mode 100644 index 0000000..ca1bb90 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/falling_07_overloaded_coatrack.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/falling_08_loose_fan_blade.png b/skills/analysis/homesafe-bench/fixtures/frames/falling_08_loose_fan_blade.png new file mode 100644 index 0000000..1dd651a Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/falling_08_loose_fan_blade.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/fire_01_stove_smoke.png b/skills/analysis/homesafe-bench/fixtures/frames/fire_01_stove_smoke.png new file mode 100644 index 0000000..d354fd4 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/fire_01_stove_smoke.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/fire_02_candle_curtain.png b/skills/analysis/homesafe-bench/fixtures/frames/fire_02_candle_curtain.png new file mode 100644 index 0000000..9d5d958 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/fire_02_candle_curtain.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/fire_03_fireplace_ember.png b/skills/analysis/homesafe-bench/fixtures/frames/fire_03_fireplace_ember.png new file mode 100644 index 0000000..f6492c4 Binary files /dev/null and 
b/skills/analysis/homesafe-bench/fixtures/frames/fire_03_fireplace_ember.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/fire_04_iron_facedown.png b/skills/analysis/homesafe-bench/fixtures/frames/fire_04_iron_facedown.png new file mode 100644 index 0000000..855ebf0 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/fire_04_iron_facedown.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/fire_05_heater_drapes.png b/skills/analysis/homesafe-bench/fixtures/frames/fire_05_heater_drapes.png new file mode 100644 index 0000000..0c9a358 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/fire_05_heater_drapes.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/fire_06_missing_detector.png b/skills/analysis/homesafe-bench/fixtures/frames/fire_06_missing_detector.png new file mode 100644 index 0000000..a97b126 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/fire_06_missing_detector.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/fire_07_grease_fire.png b/skills/analysis/homesafe-bench/fixtures/frames/fire_07_grease_fire.png new file mode 100644 index 0000000..ff85750 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/fire_07_grease_fire.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/fire_08_cigarette_couch.png b/skills/analysis/homesafe-bench/fixtures/frames/fire_08_cigarette_couch.png new file mode 100644 index 0000000..00d2cda Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/fire_08_cigarette_couch.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/trip_01_toys_stairs.png b/skills/analysis/homesafe-bench/fixtures/frames/trip_01_toys_stairs.png new file mode 100644 index 0000000..bcb30c7 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/trip_01_toys_stairs.png differ diff --git 
a/skills/analysis/homesafe-bench/fixtures/frames/trip_02_wet_floor.png b/skills/analysis/homesafe-bench/fixtures/frames/trip_02_wet_floor.png new file mode 100644 index 0000000..4bdd380 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/trip_02_wet_floor.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/trip_03_loose_rug.png b/skills/analysis/homesafe-bench/fixtures/frames/trip_03_loose_rug.png new file mode 100644 index 0000000..f11e40c Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/trip_03_loose_rug.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/trip_04_cluttered_hallway.png b/skills/analysis/homesafe-bench/fixtures/frames/trip_04_cluttered_hallway.png new file mode 100644 index 0000000..bd27104 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/trip_04_cluttered_hallway.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/trip_05_shoes_doorway.png b/skills/analysis/homesafe-bench/fixtures/frames/trip_05_shoes_doorway.png new file mode 100644 index 0000000..efbafbc Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/trip_05_shoes_doorway.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/trip_06_dark_stairwell.png b/skills/analysis/homesafe-bench/fixtures/frames/trip_06_dark_stairwell.png new file mode 100644 index 0000000..9649e8d Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/trip_06_dark_stairwell.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/trip_07_cables_walkway.png b/skills/analysis/homesafe-bench/fixtures/frames/trip_07_cables_walkway.png new file mode 100644 index 0000000..4085591 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/trip_07_cables_walkway.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/trip_08_stool_stairs.png 
b/skills/analysis/homesafe-bench/fixtures/frames/trip_08_stool_stairs.png new file mode 100644 index 0000000..d3cb173 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/trip_08_stool_stairs.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/scenarios.json b/skills/analysis/homesafe-bench/fixtures/scenarios.json new file mode 100644 index 0000000..b685f25 --- /dev/null +++ b/skills/analysis/homesafe-bench/fixtures/scenarios.json @@ -0,0 +1,730 @@ +{ + "version": "1.0.0", + "description": "HomeSafe-Bench test scenarios β€” 40 indoor safety hazard tests across 5 categories", + "citation": "Inspired by HomeSafeBench (arXiv 2509.23690)", + "categories": [ + { + "id": "fire", + "name": "Fire / Smoke", + "emoji": "πŸ”₯", + "description": "Fire hazards, smoke sources, and heat-related dangers in the home" + }, + { + "id": "electrical", + "name": "Electrical", + "emoji": "⚑", + "description": "Electrical hazards including overloaded circuits, exposed wiring, and water proximity" + }, + { + "id": "trip_fall", + "name": "Trip / Fall", + "emoji": "🚢", + "description": "Tripping hazards, fall risks, and pathway obstructions" + }, + { + "id": "child_safety", + "name": "Child Safety", + "emoji": "πŸ‘Ά", + "description": "Hazards specifically dangerous to children β€” chemicals, choking, sharp edges" + }, + { + "id": "falling_objects", + "name": "Falling Objects", + "emoji": "πŸ“¦", + "description": "Unstable items, precariously placed objects, and overhead risks" + } + ], + "scenarios": [ + { + "id": "fire_01", + "category": "fire", + "name": "Smoke rising from stove", + "file": "fire_01_stove_smoke.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any safety hazards visible in this image. 
Focus on fire, heat, and smoke risks.", + "expectedKeywords": [ + "smoke", + "stove", + "fire", + "cook", + "burn", + "hazard", + "kitchen" + ] + }, + { + "id": "fire_02", + "category": "fire", + "name": "Candle near curtain", + "file": "fire_02_candle_curtain.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any safety hazards visible in this image. Focus on fire risks from open flames.", + "expectedKeywords": [ + "candle", + "curtain", + "fire", + "flame", + "fabric", + "hazard", + "drape" + ] + }, + { + "id": "fire_03", + "category": "fire", + "name": "Fireplace ember on rug", + "file": "fire_03_fireplace_ember.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any safety hazards visible in this image, especially near the fireplace.", + "expectedKeywords": [ + "fireplace", + "ember", + "rug", + "spark", + "fire", + "carpet", + "burn", + "flame" + ] + }, + { + "id": "fire_04", + "category": "fire", + "name": "Iron left face-down on board", + "file": "fire_04_iron_facedown.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any safety hazards visible in this image. Look for heat-producing appliances.", + "expectedKeywords": [ + "iron", + "burn", + "hot", + "face-down", + "ironing", + "board", + "fire", + "heat", + "scorch" + ] + }, + { + "id": "fire_05", + "category": "fire", + "name": "Space heater near drapes", + "file": "fire_05_heater_drapes.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. 
Identify any safety hazards, especially heating appliances near flammable materials.", + "expectedKeywords": [ + "heater", + "drape", + "curtain", + "fire", + "space heater", + "fabric", + "hazard", + "close" + ] + }, + { + "id": "fire_06", + "category": "fire", + "name": "Missing smoke detector", + "file": "fire_06_missing_detector.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Check for smoke and fire detection equipment. Is there a smoke detector visible on the ceiling?", + "expectedKeywords": [ + "smoke detector", + "alarm", + "missing", + "ceiling", + "no", + "absent", + "detector", + "safety" + ] + }, + { + "id": "fire_07", + "category": "fire", + "name": "Grease fire on cooktop", + "file": "fire_07_grease_fire.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any active fires or dangerous cooking situations.", + "expectedKeywords": [ + "grease", + "fire", + "flame", + "cook", + "pan", + "kitchen", + "burn", + "oil", + "stove" + ] + }, + { + "id": "fire_08", + "category": "fire", + "name": "Cigarette on couch arm", + "file": "fire_08_cigarette_couch.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any fire hazards, including smoking materials near upholstery.", + "expectedKeywords": [ + "cigarette", + "couch", + "smoke", + "fire", + "sofa", + "ash", + "burn", + "fabric" + ] + }, + { + "id": "electrical_01", + "category": "electrical", + "name": "Overloaded power strip", + "file": "electrical_01_overloaded_strip.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. 
Identify any electrical safety hazards, especially around power outlets and strips.", + "expectedKeywords": [ + "overload", + "power strip", + "plug", + "outlet", + "electric", + "cord", + "too many", + "surge" + ] + }, + { + "id": "electrical_02", + "category": "electrical", + "name": "Exposed wire near sink", + "file": "electrical_02_exposed_wire_sink.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any electrical hazards, especially wires near water sources.", + "expectedKeywords": [ + "wire", + "exposed", + "sink", + "water", + "electric", + "shock", + "danger", + "bare" + ] + }, + { + "id": "electrical_03", + "category": "electrical", + "name": "Frayed cord on appliance", + "file": "electrical_03_frayed_cord.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Check for damaged electrical cords or cables.", + "expectedKeywords": [ + "fray", + "cord", + "damage", + "wire", + "electric", + "worn", + "cable", + "appliance" + ] + }, + { + "id": "electrical_04", + "category": "electrical", + "name": "Extension cord under rug", + "file": "electrical_04_cord_under_rug.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any fire or electrical hazards related to cord placement.", + "expectedKeywords": [ + "extension", + "cord", + "rug", + "under", + "carpet", + "fire", + "hazard", + "hidden" + ] + }, + { + "id": "electrical_05", + "category": "electrical", + "name": "Wet hands near outlet", + "file": "electrical_05_wet_hands_outlet.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. 
Identify any electrical shock hazards, especially involving water and electricity.", + "expectedKeywords": [ + "wet", + "water", + "outlet", + "hand", + "electric", + "shock", + "danger", + "plug" + ] + }, + { + "id": "electrical_06", + "category": "electrical", + "name": "Ungrounded 3-to-2 adapter", + "file": "electrical_06_ungrounded_adapter.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Check for improper electrical connections or adapters.", + "expectedKeywords": [ + "adapter", + "ground", + "plug", + "outlet", + "prong", + "electric", + "unsafe", + "converter" + ] + }, + { + "id": "electrical_07", + "category": "electrical", + "name": "Damaged outlet cover", + "file": "electrical_07_damaged_outlet.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Check the condition of electrical outlets and covers.", + "expectedKeywords": [ + "outlet", + "cover", + "damage", + "broken", + "crack", + "expose", + "plate", + "electric" + ] + }, + { + "id": "electrical_08", + "category": "electrical", + "name": "Tangled cords behind desk", + "file": "electrical_08_tangled_cords.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any cable management hazards behind furniture.", + "expectedKeywords": [ + "tangle", + "cord", + "cable", + "wire", + "mess", + "desk", + "behind", + "fire", + "hazard" + ] + }, + { + "id": "trip_01", + "category": "trip_fall", + "name": "Toys scattered on staircase", + "file": "trip_01_toys_stairs.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. 
Identify any tripping or falling hazards, especially on stairs.", + "expectedKeywords": [ + "toy", + "stair", + "trip", + "fall", + "hazard", + "block", + "step", + "obstruct" + ] + }, + { + "id": "trip_02", + "category": "trip_fall", + "name": "Wet floor without sign", + "file": "trip_02_wet_floor.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any slip or fall hazards on the floor surface.", + "expectedKeywords": [ + "wet", + "floor", + "slip", + "water", + "spill", + "puddle", + "slippery", + "fall" + ] + }, + { + "id": "trip_03", + "category": "trip_fall", + "name": "Loose rug edge curled up", + "file": "trip_03_loose_rug.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Check for tripping hazards related to floor coverings.", + "expectedKeywords": [ + "rug", + "loose", + "curl", + "trip", + "edge", + "fold", + "carpet", + "hazard", + "bunched" + ] + }, + { + "id": "trip_04", + "category": "trip_fall", + "name": "Cluttered hallway", + "file": "trip_04_cluttered_hallway.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any pathway obstructions or tripping hazards.", + "expectedKeywords": [ + "clutter", + "hallway", + "obstruct", + "trip", + "box", + "item", + "block", + "path", + "narrow" + ] + }, + { + "id": "trip_05", + "category": "trip_fall", + "name": "Shoes piled in doorway", + "file": "trip_05_shoes_doorway.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any tripping hazards near doorways and entries.", + "expectedKeywords": [ + "shoe", + "door", + "trip", + "entry", + "pile", + "obstruct", + "hazard", + "block" + ] + }, + { + "id": "trip_06", + "category": "trip_fall", + "name": "Unlit stairwell", + "file": "trip_06_dark_stairwell.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. 
Assess lighting conditions and visibility hazards, especially on stairs.", + "expectedKeywords": [ + "dark", + "stair", + "light", + "dim", + "unlit", + "visibility", + "shadow", + "hazard" + ] + }, + { + "id": "trip_07", + "category": "trip_fall", + "name": "Cables across walkway", + "file": "trip_07_cables_walkway.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any cords or cables creating tripping hazards.", + "expectedKeywords": [ + "cable", + "cord", + "trip", + "wire", + "floor", + "across", + "walk", + "hazard" + ] + }, + { + "id": "trip_08", + "category": "trip_fall", + "name": "Step stool near top of stairs", + "file": "trip_08_stool_stairs.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any unstable or misplaced furniture near stairs.", + "expectedKeywords": [ + "stool", + "step", + "stair", + "fall", + "top", + "unstable", + "hazard", + "ladder" + ] + }, + { + "id": "child_01", + "category": "child_safety", + "name": "Open cabinet with cleaning chemicals", + "file": "child_01_chemicals_cabinet.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any child safety hazards, especially toxic substances within reach.", + "expectedKeywords": [ + "cabinet", + "chemical", + "clean", + "open", + "child", + "poison", + "toxic", + "reach", + "bottle" + ] + }, + { + "id": "child_02", + "category": "child_safety", + "name": "Uncovered electrical outlet", + "file": "child_02_uncovered_outlet.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. 
Check for child safety hazards related to electrical outlets.", + "expectedKeywords": [ + "outlet", + "uncovered", + "child", + "electric", + "plug", + "cover", + "safety", + "exposed" + ] + }, + { + "id": "child_03", + "category": "child_safety", + "name": "Blind cord hanging in reach", + "file": "child_03_blind_cord.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify strangulation or entanglement hazards for children.", + "expectedKeywords": [ + "blind", + "cord", + "string", + "hang", + "child", + "strangle", + "window", + "reach", + "loop" + ] + }, + { + "id": "child_04", + "category": "child_safety", + "name": "Sharp corner on coffee table", + "file": "child_04_sharp_corner.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any sharp edges or corners that could injure a child.", + "expectedKeywords": [ + "sharp", + "corner", + "table", + "edge", + "child", + "bump", + "injury", + "point" + ] + }, + { + "id": "child_05", + "category": "child_safety", + "name": "Small objects choking hazard", + "file": "child_05_choking_hazard.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any small objects that could be choking hazards for young children.", + "expectedKeywords": [ + "small", + "choke", + "child", + "toy", + "marble", + "button", + "piece", + "swallow", + "hazard" + ] + }, + { + "id": "child_06", + "category": "child_safety", + "name": "Unsecured tall bookshelf", + "file": "child_06_unsecured_bookshelf.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. 
Check for furniture tip-over risks that could endanger children.", + "expectedKeywords": [ + "bookshelf", + "unsecured", + "tip", + "fall", + "heavy", + "child", + "anchor", + "lean", + "unstable" + ] + }, + { + "id": "child_07", + "category": "child_safety", + "name": "Stove knobs accessible to children", + "file": "child_07_stove_knobs.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any kitchen hazards accessible to small children.", + "expectedKeywords": [ + "stove", + "knob", + "child", + "reach", + "gas", + "burner", + "kitchen", + "turn", + "accessible" + ] + }, + { + "id": "child_08", + "category": "child_safety", + "name": "Open window without guard", + "file": "child_08_open_window.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Check for fall hazards from windows, especially for children.", + "expectedKeywords": [ + "window", + "open", + "guard", + "child", + "fall", + "screen", + "height", + "safety" + ] + }, + { + "id": "falling_01", + "category": "falling_objects", + "name": "Heavy items on high shelf", + "file": "falling_01_heavy_high_shelf.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any heavy objects stored unsafely at height.", + "expectedKeywords": [ + "heavy", + "shelf", + "high", + "fall", + "weight", + "above", + "stack", + "danger", + "top" + ] + }, + { + "id": "falling_02", + "category": "falling_objects", + "name": "Precariously stacked boxes", + "file": "falling_02_stacked_boxes.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. 
Check for unstable stacking that could collapse.", + "expectedKeywords": [ + "stack", + "box", + "unstable", + "fall", + "lean", + "collapse", + "precarious", + "pile" + ] + }, + { + "id": "falling_03", + "category": "falling_objects", + "name": "Leaning bookshelf", + "file": "falling_03_leaning_bookshelf.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Check for furniture stability issues.", + "expectedKeywords": [ + "bookshelf", + "lean", + "tilt", + "unstable", + "fall", + "tip", + "heavy", + "book" + ] + }, + { + "id": "falling_04", + "category": "falling_objects", + "name": "Items on fridge top near edge", + "file": "falling_04_fridge_top.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any objects that could fall from on top of appliances.", + "expectedKeywords": [ + "fridge", + "top", + "edge", + "fall", + "item", + "refrigerator", + "above", + "precarious" + ] + }, + { + "id": "falling_05", + "category": "falling_objects", + "name": "Tools on overhead rack", + "file": "falling_05_overhead_tools.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Check for overhead storage hazards.", + "expectedKeywords": [ + "tool", + "overhead", + "rack", + "hang", + "fall", + "heavy", + "above", + "mount" + ] + }, + { + "id": "falling_06", + "category": "falling_objects", + "name": "Unsecured wall-mount TV", + "file": "falling_06_unsecured_tv.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. 
Check for improperly mounted electronics that could fall.", + "expectedKeywords": [ + "tv", + "mount", + "wall", + "unsecured", + "television", + "fall", + "hang", + "loose", + "tilt" + ] + }, + { + "id": "falling_07", + "category": "falling_objects", + "name": "Overloaded coat rack", + "file": "falling_07_overloaded_coatrack.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Check for overloaded freestanding furniture.", + "expectedKeywords": [ + "coat", + "rack", + "overload", + "heavy", + "tip", + "fall", + "hang", + "lean", + "weight" + ] + }, + { + "id": "falling_08", + "category": "falling_objects", + "name": "Ceiling fan with loose blade", + "file": "falling_08_loose_fan_blade.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Check the ceiling fan for any maintenance issues.", + "expectedKeywords": [ + "fan", + "blade", + "ceiling", + "loose", + "wobble", + "hang", + "detach", + "danger" + ] + } + ] +} \ No newline at end of file diff --git a/skills/analysis/homesafe-bench/package-lock.json b/skills/analysis/homesafe-bench/package-lock.json new file mode 100644 index 0000000..a774f40 --- /dev/null +++ b/skills/analysis/homesafe-bench/package-lock.json @@ -0,0 +1,37 @@ +{ + "name": "homesafe-bench", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "homesafe-bench", + "version": "1.0.0", + "license": "ISC", + "dependencies": { + "openai": "^6.27.0" + } + }, + "node_modules/openai": { + "version": "6.27.0", + "resolved": "https://registry.npmjs.org/openai/-/openai-6.27.0.tgz", + "integrity": "sha512-osTKySlrdYrLYTt0zjhY8yp0JUBmWDCN+Q+QxsV4xMQnnoVFpylgKGgxwN8sSdTNw0G4y+WUXs4eCMWpyDNWZQ==", + "license": "Apache-2.0", + "bin": { + "openai": "bin/cli" + }, + "peerDependencies": { + "ws": "^8.18.0", + "zod": "^3.25 || ^4.0" + }, + "peerDependenciesMeta": { + "ws": { + "optional": true + }, + "zod": { + "optional": true + } + } + } 
+ } +} diff --git a/skills/analysis/homesafe-bench/package.json b/skills/analysis/homesafe-bench/package.json new file mode 100644 index 0000000..1d96d2b --- /dev/null +++ b/skills/analysis/homesafe-bench/package.json @@ -0,0 +1,21 @@ +{ + "name": "homesafe-bench", + "version": "1.0.0", + "description": "VLM indoor safety hazard detection benchmark", + "main": "scripts/run-benchmark.cjs", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "keywords": [ + "benchmark", + "vlm", + "safety", + "hazard" + ], + "author": "SharpAI", + "license": "ISC", + "type": "commonjs", + "dependencies": { + "openai": "^6.27.0" + } +} \ No newline at end of file diff --git a/skills/analysis/homesafe-bench/scripts/run-benchmark.cjs b/skills/analysis/homesafe-bench/scripts/run-benchmark.cjs new file mode 100644 index 0000000..450b39c --- /dev/null +++ b/skills/analysis/homesafe-bench/scripts/run-benchmark.cjs @@ -0,0 +1,545 @@ +#!/usr/bin/env node +/** + * HomeSafe-Bench β€” Indoor Safety Hazard Detection Benchmark + * + * Evaluates VLM models on indoor home safety hazard detection across 5 categories: + * - Fire/Smoke, Electrical, Trip/Fall, Child Safety, Falling Objects + * + * Inspired by HomeSafeBench (arXiv 2509.23690), adapted for static indoor cameras. + * + * ## Skill Protocol (when spawned by Aegis) + * + * Aegis β†’ Skill (env vars): + * AEGIS_VLM_URL β€” VLM server URL (e.g. 
http://localhost:5405) + * AEGIS_SKILL_PARAMS β€” JSON params from skill config + * AEGIS_SKILL_ID β€” Skill ID + * + * Skill β†’ Aegis (stdout, JSON lines): + * {"event": "ready", "vlm": "SmolVLM-500M"} + * {"event": "suite_start", "suite": "πŸ”₯ Fire / Smoke"} + * {"event": "test_result", "suite": "...", "test": "...", "status": "pass", "timeMs": 4500} + * {"event": "suite_end", "suite": "...", "passed": 7, "failed": 1} + * {"event": "complete", "passed": 36, "total": 40, "timeMs": 180000} + * + * Standalone usage: + * node run-benchmark.cjs [options] + * --vlm URL VLM server (required) + * --mode MODE full or quick (default: full) + * --out DIR Results directory + * --no-open Don't auto-open report + */ + +const fs = require('fs'); +const path = require('path'); +const os = require('os'); +const { execSync } = require('child_process'); + +// ─── Config: Aegis env vars β†’ CLI args β†’ defaults ──────────────────────────── + +const args = process.argv.slice(2); +function getArg(name, defaultVal) { + const idx = args.indexOf(`--${name}`); + if (idx === -1) return defaultVal; + return args[idx + 1] || defaultVal; +} + +// ─── Help ───────────────────────────────────────────────────────────────────── +if (args.includes('--help') || args.includes('-h')) { + console.log(` +HomeSafe-Bench β€” Indoor Safety Hazard Detection Benchmark β€’ DeepCamera / SharpAI + +Inspired by HomeSafeBench (arXiv 2509.23690) + +Usage: node scripts/run-benchmark.cjs [options] + +Options: + --vlm URL VLM server base URL (required) + --mode MODE full or quick (default: full) + --out DIR Results output directory (default: ~/.aegis-ai/homesafe-benchmarks) + --no-open Don't auto-open report in browser + -h, --help Show this help message + +Environment Variables (set by Aegis): + AEGIS_VLM_URL VLM server base URL + AEGIS_SKILL_ID Skill identifier (enables skill mode) + AEGIS_SKILL_PARAMS JSON params from skill config + +Categories: Fire/Smoke, Electrical, Trip/Fall, Child Safety, Falling Objects 
+ `.trim()); + process.exit(0); +} + +// Parse skill parameters if running as Aegis skill +let skillParams = {}; +try { skillParams = JSON.parse(process.env.AEGIS_SKILL_PARAMS || '{}'); } catch { } + +const VLM_URL = process.env.AEGIS_VLM_URL || getArg('vlm', ''); +const VLM_MODEL = process.env.AEGIS_VLM_MODEL || ''; +const RESULTS_DIR = getArg('out', path.join(os.homedir(), '.aegis-ai', 'homesafe-benchmarks')); +const IS_SKILL_MODE = !!process.env.AEGIS_SKILL_ID; +const NO_OPEN = args.includes('--no-open') || skillParams.noOpen || false; +const FIXTURES_DIR = path.join(__dirname, '..', 'fixtures'); +const FRAMES_DIR = path.join(FIXTURES_DIR, 'frames'); +const IDLE_TIMEOUT_MS = 120000; // 2 minutes β€” safety scenarios may need more analysis + +// Mode (full = 40 tests, quick = 10 tests β€” 2 per category) +const TEST_MODE = skillParams.mode || getArg('mode', 'full'); + +// ─── OpenAI SDK Client ────────────────────────────────────────────────────── +const OpenAI = require('openai'); + +const strip = (u) => u.replace(/\/v1\/?$/, ''); +const vlmClient = VLM_URL ? 
new OpenAI({ + apiKey: 'not-needed', + baseURL: `${strip(VLM_URL)}/v1`, +}) : null; + +// ─── Skill Protocol: JSON lines on stdout, human text on stderr ────────────── + +function emit(event) { + process.stdout.write(JSON.stringify(event) + '\n'); +} + +function log(msg) { + process.stderr.write(msg + '\n'); +} + +// ─── Test Framework ─────────────────────────────────────────────────────────── + +const suites = []; +let currentSuite = null; + +function suite(name, fn) { + suites.push({ name, fn, tests: [] }); +} + +const results = { + timestamp: new Date().toISOString(), + vlm: VLM_URL || null, + system: {}, + model: {}, + suites: [], + totals: { passed: 0, failed: 0, skipped: 0, total: 0, timeMs: 0 }, + tokenTotals: { prompt: 0, completion: 0, total: 0 }, +}; + +async function vlmCall(messages, opts = {}) { + if (!vlmClient) { + throw new Error('VLM client not configured β€” pass --vlm URL'); + } + + const model = opts.model || VLM_MODEL || undefined; + + const params = { + messages, + stream: true, + ...(model && { model }), + ...(opts.temperature !== undefined && { temperature: opts.temperature }), + max_completion_tokens: opts.maxTokens || 512, + }; + + const controller = new AbortController(); + const idleMs = opts.timeout || IDLE_TIMEOUT_MS; + let idleTimer = setTimeout(() => controller.abort(), idleMs); + const resetIdle = () => { clearTimeout(idleTimer); idleTimer = setTimeout(() => controller.abort(), idleMs); }; + + try { + const stream = await vlmClient.chat.completions.create(params, { + signal: controller.signal, + }); + + let content = ''; + let reasoningContent = ''; + let streamModel = ''; + let usage = {}; + let tokenCount = 0; + + for await (const chunk of stream) { + resetIdle(); + if (chunk.model) streamModel = chunk.model; + const delta = chunk.choices?.[0]?.delta; + if (delta?.content) content += delta.content; + if (delta?.reasoning_content) reasoningContent += delta.reasoning_content; + if (delta?.content || delta?.reasoning_content) { + 
tokenCount++; + if (tokenCount % 100 === 0) { + log(` … ${tokenCount} tokens received`); + } + } + if (chunk.usage) usage = chunk.usage; + } + + if (!content && reasoningContent) { + content = reasoningContent; + } + + results.tokenTotals.prompt += usage.prompt_tokens || 0; + results.tokenTotals.completion += usage.completion_tokens || 0; + results.tokenTotals.total += usage.total_tokens || 0; + + if (!results.model.vlm && streamModel) results.model.vlm = streamModel; + + return { content, usage, model: streamModel }; + } finally { + clearTimeout(idleTimer); + } +} + +function stripThink(text) { + return text.replace(/[\s\S]*?<\/think>\s*/gi, '').trim(); +} + +function assert(condition, msg) { + if (!condition) throw new Error(msg || 'Assertion failed'); +} + +async function runSuites() { + for (const s of suites) { + currentSuite = { name: s.name, tests: [], passed: 0, failed: 0, skipped: 0, timeMs: 0 }; + log(`\n${'─'.repeat(60)}`); + log(` ${s.name}`); + log(`${'─'.repeat(60)}`); + emit({ event: 'suite_start', suite: s.name }); + + await s.fn(); + + results.suites.push(currentSuite); + results.totals.passed += currentSuite.passed; + results.totals.failed += currentSuite.failed; + results.totals.skipped += currentSuite.skipped; + results.totals.total += currentSuite.tests.length; + + emit({ event: 'suite_end', suite: s.name, passed: currentSuite.passed, failed: currentSuite.failed, skipped: currentSuite.skipped, timeMs: currentSuite.timeMs }); + } +} + +async function test(name, fn) { + const testResult = { name, status: 'pass', timeMs: 0, detail: '', tokens: {} }; + const start = Date.now(); + try { + const detail = await fn(); + testResult.timeMs = Date.now() - start; + testResult.detail = detail || ''; + currentSuite.passed++; + log(` βœ… ${name} (${testResult.timeMs}ms)${detail ? 
` β€” ${detail}` : ''}`); + } catch (err) { + testResult.timeMs = Date.now() - start; + testResult.status = 'fail'; + testResult.detail = err.message; + currentSuite.failed++; + log(` ❌ ${name} (${testResult.timeMs}ms) β€” ${err.message}`); + } + currentSuite.timeMs += testResult.timeMs; + currentSuite.tests.push(testResult); + emit({ event: 'test_result', suite: currentSuite.name, test: name, status: testResult.status, timeMs: testResult.timeMs, detail: testResult.detail.slice(0, 120) }); +} + +function skip(name, reason) { + currentSuite.skipped++; + currentSuite.tests.push({ name, status: 'skip', timeMs: 0, detail: reason }); + log(` ⏭️ ${name} β€” ${reason}`); + emit({ event: 'test_result', suite: currentSuite.name, test: name, status: 'skip', timeMs: 0, detail: reason }); +} + +// ═══════════════════════════════════════════════════════════════════════════════ +// DISK SPACE CHECK +// ═══════════════════════════════════════════════════════════════════════════════ + +function checkDiskSpace(targetDir, requiredGB) { + try { + fs.mkdirSync(targetDir, { recursive: true }); + const dfOutput = execSync(`df -k "${targetDir}"`, { encoding: 'utf8' }); + const lines = dfOutput.trim().split('\n'); + if (lines.length >= 2) { + const parts = lines[1].split(/\s+/); + const availableKB = parseInt(parts[3], 10); + if (!isNaN(availableKB)) { + const availableGB = availableKB / (1024 * 1024); + if (availableGB < requiredGB) { + log(` ❌ Insufficient disk space`); + log(` Required: ${requiredGB.toFixed(1)} GB`); + log(` Available: ${availableGB.toFixed(1)} GB`); + log(` Location: ${targetDir}`); + emit({ event: 'error', message: `Insufficient disk space: need ${requiredGB}GB, have ${availableGB.toFixed(1)}GB` }); + process.exit(1); + } + log(` πŸ’Ύ Disk: ${availableGB.toFixed(1)} GB available (need ${requiredGB} GB) βœ“`); + return availableGB; + } + } + } catch (err) { + log(` ⚠️ Could not check disk space: ${err.message} β€” proceeding anyway`); + } + return -1; +} + +// 
═══════════════════════════════════════════════════════════════════════════════
+// DATASET MANAGEMENT
+// ═══════════════════════════════════════════════════════════════════════════════
+
+/**
+ * Check if upstream HomeSafeBench dataset is available for download.
+ * When the academic dataset becomes publicly available, this function
+ * will download it to ~/.aegis-ai/datasets/homesafe-bench/.
+ *
+ * Until then, the skill uses AI-generated fixture images from fixtures/frames/.
+ *
+ * @returns {string|null} Cached dataset directory when present, or null when
+ *   the bundled fixtures should be used instead.
+ */
+function checkUpstreamDataset() {
+  const datasetDir = path.join(os.homedir(), '.aegis-ai', 'datasets', 'homesafe-bench');
+  // Presence of this marker file indicates a previously completed download.
+  const markerFile = path.join(datasetDir, '.downloaded');
+
+  if (fs.existsSync(markerFile)) {
+    log(` πŸ“‚ Upstream dataset cached at: ${datasetDir}`);
+    return datasetDir;
+  }
+
+  // Upstream not available yet β€” use bundled AI-generated fixtures
+  log(` ℹ️ Upstream HomeSafeBench dataset not yet public (arXiv 2509.23690)`);
+  log(` Using bundled AI-generated fixture images`);
+  return null;
+}
+
+// ═══════════════════════════════════════════════════════════════════════════════
+// VLM EVALUATION
+// ═══════════════════════════════════════════════════════════════════════════════
+
+/**
+ * Send one on-disk image plus a text prompt to the VLM and return its reply.
+ *
+ * The image is inlined as a base64 data URL inside an `image_url` content
+ * part (chat-completions multimodal message shape). The MIME type is inferred
+ * from the file extension only (.png vs everything-else-as-jpeg).
+ *
+ * @param {string} framePath Path to the frame image on disk.
+ * @param {string} prompt    Question to ask about the frame.
+ * @returns {Promise<string>} Reply text passed through stripThink() β€” helper
+ *   defined earlier in this file; presumably removes reasoning traces (confirm).
+ */
+async function vlmAnalyze(framePath, prompt) {
+  const imageData = fs.readFileSync(framePath);
+  const base64 = imageData.toString('base64');
+  const mimeType = framePath.endsWith('.png') ? 'image/png' : 'image/jpeg';
+
+  const r = await vlmCall([{
+    role: 'user',
+    content: [
+      { type: 'image_url', image_url: { url: `data:${mimeType};base64,${base64}` } },
+      { type: 'text', text: prompt },
+    ],
+  }], { maxTokens: 512 });
+
+  return stripThink(r.content);
+}
+
+// ═══════════════════════════════════════════════════════════════════════════════
+// SUITE BUILDER
+// ═══════════════════════════════════════════════════════════════════════════════
+
+/**
+ * Load fixtures/scenarios.json and group scenarios by category id.
+ *
+ * Scenarios referencing a category id not listed in `data.categories` are
+ * silently dropped. In quick mode only the first 2 scenarios per category
+ * are kept.
+ *
+ * @returns {Object<string, {name: string, emoji: string, scenarios: Array}>}
+ */
+function loadScenarios() {
+  const data = JSON.parse(fs.readFileSync(path.join(FIXTURES_DIR, 'scenarios.json'), 'utf8'));
+
+  // Group scenarios by category
+  const byCategory = {};
+  for (const cat of data.categories) {
+    byCategory[cat.id] = {
+      name: cat.name,
+      emoji: cat.emoji,
+      scenarios: [],
+    };
+  }
+
+  for (const scenario of data.scenarios) {
+    if (byCategory[scenario.category]) {
+      byCategory[scenario.category].scenarios.push(scenario);
+    }
+  }
+
+  // Apply quick mode β€” keep 2 per category
+  if (TEST_MODE === 'quick') {
+    for (const cat of Object.values(byCategory)) {
+      cat.scenarios = cat.scenarios.slice(0, 2);
+    }
+  }
+
+  return byCategory;
+}
+
+/**
+ * Register one test suite per non-empty category.
+ *
+ * Each test sends the scenario's frame to the VLM and passes when the
+ * lower-cased reply contains at least one of the scenario's expected
+ * keywords (case-insensitive "any match" criterion).
+ *
+ * @param {ReturnType<typeof loadScenarios>} byCategory Grouped scenarios.
+ */
+function buildSuites(byCategory) {
+  for (const [catId, cat] of Object.entries(byCategory)) {
+    if (cat.scenarios.length === 0) continue;
+
+    suite(`${cat.emoji} ${cat.name}`, async () => {
+      for (const scenario of cat.scenarios) {
+        await test(scenario.name, async () => {
+          const framePath = path.join(FRAMES_DIR, scenario.file);
+
+          if (!fs.existsSync(framePath)) {
+            // NOTE(review): skip() immediately followed by throw β€” confirm the
+            // harness does not record this test as both skipped AND failed.
+            skip(scenario.name, `Frame missing: ${scenario.file}`);
+            throw new Error(`Frame file not found: ${scenario.file}`);
+          }
+
+          const desc = await vlmAnalyze(framePath, scenario.prompt);
+          const lower = desc.toLowerCase();
+          const matched = scenario.expectedKeywords.some(kw => lower.includes(kw.toLowerCase()));
+
+          assert(matched,
+            `Expected one of [${scenario.expectedKeywords.slice(0, 4).join(', ')}...] in: "${desc.slice(0, 80)}"`);
+
+          // Report which keywords actually hit, for the per-test detail line.
+          const hits = scenario.expectedKeywords.filter(kw => lower.includes(kw.toLowerCase()));
+          return `${desc.length} chars, matched: ${hits.join(', ')} βœ“`;
+        });
+      }
+    });
+  }
+}
+
+// ═══════════════════════════════════════════════════════════════════════════════
+// MAIN
+// ═══════════════════════════════════════════════════════════════════════════════
+
+/**
+ * Benchmark entry point: validate the environment (VLM URL, disk space,
+ * frames dir), healthcheck the VLM, build and run all suites, print a
+ * human-readable summary, persist JSON results plus a rolling index, and
+ * emit machine-readable events for the Aegis host.
+ * Exits 1 on any setup failure or failed test, 0 otherwise.
+ */
+async function main() {
+  log('');
+  log(' ╔══════════════════════════════════════════════════════════════╗');
+  log(' β•‘ HomeSafe-Bench β€” Indoor Safety Hazard Detection Benchmark β•‘');
+  log(' β•‘ Inspired by HomeSafeBench (arXiv 2509.23690) β•‘');
+  log(' β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•');
+  log('');
+
+  // Check VLM
+  if (!VLM_URL) {
+    log(' ❌ VLM server URL required. Pass --vlm http://localhost:5405');
+    log(' This is a VLM-only benchmark (indoor safety image analysis).');
+    emit({ event: 'error', message: 'VLM server URL required' });
+    process.exit(1);
+  }
+
+  // Disk space check (minimal β€” bundled frames are <50MB, dataset download ~20GB when available)
+  checkDiskSpace(RESULTS_DIR, 0.1);
+
+  // Check for upstream dataset (will use bundled fixtures if not available)
+  checkUpstreamDataset();
+
+  // System info
+  results.system = {
+    platform: `${os.platform()} ${os.arch()}`,
+    cpus: os.cpus()[0]?.model || 'unknown',
+    totalRAM_GB: (os.totalmem() / 1073741824).toFixed(1), // bytes β†’ GiB
+    node: process.version,
+  };
+
+  log(` VLM: ${VLM_URL}`);
+  log(` Mode: ${TEST_MODE} (${TEST_MODE === 'quick' ? '10' : '40'} tests)`);
+  log(` Frames: ${FRAMES_DIR}`);
+  log(` Results: ${RESULTS_DIR}`);
+  log(` System: ${results.system.cpus} (${results.system.totalRAM_GB} GB RAM)`);
+
+  // VLM healthcheck β€” a tiny 'ping' completion also yields the model id.
+  try {
+    const ping = await vlmCall([
+      { role: 'user', content: 'ping' },
+    ], { maxTokens: 5, timeout: 10000 });
+    results.model.vlm = ping.model || 'unknown';
+    log(` VLM Model: ${results.model.vlm}`);
+  } catch (err) {
+    log(`\n ❌ Cannot reach VLM endpoint: ${err.message}`);
+    log(` URL: ${VLM_URL}`);
+    log(' Check that the VLM server is running.\n');
+    emit({ event: 'error', message: `Cannot reach VLM endpoint: ${err.message}` });
+    process.exit(1);
+  }
+
+  // Emit ready event
+  emit({
+    event: 'ready',
+    vlm: results.model.vlm,
+    system: results.system.cpus,
+    mode: TEST_MODE,
+  });
+
+  // Check that fixture frames exist
+  if (!fs.existsSync(FRAMES_DIR)) {
+    log(`\n ❌ Frames directory not found: ${FRAMES_DIR}`);
+    log(' Run the image generation step first.');
+    emit({ event: 'error', message: 'Frames directory not found' });
+    process.exit(1);
+  }
+
+  const frameCount = fs.readdirSync(FRAMES_DIR).filter(f => f.endsWith('.png')).length;
+  log(` Frames: ${frameCount} PNG files loaded`);
+
+  // Load scenarios and build test suites
+  const byCategory = loadScenarios();
+  const totalTests = Object.values(byCategory).reduce((n, cat) => n + cat.scenarios.length, 0);
+  log(`\n πŸ“Š ${totalTests} tests across ${Object.keys(byCategory).length} categories\n`);
+
+  buildSuites(byCategory);
+
+  // Run all suites
+  const suiteStart = Date.now();
+  await runSuites();
+  results.totals.timeMs = Date.now() - suiteStart;
+
+  // Summary
+  const { passed, failed, skipped, total, timeMs } = results.totals;
+  const tokPerSec = timeMs > 0 ? ((results.tokenTotals.total / (timeMs / 1000)).toFixed(1)) : '?';
+
+  log(`\n${'═'.repeat(66)}`);
+  log(` RESULTS: ${passed}/${total} passed, ${failed} failed, ${skipped} skipped (${(timeMs / 1000).toFixed(1)}s)`);
+  log(` TOKENS: ${results.tokenTotals.total} total (${tokPerSec} tok/s)`);
+  log(` MODEL: ${results.model.vlm || 'unknown'}`);
+
+  // Compare with academic benchmark
+  log(`\n πŸ“ Academic reference (HomeSafeBench, best model):`);
+  log(` F1-score: 10.23% β€” current VLMs struggle significantly with safety hazards`);
+  log(` Your score: ${total > 0 ? ((passed / total * 100).toFixed(1) + '%') : 'N/A'} pass rate`);
+  log(`${'═'.repeat(66)}`);
+
+  if (failed > 0) {
+    log('\n Failures:');
+    for (const s of results.suites) {
+      for (const t of s.tests) {
+        if (t.status === 'fail') log(` ❌ ${s.name} > ${t.name}: ${t.detail}`);
+      }
+    }
+  }
+
+  // Save results
+  fs.mkdirSync(RESULTS_DIR, { recursive: true });
+  // Sanitize the model id so it is safe to use in a filename.
+  const modelSlug = (results.model.vlm || 'unknown').replace(/[^a-zA-Z0-9_.-]/g, '_');
+  // ISO timestamp with ':' and '.' replaced, trimmed to second precision.
+  const ts = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);
+  const resultFile = path.join(RESULTS_DIR, `${modelSlug}_${ts}.json`);
+  fs.writeFileSync(resultFile, JSON.stringify(results, null, 2));
+  log(`\n Results saved: ${resultFile}`);
+
+  // Update index β€” a missing/corrupt index.json restarts a fresh index
+  // (deliberate best-effort; the empty catch is intentional).
+  const indexFile = path.join(RESULTS_DIR, 'index.json');
+  let index = [];
+  try { index = JSON.parse(fs.readFileSync(indexFile, 'utf8')); } catch { }
+  index.push({
+    file: path.basename(resultFile),
+    model: results.model.vlm || 'unknown',
+    timestamp: results.timestamp,
+    passed, failed, total,
+    passRate: total > 0 ? passed / total : 0,
+    timeMs,
+    tokens: results.tokenTotals.total,
+  });
+  fs.writeFileSync(indexFile, JSON.stringify(index, null, 2));
+
+  // Emit completion
+  emit({
+    event: 'complete',
+    model: results.model.vlm,
+    passed, failed, skipped, total,
+    timeMs,
+    passRate: total > 0 ? passed / total : 0,
+    tokens: results.tokenTotals.total,
+    tokPerSec: parseFloat(tokPerSec) || 0,
+    resultFile,
+  });
+
+  log('');
+  process.exit(failed > 0 ? 1 : 0);
+}
+
+// Run when executed directly
+// (the second clause also catches indirect/symlinked invocations where
+// require.main differs but argv[1] resolves to this file)
+const isDirectRun = require.main === module ||
+  (process.argv[1] && require('path').resolve(process.argv[1]) === __filename);
+
+if (isDirectRun) {
+  main().catch(err => {
+    log(`Fatal: ${err.message}`);
+    emit({ event: 'error', message: err.message });
+    process.exit(1);
+  });
+}
+
+module.exports = { main };
diff --git a/skills/analysis/smarthome-bench/SKILL.md b/skills/analysis/smarthome-bench/SKILL.md
new file mode 100644
index 0000000..6140bd6
--- /dev/null
+++ b/skills/analysis/smarthome-bench/SKILL.md
@@ -0,0 +1,159 @@
+---
+name: SmartHome Video Anomaly Benchmark
+description: VLM evaluation suite for video anomaly detection in smart home camera footage
+version: 1.0.0
+category: analysis
+runtime: node
+entry: scripts/run-benchmark.cjs
+install: npm
+
+requirements:
+  node: ">=18"
+  npm_install: true
+  system_deps: [yt-dlp, ffmpeg]
+  platforms: ["linux", "macos", "windows"]
+---
+
+# SmartHome Video Anomaly Benchmark
+
+Benchmark suite evaluating VLM models on **video anomaly detection** across **7 smart home categories** β€” based on the [SmartHome-Bench](https://github.com/Xinyi-0724/SmartHome-Bench-LLM) dataset (1,203 YouTube smart home camera clips).
+
+> **Key differentiator from HomeSec-Bench**: This benchmark requires **video understanding** (multi-frame sampling), not single-frame analysis.
+
+## Setup
+
+**Requires `npm install` + system dependencies.** The deployment agent should:
+1. Check for `yt-dlp` and `ffmpeg` in PATH
+2. Run `npm install` in the skill directory
+
+Entry script: `scripts/run-benchmark.cjs`
+
+### Verification
+
+```bash
+node scripts/run-benchmark.cjs --help
+```
+
+## Quick Start
+
+### As an Aegis Skill (automatic)
+
+When spawned by Aegis, configuration is injected via environment variables. 
The benchmark downloads video clips, samples frames, evaluates with VLM, and generates an HTML report. + +### Standalone + +```bash +# Run with local VLM (subset mode, 50 videos) +node scripts/run-benchmark.cjs --vlm http://localhost:5405 + +# Quick test with 10 videos +node scripts/run-benchmark.cjs --vlm http://localhost:5405 --max-videos 10 + +# Full benchmark (all curated clips) +node scripts/run-benchmark.cjs --vlm http://localhost:5405 --mode full + +# Filter by category +node scripts/run-benchmark.cjs --vlm http://localhost:5405 --categories "Wildlife,Security" + +# Skip download (re-evaluate cached videos) +node scripts/run-benchmark.cjs --vlm http://localhost:5405 --skip-download + +# Skip report auto-open +node scripts/run-benchmark.cjs --vlm http://localhost:5405 --no-open +``` + +## Configuration + +### Environment Variables (set by Aegis) + +| Variable | Default | Description | +|----------|---------|-------------| +| `AEGIS_VLM_URL` | *(required)* | VLM server base URL | +| `AEGIS_VLM_MODEL` | β€” | Loaded VLM model ID | +| `AEGIS_SKILL_ID` | β€” | Skill identifier (enables skill mode) | +| `AEGIS_SKILL_PARAMS` | `{}` | JSON params from skill config | + +> **Note**: This is a VLM-only benchmark. An LLM gateway is not required. + +### User Configuration (config.yaml) + +This skill includes a [`config.yaml`](config.yaml) that defines user-configurable parameters. Aegis parses this at install time and renders a config panel in the UI. Values are delivered via `AEGIS_SKILL_PARAMS`. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `mode` | select | `subset` | Which clips to evaluate: `subset` (~50 clips) or `full` (all ~105 curated clips) | +| `maxVideos` | number | `50` | Maximum number of videos to evaluate | +| `categories` | text | `all` | Comma-separated category filter (e.g. 
`Wildlife,Security`) | +| `noOpen` | boolean | `false` | Skip auto-opening the HTML report in browser | + +### CLI Arguments (standalone fallback) + +| Argument | Default | Description | +|----------|---------|-------------| +| `--vlm URL` | *(required)* | VLM server base URL | +| `--out DIR` | `~/.aegis-ai/smarthome-bench` | Results directory | +| `--max-videos N` | `50` | Max videos to evaluate | +| `--mode MODE` | `subset` | `subset` or `full` | +| `--categories LIST` | `all` | Comma-separated category filter | +| `--skip-download` | β€” | Skip video download, use cached | +| `--no-open` | β€” | Don't auto-open report in browser | +| `--report` | *(auto in skill mode)* | Force report generation | + +## Protocol + +### Aegis β†’ Skill (env vars) +``` +AEGIS_VLM_URL=http://localhost:5405 +AEGIS_SKILL_ID=smarthome-bench +AEGIS_SKILL_PARAMS={} +``` + +### Skill β†’ Aegis (stdout, JSON lines) +```jsonl +{"event": "ready", "model": "SmolVLM2-2.2B", "system": "Apple M3"} +{"event": "suite_start", "suite": "Wildlife"} +{"event": "test_result", "suite": "Wildlife", "test": "smartbench_0003", "status": "pass", "timeMs": 4500} +{"event": "suite_end", "suite": "Wildlife", "passed": 12, "failed": 3} +{"event": "complete", "passed": 78, "total": 105, "timeMs": 480000, "reportPath": "/path/to/report.html"} +``` + +Human-readable output goes to **stderr** (visible in Aegis console tab). 
+ +## Test Suites (7 Categories) + +| Suite | Description | Anomaly Examples | +|-------|-------------|------------------| +| 🦊 Wildlife | Wild animals near home cameras | Bear on porch, deer in garden, coyote at night | +| πŸ‘΄ Senior Care | Elderly activity monitoring | Falls, wandering, unusual inactivity | +| πŸ‘Ά Baby Monitoring | Infant/child safety | Stroller rolling, child climbing, unsupervised | +| 🐾 Pet Monitoring | Pet behavior detection | Pet illness, escaped pets, unusual behavior | +| πŸ”’ Home Security | Intrusion & suspicious activity | Break-ins, trespassing, porch pirates | +| πŸ“¦ Package Delivery | Package arrival & theft | Stolen packages, misdelivered, weather damage | +| 🏠 General Activity | General smart home events | Unusual hours activity, appliance issues | + +Each clip is evaluated for **binary anomaly detection**: the VLM predicts normal (0) or abnormal (1), compared against expert annotations. + +## Metrics + +Per-category and overall: +- **Accuracy** β€” correct predictions / total +- **Precision** β€” true positives / predicted positives +- **Recall** β€” true positives / actual positives +- **F1-Score** β€” harmonic mean of precision & recall +- **Confusion Matrix** β€” TP, FP, TN, FN breakdown + +## Results + +Results are saved to `~/.aegis-ai/smarthome-bench/` as JSON. An HTML report with per-category breakdown, confusion matrix, and model comparison is auto-generated. + +## Requirements + +- Node.js β‰₯ 18 +- `npm install` (for `openai` SDK dependency) +- `yt-dlp` (video download from YouTube) +- `ffmpeg` (frame extraction from video clips) +- Running VLM server (must support multi-image input) + +## Citation + +Based on [SmartHome-Bench: A Comprehensive Benchmark for Video Anomaly Detection in Smart Homes Using Multi-Modal Foundation Models](https://arxiv.org/abs/2506.12992). 
diff --git a/skills/analysis/smarthome-bench/config.yaml b/skills/analysis/smarthome-bench/config.yaml
new file mode 100644
index 0000000..6b973d9
--- /dev/null
+++ b/skills/analysis/smarthome-bench/config.yaml
@@ -0,0 +1,25 @@
+params:
+  - key: mode
+    label: Evaluation Mode
+    type: select
+    options: [subset, full]
+    default: subset
+    description: "Which clips to evaluate: subset (~50 videos) or full (all ~105 curated clips)"
+
+  - key: maxVideos
+    label: Max Videos
+    type: number
+    default: 50
+    description: Maximum number of videos to evaluate (overrides mode)
+
+  - key: categories
+    label: Categories
+    type: text
+    default: all
+    description: "Comma-separated category filter, e.g. Wildlife,Security (default: all)"
+
+  - key: noOpen
+    label: Don't auto-open report
+    type: boolean
+    default: false
+    description: Skip opening the HTML report in browser after completion
diff --git a/skills/analysis/smarthome-bench/deploy.sh b/skills/analysis/smarthome-bench/deploy.sh
new file mode 100755
index 0000000..166d29b
--- /dev/null
+++ b/skills/analysis/smarthome-bench/deploy.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+# SmartHome-Bench deployment script
+# Called by Aegis deployment agent during skill installation
+
+# Abort on the first failing command so a partial install is surfaced
+# to the deployment agent instead of silently continuing.
+set -e
+
+SKILL_DIR="$(cd "$(dirname "$0")" && pwd)"
+echo "πŸ“¦ Deploying SmartHome-Bench from: $SKILL_DIR"
+
+# ── Check system dependencies ─────────────────────────────────────────────────
+
+echo "πŸ” Checking system dependencies..."
+
+if ! command -v yt-dlp &>/dev/null; then
+  echo "⚠️ yt-dlp not found. Attempting install..."
+  if command -v brew &>/dev/null; then
+    brew install yt-dlp
+  elif command -v pip3 &>/dev/null; then
+    pip3 install yt-dlp
+  elif command -v apt-get &>/dev/null; then
+    # apt package may not exist on older distros β€” fall back to pip3.
+    # NOTE(review): sudo may prompt for a password in non-interactive runs β€”
+    # confirm the deployment agent handles that.
+    sudo apt-get install -y yt-dlp 2>/dev/null || pip3 install yt-dlp
+  else
+    echo "❌ Cannot install yt-dlp automatically. Please install manually:"
+    echo "   pip install yt-dlp OR brew install yt-dlp"
+    exit 1
+  fi
+fi
+echo " βœ… yt-dlp: $(yt-dlp --version)"
+
+if ! command -v ffmpeg &>/dev/null; then
+  echo "⚠️ ffmpeg not found. Attempting install..."
+  if command -v brew &>/dev/null; then
+    brew install ffmpeg
+  elif command -v apt-get &>/dev/null; then
+    sudo apt-get install -y ffmpeg
+  else
+    echo "❌ Cannot install ffmpeg automatically. Please install manually:"
+    echo "   brew install ffmpeg OR apt-get install ffmpeg"
+    exit 1
+  fi
+fi
+echo " βœ… ffmpeg: $(ffmpeg -version 2>&1 | head -1)"
+
+# ── Install npm dependencies ──────────────────────────────────────────────────
+
+echo "πŸ“¦ Installing npm dependencies..."
+cd "$SKILL_DIR"
+# NOTE(review): `--production` is deprecated in recent npm versions in favor
+# of `--omit=dev` β€” confirm the minimum npm version targeted before changing.
+npm install --production
+
+echo "βœ… SmartHome-Bench deployed successfully"
diff --git a/skills/analysis/smarthome-bench/fixtures/annotations.json b/skills/analysis/smarthome-bench/fixtures/annotations.json
new file mode 100644
index 0000000..c28f142
--- /dev/null
+++ b/skills/analysis/smarthome-bench/fixtures/annotations.json
@@ -0,0 +1,794 @@
+[
+  {
+    "id": "smartbench_0001",
+    "category": "Baby Monitoring",
+    "anomaly_tag": 0,
+    "youtube_url": "https://www.youtube.com/watch?v=fVVAeFISp2c",
+    "description": "Child and adult riding tricycles in a suburban driveway. Girl in pink shirt near basketball hoop. Toys scattered. Supervised play.",
+    "reasoning": "Children playing in a driveway is common. Adult supervision reinforces normalcy."
+  },
+  {
+    "id": "smartbench_0002",
+    "category": "Baby Monitoring",
+    "anomaly_tag": 1,
+    "youtube_url": "https://www.youtube.com/watch?v=ayzKVfJX5Wg",
+    "description": "Woman pushes stroller into entryway. Stroller reverses and rolls backward down steps. Woman chases and catches stroller.",
+    "reasoning": "Stroller unexpectedly rolled backward, potentially endangering the baby. Adults intervened quickly."
+  },
+  {
+    "id": "smartbench_0050",
+    "category": "Baby Monitoring",
+    "anomaly_tag": 0,
+    "youtube_url": "https://www.youtube.com/watch?v=whDxqUY7Pok",
+    "description": "Boy in yellow helmet approaches front door. 
Person on camera tells boy he shrunk inside camera. Boy presses face to lens. Girl also looks. Playful interaction.", + "reasoning": "Fun interaction between camera user and children. Environment calm and safe." + }, + { + "id": "smartbench_0051", + "category": "Baby Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=rGP9bXOrMZU", + "description": "Toddler walks alone near pool area. No adult supervision visible. Child approaches water edge.", + "reasoning": "Unsupervised toddler near water is a safety hazard. This is considered an anomaly." + }, + { + "id": "smartbench_0052", + "category": "Baby Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=Y8ghImmBjlM", + "description": "Children playing in backyard with toys. Parents visible nearby. Normal daytime activity.", + "reasoning": "Standard supervised outdoor play. No concerns." + }, + { + "id": "smartbench_0053", + "category": "Baby Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=jsuqBMVeyeo", + "description": "Child climbs on furniture near window. Reaches for curtain rod. No adult present in frame.", + "reasoning": "Child climbing near window without supervision poses fall risk. Anomaly detected." + }, + { + "id": "smartbench_0054", + "category": "Baby Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=E-KX42Baw18", + "description": "Mother carrying baby walks through living room. Sets baby in playpen. Normal indoor routine.", + "reasoning": "Standard childcare activity. Safe environment." + }, + { + "id": "smartbench_0055", + "category": "Baby Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=Fgq6XQUEK4I", + "description": "Baby crawling toward stairs without gate. No supervision visible.", + "reasoning": "Unsupervised infant near stairs is a fall hazard. This is an anomaly." 
+ }, + { + "id": "smartbench_0003", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=TJ3OahLUahw", + "description": "Wild turkey on back porch railing, facing door. Moves head side to side surveying area. Grill and propane tank in background. Overcast.", + "reasoning": "Wild turkey unexpectedly appeared on porch. No human presence. Not domesticated." + }, + { + "id": "smartbench_0005", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=l5NFMDdYbM0", + "description": "Male mallard duck walks on concrete pathway toward camera. Camera announces recording. Duck startled and walks away.", + "reasoning": "Duck of unknown domestication appears at porch, attempting to enter. Considered anomaly." + }, + { + "id": "smartbench_0006", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=U9qHetMzD4s", + "description": "Coyote face close-up at night in black and white. Camera announces recording. Coyote retreats into snowy yard bordered by forest.", + "reasoning": "Coyote appears at camera, potentially dangerous wildlife near home. Anomaly." + }, + { + "id": "smartbench_0007", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=GfNH24LsSh8", + "description": "Black bear in backyard at night. Pushes over trash can, rummages through spilled contents. Alone on grass.", + "reasoning": "Bear rummaging through trash can cause property damage. Anomaly." + }, + { + "id": "smartbench_0008", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=bIyAGCc3RD8", + "description": "Gray squirrel approaches front entrance. Climbs railing, jumps down steps, scurries away. Overcast, wet streets.", + "reasoning": "Squirrel interacting with doorbell/camera. Minor wildlife anomaly." 
+ }, + { + "id": "smartbench_0047", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=yb9pu5uickk", + "description": "Night vision: raccoon enters from left, approaches fruit on ground, grabs it, turns toward patio.", + "reasoning": "Raccoon carried away food. Property damage potential. Anomaly." + }, + { + "id": "smartbench_0145", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=TNDtycEzUYo", + "description": "Night: four deer approach wooden deck. Two spar playfully on lawn, rearing up. Then all graze calmly.", + "reasoning": "Wild deer around house could damage property. Anomaly." + }, + { + "id": "smartbench_0146", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=XXrIIU-Org0", + "description": "Night: large antlered deer crosses driveway followed by two spotted fawns. Deer walks leisurely.", + "reasoning": "Multiple deer at front door. Wild animals could damage property. Anomaly." + }, + { + "id": "smartbench_0278", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=GqWCH3AcRII", + "description": "Night: mother bear and cub walk across paved driveway. Cub walks ahead, mother follows. Disappear into darkness.", + "reasoning": "Bears near house pose threat to property and safety. Anomaly." + }, + { + "id": "smartbench_0279", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=jX7wSFVuStI", + "description": "Night: snake slithers along stone wall on porch. Camera night vision. Slow movement along wall top.", + "reasoning": "Snake at house entrance. Wild animal could be dangerous. Anomaly." + }, + { + "id": "smartbench_0281", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=Z__Wg1fMXMA", + "description": "Black bear at front door in daytime. 
Stands on hind legs, peers over wall, pushes on door trying to open it. Walks away.", + "reasoning": "Bear attempting to break in. Dangerous wildlife. Anomaly." + }, + { + "id": "smartbench_0046", + "category": "General Activity", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=LLxrNx-Wkq4", + "description": "Night residential street. Bright green meteor streaks across sky left to right. No other movement. Still environment.", + "reasoning": "Natural phenomenon (meteor). Typical night scene. Normal." + }, + { + "id": "smartbench_0056", + "category": "General Activity", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=-k64PIAB3Hw", + "description": "Daytime front porch. Mail carrier delivers package, waves at camera. Normal delivery routine.", + "reasoning": "Standard mail delivery. No concerns." + }, + { + "id": "smartbench_0057", + "category": "General Activity", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=YSlCea9IOt8", + "description": "Late night. Person wanders in front yard appearing disoriented. Stumbles, sits on lawn.", + "reasoning": "Unusual late-night activity. Person appears disoriented. Anomaly." + }, + { + "id": "smartbench_0058", + "category": "General Activity", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=TnjOgP6IvU8", + "description": "Family gathering in backyard. Adults and children around grill. Daytime. Normal BBQ activity.", + "reasoning": "Typical family gathering. No anomalies." + }, + { + "id": "smartbench_0059", + "category": "General Activity", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=pPsjT_lvATQ", + "description": "Garage door opens and closes repeatedly at 3 AM. No person visible. Possible malfunction or unauthorized access.", + "reasoning": "Repeated garage door activation at unusual hours without visible cause. Anomaly." 
+ }, + { + "id": "smartbench_0060", + "category": "General Activity", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=ogx7uoRRpNo", + "description": "Neighbor walking dog past house on sidewalk. Brief stop, dog sniffs lawn. Continue walking.", + "reasoning": "Normal neighborhood activity. No concerns." + }, + { + "id": "smartbench_0061", + "category": "General Activity", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=XuzHCZdubJE", + "description": "Sprinkler system activates at 2 AM flooding walkway. Water pooling near entrance.", + "reasoning": "Unusual water system activation at night. Possible malfunction. Anomaly." + }, + { + "id": "smartbench_0062", + "category": "General Activity", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=mBMPOdZYdb0", + "description": "Car pulls into driveway. Person exits, walks to front door with groceries. Normal arrival.", + "reasoning": "Standard homecoming routine. No anomalies." + }, + { + "id": "smartbench_0063", + "category": "General Activity", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=BnD3b20QZDQ", + "description": "Smoke visible from kitchen window. No fire alarm heard. Grows thicker over time.", + "reasoning": "Possible kitchen fire or smoke event. Safety concern. Anomaly." + }, + { + "id": "smartbench_0064", + "category": "General Activity", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=bXHsotibxNE", + "description": "Kids riding bicycles on sidewalk. Parents watching from porch. Sunny afternoon.", + "reasoning": "Normal supervised outdoor play. Safe environment." + }, + { + "id": "smartbench_0065", + "category": "General Activity", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=ewRg_VpmtRI", + "description": "Tree branch falls on parked car in driveway during storm. Visible damage to windshield.", + "reasoning": "Weather-related property damage. Safety hazard. Anomaly." 
+ }, + { + "id": "smartbench_0004", + "category": "Home Security", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=k9si1UG8uLc", + "description": "Two individuals approach front door. One in red jacket, another in yellow bird costume with hospital foundation text. Mascot dances at camera.", + "reasoning": "Person in eccentric attire acting unusually at front door could frighten. Anomaly." + }, + { + "id": "smartbench_0048", + "category": "Home Security", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=tyBhNSRzOGI", + "description": "Man at front door with McDonald's bag. Calls boy named Cam. Boy appears. Playful interaction through doorbell camera with woman.", + "reasoning": "Fun light-hearted family interaction using doorbell camera. Normal." + }, + { + "id": "smartbench_0049", + "category": "Home Security", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=anH1uE2j7Qw", + "description": "Night: man in dark clothing approaches pickup truck in driveway. Tries driver door. Camera announces recording. Man runs away.", + "reasoning": "Attempted vehicle theft. Person fled after camera warning. Anomaly." + }, + { + "id": "smartbench_0280", + "category": "Home Security", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=YhSSxNNkeL0", + "description": "Night: individual approaches white SUV, opens driver door, searches inside. Voice says 'Hey!' Person closes door and runs.", + "reasoning": "Unauthorized vehicle access. Theft attempt. Anomaly." + }, + { + "id": "smartbench_0364", + "category": "Home Security", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=kAK6qLCbZ4k", + "description": "Night: man in dark clothing approaches blue car with car jack. Alarm goes off, voice yells 'Get out!' Man runs away.", + "reasoning": "Attempted tire theft from vehicle. Man fled when caught. Anomaly." 
+ }, + { + "id": "smartbench_0366", + "category": "Home Security", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=M3zJnxVOUWE", + "description": "Two teenagers playing basketball in driveway. Kicking ball up. Girl makes shot through hoop. Sunny day.", + "reasoning": "Casual game of basketball. Normal everyday scene." + }, + { + "id": "smartbench_0533", + "category": "Home Security", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=0u5DS9-gPHE", + "description": "Night: person examines door on wooden deck. Moves picture frames. Reaches under camera. Suspicious rummaging.", + "reasoning": "Woman suspiciously rummaging at front door. Potential theft. Anomaly." + }, + { + "id": "smartbench_0534", + "category": "Home Security", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=mKRsMtdqUKs", + "description": "Woman in blue shirt approaches porch. Picks up cards, reads them smiling. Picks up flower bouquet in vase. Carries away excitedly.", + "reasoning": "Gift retrieval by resident. Calm, happy behavior. Normal." + }, + { + "id": "smartbench_0535", + "category": "Home Security", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=5-Jd7oUisk4", + "description": "Overcast: two hooded individuals in dark clothing enter patio. One looks inside building. Other approaches teal door, looks through window, makes phone call.", + "reasoning": "Suspicious individuals peering through windows. Potential burglary. Anomaly." + }, + { + "id": "smartbench_0536", + "category": "Home Security", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=JI2NtXQR9to", + "description": "Daytime carport: man adjusting bicycle next to kayaks and motorcycle. Moving bicycle around positioning it.", + "reasoning": "Person attempting to steal bicycle. Theft. Anomaly." 
+ }, + { + "id": "smartbench_0009", + "category": "Pet Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=Ym9pPECNQyY", + "description": "Black squirrel reaches into metal container on lawn. Ginger cat approaches. Squirrel hesitates but resumes. Cat steps closer, squirrel flees.", + "reasoning": "Squirrel foraging near house. Cat may have escaped. Potential animal conflict. Anomaly." + }, + { + "id": "smartbench_0143", + "category": "Pet Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=deamYHz7Sno", + "description": "Two cows enter driveway through metal gate. Brown-white cow stops briefly, exits. White cow hesitates, follows. Gate swings shut.", + "reasoning": "Cows entered yard alone without owner. Possibly escaped. Anomaly." + }, + { + "id": "smartbench_0144", + "category": "Pet Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=kAK6qLCbZ4k", + "description": "Woman in black jacket on porch greeted by two excited dogs. Man in blue sweater enters with green bag. Family arriving home.", + "reasoning": "Normal family returning home with dogs. Standard activity." + }, + { + "id": "smartbench_0363", + "category": "Pet Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=5F2mg0p1EY8", + "description": "Indoor: light brown dog near hallway. Walks to dog bed, rolls on dark grey rug playfully. Well-lit room with sofa. Calm.", + "reasoning": "Normal pet behavior. Dog playing safely indoors. No concerns." + }, + { + "id": "smartbench_0365", + "category": "Pet Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=7KiITgvN5Aw", + "description": "Indoor: brown-white boxer dog enters, walks to kiddie pool with newborn puppies. Owner instructs dog to feed puppies. Dog nurses.", + "reasoning": "Normal pet monitoring. Owner checking on dog and puppies. Safe and calm." 
+ }, + { + "id": "smartbench_0066", + "category": "Pet Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=TKxo96PRJqc", + "description": "Dog alone in backyard digging hole near fence. Appears to be attempting to escape under fence.", + "reasoning": "Dog attempting to escape yard. Could get lost or injured. Anomaly." + }, + { + "id": "smartbench_0067", + "category": "Pet Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=knIyxSHUhnQ", + "description": "Cat sleeping on couch. Stretches, yawns, repositions. Indoor camera. Normal feline behavior.", + "reasoning": "Standard cat resting behavior. No anomalies." + }, + { + "id": "smartbench_0068", + "category": "Pet Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=MamIX1zJMM4", + "description": "Dog approaches kitchen counter. Jumps up, takes food from counter. Knocks plate to floor.", + "reasoning": "Pet getting into food and breaking items. Minor property damage. Anomaly." + }, + { + "id": "smartbench_0069", + "category": "Pet Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=0o6LGpxRhT8", + "description": "Two dogs playing together in living room. Chasing each other around furniture. Tails wagging.", + "reasoning": "Normal playful pet behavior. No safety concerns." + }, + { + "id": "smartbench_0070", + "category": "Pet Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=pOA51YSNlds", + "description": "Cat knocks over vase from shelf. Vase shatters on floor. Cat runs away startled.", + "reasoning": "Pet causing property damage. Broken glass hazard. Anomaly." + }, + { + "id": "smartbench_0071", + "category": "Pet Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=y3O0eItyxDc", + "description": "Dog lying in designated pet bed. Eyes closed, sleeping peacefully. Occasional ear twitch.", + "reasoning": "Normal pet resting. 
Calm environment. No anomalies." + }, + { + "id": "smartbench_0072", + "category": "Pet Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=OE0XSgcYjcA", + "description": "Dog pacing back and forth near door. Whining audibly. Repeated scratching at door.", + "reasoning": "Dog showing signs of distress or needing to go out urgently. Behavioral anomaly." + }, + { + "id": "smartbench_0073", + "category": "Pet Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=E1hMlQ7BfIY", + "description": "Cat using litter box. Normal bathroom behavior. Exits and walks to water bowl.", + "reasoning": "Standard pet care routine. Normal behavior." + }, + { + "id": "smartbench_0277", + "category": "Senior Care", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=jFNyxIdqD1o", + "description": "Boy enters house. Loud noise, boy shouts and falls. Woman in white pants enters concerned. Man in black shirt enters laughing, was hiding to scare. Picks up boy.", + "reasoning": "Man scared child causing fall. Could cause physical harm. Vague anomaly." + }, + { + "id": "smartbench_0074", + "category": "Senior Care", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=bUbXnWYDjmA", + "description": "Elderly woman walks through living room with walker. Moves slowly but steadily. Reaches kitchen, sits down at table.", + "reasoning": "Normal independent mobility. Senior maintaining daily routine safely." + }, + { + "id": "smartbench_0075", + "category": "Senior Care", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=ufjOxeZ-FNw", + "description": "Elderly person takes a fall in hallway. Struggles to get up. Reaches for furniture to pull self up.", + "reasoning": "Fall event for elderly person. Medical concern. Anomaly." 
+ }, + { + "id": "smartbench_0076", + "category": "Senior Care", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=sr1VLgBYMNs", + "description": "Senior couple watching TV in living room. Both seated on couch. Normal evening activity.", + "reasoning": "Typical leisure activity. No concerns." + }, + { + "id": "smartbench_0077", + "category": "Senior Care", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=Ep3sf3jD8E8", + "description": "Elderly person wandering around house at 3 AM. Opens front door, steps outside briefly. Returns inside confused.", + "reasoning": "Nighttime wandering. Possible disorientation. Safety concern. Anomaly." + }, + { + "id": "smartbench_0078", + "category": "Senior Care", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=b92shV0BbhM", + "description": "Senior doing light exercises in living room. Stretching movements. Following routine calmly.", + "reasoning": "Normal exercise activity. Healthy behavior. No anomalies." + }, + { + "id": "smartbench_0079", + "category": "Senior Care", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=LINEK1p4-1U", + "description": "Elderly person hasn't moved from chair for extended period. No response to doorbell. Appears unresponsive.", + "reasoning": "Extended inactivity and unresponsiveness concerning. Medical emergency potential. Anomaly." + }, + { + "id": "smartbench_0080", + "category": "Senior Care", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=QVGxK7X2mUo", + "description": "Elderly woman in kitchen preparing food. Moves carefully around counter. Uses stove with attention.", + "reasoning": "Normal cooking activity. Senior managing daily task safely." + }, + { + "id": "smartbench_0081", + "category": "Senior Care", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=iF3N61nr6JA", + "description": "Senior leaves stove burner on, walks away to another room. 
Pot begins to smoke.", + "reasoning": "Forgotten stove with smoking pot. Fire hazard. Anomaly." + }, + { + "id": "smartbench_0082", + "category": "Senior Care", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=_zLhytipIQU", + "description": "Elderly man reading book in armchair. Occasionally sips from cup. Peaceful afternoon scene.", + "reasoning": "Normal leisure activity. Relaxed and safe environment." + }, + { + "id": "smartbench_0083", + "category": "Senior Care", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=bBgevdp5DKM", + "description": "Senior trips on rug edge in hallway. Catches self on wall but appears shaken. Limps slightly continuing.", + "reasoning": "Trip hazard incident. Potential injury. Mobility concern. Anomaly." + }, + { + "id": "smartbench_0084", + "category": "Senior Care", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=TNDtycEzUYo", + "description": "Caregiver visits elderly person at home. Helps with medications. Cheerful interaction.", + "reasoning": "Normal caregiving visit. Standard assisted living activity." + }, + { + "id": "smartbench_0085", + "category": "Senior Care", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=XXrIIU-Org0", + "description": "Senior drops medication bottles, unable to pick them up. Sits looking frustrated for extended period.", + "reasoning": "Mobility limitation preventing medication access. Care concern. Anomaly." + }, + { + "id": "smartbench_0086", + "category": "Package Delivery", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=Xlp3FZyYuK8", + "description": "Delivery driver in uniform approaches front door. Places package carefully on porch. Rings doorbell, takes photo. Walks back to van.", + "reasoning": "Standard package delivery. Professional conduct. Normal." 
+ }, + { + "id": "smartbench_0087", + "category": "Package Delivery", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=GqWCH3AcRII", + "description": "Person in casual clothes approaches porch. Looks around nervously. Picks up package from doorstep. Quickly walks away.", + "reasoning": "Package theft. Person took package that wasn't theirs. Anomaly." + }, + { + "id": "smartbench_0088", + "category": "Package Delivery", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=jX7wSFVuStI", + "description": "UPS driver delivers large box. Homeowner opens door, signs for package. Brief conversation. Driver leaves.", + "reasoning": "Normal signed delivery. Expected interaction. No anomalies." + }, + { + "id": "smartbench_0089", + "category": "Package Delivery", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=YhSSxNNkeL0", + "description": "Delivery driver throws package from several feet away at porch. Package bounces on ground. Driver walks away quickly.", + "reasoning": "Rough package handling. Potential damage to contents. Anomaly." + }, + { + "id": "smartbench_0090", + "category": "Package Delivery", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=Z__Wg1fMXMA", + "description": "Amazon driver walks to door. Carefully places package beside mat. Takes delivery photo. Professional service.", + "reasoning": "Standard careful delivery. Normal activity." + }, + { + "id": "smartbench_0091", + "category": "Package Delivery", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=jFNyxIdqD1o", + "description": "Two people drive up to house. Passenger gets out, takes all three packages from porch. Gets back in car. Drive away.", + "reasoning": "Multiple packages stolen by organized thieves. Porch piracy. Anomaly." 
+ }, + { + "id": "smartbench_0092", + "category": "Package Delivery", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=z-TMWr0axSo", + "description": "FedEx driver leaves package at door. Homeowner retrieves within minutes. Normal day.", + "reasoning": "Routine delivery and retrieval. No issues." + }, + { + "id": "smartbench_0093", + "category": "Package Delivery", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=_yp-l9ywvPU", + "description": "Package left on porch during rainstorm. Gets soaked. No protective covering provided.", + "reasoning": "Package exposed to weather damage. Delivery issue. Anomaly." + }, + { + "id": "smartbench_0094", + "category": "Package Delivery", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=3XBBQlR21rY", + "description": "Neighbor picks up misdelivered package from porch. Walks it over to correct address next door.", + "reasoning": "Helpful neighbor redirecting package. Normal community behavior." + }, + { + "id": "smartbench_0095", + "category": "Package Delivery", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=c4BiMNV5dME", + "description": "Delivery left at wrong address. Package sits on porch for days. Gets stepped on by visitors.", + "reasoning": "Misdelivered package accumulating damage. Delivery failure. Anomaly." + }, + { + "id": "smartbench_0096", + "category": "Package Delivery", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=dZU47xjMPfc", + "description": "Mail carrier delivers envelope and small package. Places in mailbox area. Routine postal delivery.", + "reasoning": "Standard mail delivery. Nothing unusual." + }, + { + "id": "smartbench_0097", + "category": "Package Delivery", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=1OsGAAGe6CA", + "description": "Person follows delivery truck. 
After driver leaves, approaches porch and takes the just-delivered package.", + "reasoning": "Trailing delivery vehicle to steal packages. Organized theft. Anomaly." + }, + { + "id": "smartbench_0098", + "category": "Package Delivery", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=L7s4QL9fozo", + "description": "Homeowner places outgoing package on porch for scheduled pickup. USPS driver collects it.", + "reasoning": "Scheduled package pickup. Normal postal service interaction." + }, + { + "id": "smartbench_0099", + "category": "Package Delivery", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=UuDW7Zvm5Gg", + "description": "Delivery driver leaves package hanging out of mailbox precariously. Visible to street. Easy target.", + "reasoning": "Insecure package placement visible from street. Theft risk. Anomaly." + }, + { + "id": "smartbench_0100", + "category": "Home Security", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=P7UGtwg7C4I", + "description": "Homeowner on porch checking mail. Waves at passing neighbor. Normal daytime activity.", + "reasoning": "Standard residential activity. No security concerns." + }, + { + "id": "smartbench_0101", + "category": "Home Security", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=Mh1EgnB1DmQ", + "description": "Night: person approaches back door. Tries handle. Uses tool to pry at lock. Camera floodlight activates. Person runs.", + "reasoning": "Break-in attempt at rear entrance. Criminal activity. Anomaly." + }, + { + "id": "smartbench_0102", + "category": "Home Security", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=IUW21JpYPPE", + "description": "Landscaper arrives with mower. Works on front lawn. Professional uniform. Expected service visit.", + "reasoning": "Scheduled lawn maintenance. Normal expected activity." 
+ }, + { + "id": "smartbench_0103", + "category": "Home Security", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=Q_0WEIZVc28", + "description": "Person walks up to multiple cars on street. Tries each door handle. One opens. Reaches inside then continues walking.", + "reasoning": "Vehicle break-ins on street. Checking multiple cars. Theft. Anomaly." + }, + { + "id": "smartbench_0104", + "category": "Home Security", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=7IwZMmTWoKM", + "description": "Pizza delivery person at front door. Homeowner answers, takes pizza, pays. Brief friendly exchange.", + "reasoning": "Expected food delivery. Normal transaction." + }, + { + "id": "smartbench_0010", + "category": "Baby Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=QNbsdpgb2YI", + "description": "Toddler playing with blocks in nursery. Parent visible in doorway watching. Safe supervised play.", + "reasoning": "Normal supervised indoor play. Safe environment. No concerns." + }, + { + "id": "smartbench_0011", + "category": "Baby Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=sZ9ij4A0REg", + "description": "Baby in crib pulling at mobile attachment. Mobile comes loose and falls near baby.", + "reasoning": "Nursery equipment failure near infant. Safety hazard. Anomaly." + }, + { + "id": "smartbench_0012", + "category": "Baby Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=gI4oK4azztc", + "description": "Baby sleeping peacefully in crib. Night vision camera. Occasional movement during sleep. Normal patterns.", + "reasoning": "Normal infant sleep patterns. No concerns." + }, + { + "id": "smartbench_0013", + "category": "Baby Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=deamYHz7Sno", + "description": "Toddler opens baby gate and walks toward stairs alone. 
Gate latch appears broken or improperly secured.", + "reasoning": "Failed safety gate allowing unsupervised stair access. Serious safety hazard. Anomaly." + }, + { + "id": "smartbench_0014", + "category": "Baby Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=kAK6qLCbZ4k", + "description": "Mother and child reading book on couch. Calm indoor scene. Child points at pictures.", + "reasoning": "Normal bonding activity. Safe supervised environment." + }, + { + "id": "smartbench_0015", + "category": "Baby Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=bUbXnWYDjmA", + "description": "Child opens front door and walks outside alone. No adult follows. Door left open.", + "reasoning": "Unsupervised child leaving house alone. Serious safety concern. Anomaly." + }, + { + "id": "smartbench_0105", + "category": "Senior Care", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=LLxrNx-Wkq4", + "description": "Elderly couple taking morning walk around neighborhood. Slow steady pace. Return home safely.", + "reasoning": "Normal exercise routine. Healthy activity for seniors." + }, + { + "id": "smartbench_0106", + "category": "Senior Care", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=ufjOxeZ-FNw", + "description": "Senior found on floor near bathroom. Appears to have slipped. Calling out for help.", + "reasoning": "Fall in bathroom. Medical emergency. Requires immediate attention. Anomaly." + }, + { + "id": "smartbench_0107", + "category": "Wildlife", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=sr1VLgBYMNs", + "description": "Birds feeding at bird feeder in backyard. Squirrel approaches but can't reach. Normal wildlife interaction.", + "reasoning": "Expected backyard wildlife at feeder. Normal and welcome activity." 
+ }, + { + "id": "smartbench_0108", + "category": "Wildlife", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=Ep3sf3jD8E8", + "description": "Rabbit on front lawn in early morning. Nibbles grass. Hops away when car passes. Daytime.", + "reasoning": "Common suburban wildlife. Harmless. Normal." + }, + { + "id": "smartbench_0109", + "category": "Wildlife", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=b92shV0BbhM", + "description": "Hummingbird visits porch feeder. Hovers, drinks nectar, flies away. Beautiful nature scene.", + "reasoning": "Typical backyard bird activity. No concerns." + }, + { + "id": "smartbench_0110", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=LINEK1p4-1U", + "description": "Night: large cat (possibly bobcat) stalks across backyard. Moves stealthily. Disappears into bushes near pet door.", + "reasoning": "Predatory wild cat near pet door. Risk to household pets. Anomaly." + }, + { + "id": "smartbench_0111", + "category": "Pet Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=QVGxK7X2mUo", + "description": "Dog eating from food bowl in kitchen. Normal mealtime behavior. Wags tail.", + "reasoning": "Standard pet feeding time. Normal behavior." + }, + { + "id": "smartbench_0112", + "category": "Pet Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=iF3N61nr6JA", + "description": "Dog chewing on electrical cord plugged into wall. Cord partially damaged.", + "reasoning": "Pet chewing on electrical cord. Electrocution and fire hazard. Anomaly." 
+ } +] \ No newline at end of file diff --git a/skills/analysis/smarthome-bench/package-lock.json b/skills/analysis/smarthome-bench/package-lock.json new file mode 100644 index 0000000..b1d8328 --- /dev/null +++ b/skills/analysis/smarthome-bench/package-lock.json @@ -0,0 +1,37 @@ +{ + "name": "smarthome-bench", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "smarthome-bench", + "version": "1.0.0", + "license": "ISC", + "dependencies": { + "openai": "^6.27.0" + } + }, + "node_modules/openai": { + "version": "6.27.0", + "resolved": "https://registry.npmjs.org/openai/-/openai-6.27.0.tgz", + "integrity": "sha512-osTKySlrdYrLYTt0zjhY8yp0JUBmWDCN+Q+QxsV4xMQnnoVFpylgKGgxwN8sSdTNw0G4y+WUXs4eCMWpyDNWZQ==", + "license": "Apache-2.0", + "bin": { + "openai": "bin/cli" + }, + "peerDependencies": { + "ws": "^8.18.0", + "zod": "^3.25 || ^4.0" + }, + "peerDependenciesMeta": { + "ws": { + "optional": true + }, + "zod": { + "optional": true + } + } + } + } +} diff --git a/skills/analysis/smarthome-bench/package.json b/skills/analysis/smarthome-bench/package.json new file mode 100644 index 0000000..0f2cdeb --- /dev/null +++ b/skills/analysis/smarthome-bench/package.json @@ -0,0 +1,22 @@ +{ + "name": "smarthome-bench", + "version": "1.0.0", + "description": "SmartHome-Bench video anomaly detection benchmark for VLM evaluation", + "main": "scripts/run-benchmark.cjs", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "keywords": [ + "benchmark", + "vlm", + "video", + "anomaly-detection", + "smart-home" + ], + "author": "", + "license": "ISC", + "type": "commonjs", + "dependencies": { + "openai": "^6.27.0" + } +} \ No newline at end of file diff --git a/skills/analysis/smarthome-bench/scripts/generate-report.cjs b/skills/analysis/smarthome-bench/scripts/generate-report.cjs new file mode 100644 index 0000000..6eec6bd --- /dev/null +++ b/skills/analysis/smarthome-bench/scripts/generate-report.cjs @@ -0,0 
+1,307 @@ +#!/usr/bin/env node +/** + * HTML Report Generator for SmartHome-Bench Video Anomaly Detection Benchmark + * + * Reads JSON result files from the results directory and generates + * a self-contained HTML report with: + * - Per-category accuracy breakdown + * - Confusion matrix (TP/FP/TN/FN) + * - Overall metrics (accuracy, precision, recall, F1) + * - Historical model comparison table + * + * Usage: + * node generate-report.cjs [results-dir] + * Default: ~/.aegis-ai/smarthome-bench + */ + +const fs = require('fs'); +const path = require('path'); +const os = require('os'); + +const RESULTS_DIR = process.argv[2] || path.join(os.homedir(), '.aegis-ai', 'smarthome-bench'); + +function generateReport(resultsDir = RESULTS_DIR) { + // Find all result files + const files = fs.readdirSync(resultsDir) + .filter(f => f.endsWith('.json') && !f.startsWith('index')) + .sort() + .reverse(); // Most recent first + + if (files.length === 0) { + console.error('No result files found in', resultsDir); + return null; + } + + // Load latest result + const latestFile = path.join(resultsDir, files[0]); + const latest = JSON.parse(fs.readFileSync(latestFile, 'utf8')); + + // Load all results for comparison + const allResults = files.slice(0, 20).map(f => { + try { + return JSON.parse(fs.readFileSync(path.join(resultsDir, f), 'utf8')); + } catch { + return null; + } + }).filter(Boolean); + + // Generate HTML + const html = buildHTML(latest, allResults); + const reportPath = path.join(resultsDir, 'report.html'); + fs.writeFileSync(reportPath, html); + console.error(`Report generated: ${reportPath}`); + return reportPath; +} + +function buildHTML(latest, allResults) { + const model = latest.model?.vlm || 'Unknown'; + const timestamp = new Date(latest.timestamp).toLocaleString(); + const totalTests = latest.totals?.total || 0; + const passed = latest.totals?.passed || 0; + const failed = latest.totals?.failed || 0; + const skipped = latest.totals?.skipped || 0; + const timeMs = 
latest.totals?.timeMs || 0; + const metrics = latest.metrics || {}; + const overall = metrics.overall || {}; + const perCategory = metrics.perCategory || {}; + + // Build category rows + const categoryRows = Object.entries(perCategory).map(([cat, m]) => { + const accPct = (m.accuracy * 100).toFixed(1); + const precPct = (m.precision * 100).toFixed(1); + const recPct = (m.recall * 100).toFixed(1); + const f1Pct = (m.f1 * 100).toFixed(1); + const accClass = m.accuracy >= 0.8 ? 'high' : m.accuracy >= 0.5 ? 'mid' : 'low'; + return ` + ${escHtml(cat)} + ${accPct}% + ${precPct}% + ${recPct}% + ${f1Pct}% + ${m.tp} + ${m.fp} + ${m.tn} + ${m.fn} + ${m.total} + `; + }).join('\n'); + + // Build suite detail rows + const suiteDetailRows = (latest.suites || []).map(s => { + const testRows = s.tests.map(t => { + const statusIcon = t.status === 'pass' ? 'βœ…' : t.status === 'fail' ? '❌' : '⏭️'; + const statusClass = t.status; + return ` + ${statusIcon} + ${escHtml(t.name)} + ${t.status} + ${t.timeMs}ms + ${escHtml((t.detail || '').slice(0, 100))} + `; + }).join('\n'); + + return `
+

${escHtml(s.name)}

+
+ βœ… ${s.passed} passed Β· ❌ ${s.failed} failed Β· ⏭️ ${s.skipped} skipped Β· ⏱ ${(s.timeMs / 1000).toFixed(1)}s +
+ + + ${testRows} +
TestStatusTimeDetail
+
`; + }).join('\n'); + + // Build comparison table + const comparisonRows = allResults.map(r => { + const rModel = r.model?.vlm || 'Unknown'; + const rTime = new Date(r.timestamp).toLocaleDateString(); + const rMetrics = r.metrics?.overall || {}; + const rAcc = ((rMetrics.accuracy || 0) * 100).toFixed(1); + const rF1 = ((rMetrics.f1 || 0) * 100).toFixed(1); + const rPassed = r.totals?.passed || 0; + const rTotal = r.totals?.total || 0; + const rTimeMs = r.totals?.timeMs || 0; + return ` + ${escHtml(rModel)} + ${rTime} + ${rPassed}/${rTotal} + ${rAcc}% + ${rF1}% + ${(rTimeMs / 1000).toFixed(0)}s + `; + }).join('\n'); + + const overallAccPct = ((overall.accuracy || 0) * 100).toFixed(1); + const overallPrecPct = ((overall.precision || 0) * 100).toFixed(1); + const overallRecPct = ((overall.recall || 0) * 100).toFixed(1); + const overallF1Pct = ((overall.f1 || 0) * 100).toFixed(1); + + return ` + + + + +SmartHome-Bench Report β€” ${escHtml(model)} + + + +
+ +
+

🏠 SmartHome-Bench Report

+
+ Video Anomaly Detection Benchmark Β· ${escHtml(model)} Β· ${timestamp} +
+
+ +
+
${overallAccPct}%
Accuracy
+
${overallF1Pct}%
F1 Score
+
${overallPrecPct}%
Precision
+
${overallRecPct}%
Recall
+
${passed}/${totalTests}
Passed
+
${(timeMs / 1000).toFixed(0)}s
Total Time
+
+ +
+ πŸ–₯ ${escHtml(latest.system?.cpus || 'Unknown')} + πŸ’Ύ ${latest.system?.totalRAM_GB || '?'} GB RAM + πŸ”§ Node ${escHtml(latest.system?.node || '?')} +
+ +

πŸ“Š Overall Confusion Matrix

+
+
+
Predicted Normal
+
Predicted Abnormal
+
Actual Normal
+
TN: ${overall.tn || 0}
+
FP: ${overall.fp || 0}
+
Actual Abnormal
+
FN: ${overall.fn || 0}
+
TP: ${overall.tp || 0}
+
+ +

πŸ“‹ Per-Category Breakdown

+ + + + + + + + + + + + + + + + + ${categoryRows} + + + + + + + + + + + + + +
CategoryAccuracyPrecisionRecallF1TPFPTNFNTotal
Overall${overallAccPct}%${overallPrecPct}%${overallRecPct}%${overallF1Pct}%${overall.tp || 0}${overall.fp || 0}${overall.tn || 0}${overall.fn || 0}${totalTests}
+ +

πŸ§ͺ Test Details

+${suiteDetailRows} + +${allResults.length > 1 ? ` +

πŸ“ˆ Model Comparison

+ + + + + ${comparisonRows} +
ModelDatePassedAccuracyF1Time
+` : ''} + + + +
+ +`; +} + +function escHtml(str) { + return String(str || '').replace(/&/g, '&').replace(//g, '>').replace(/"/g, '"'); +} + +// Run if called directly +if (require.main === module) { + generateReport(); +} + +module.exports = { generateReport }; diff --git a/skills/analysis/smarthome-bench/scripts/run-benchmark.cjs b/skills/analysis/smarthome-bench/scripts/run-benchmark.cjs new file mode 100644 index 0000000..8db00ea --- /dev/null +++ b/skills/analysis/smarthome-bench/scripts/run-benchmark.cjs @@ -0,0 +1,825 @@ +#!/usr/bin/env node +/** + * SmartHome-Bench β€” Video Anomaly Detection Benchmark + * + * Evaluates VLM models on video anomaly detection across 7 smart home categories: + * - Wildlife, Senior Care, Baby Monitoring, Pet Monitoring, + * Home Security, Package Delivery, General Activity + * + * Based on SmartHome-Bench (https://github.com/Xinyi-0724/SmartHome-Bench-LLM) + * + * ## Skill Protocol (when spawned by Aegis) + * + * Aegis β†’ Skill (env vars): + * AEGIS_VLM_URL β€” VLM server URL (e.g. 
http://localhost:5405) + * AEGIS_SKILL_PARAMS β€” JSON params from skill config + * AEGIS_SKILL_ID β€” Skill ID + * + * Skill β†’ Aegis (stdout, JSON lines): + * {"event": "ready", "model": "SmolVLM2-2.2B"} + * {"event": "suite_start", "suite": "Wildlife"} + * {"event": "test_result", "suite": "...", "test": "...", "status": "pass", "timeMs": 1234} + * {"event": "suite_end", "suite": "...", "passed": 12, "failed": 3} + * {"event": "complete", "passed": 78, "total": 105, "timeMs": 480000} + * + * Standalone usage: + * node run-benchmark.cjs [options] + * --vlm URL VLM server (required) + * --max-videos N Max videos to evaluate (default: 50) + * --mode MODE subset or full (default: subset) + * --categories L Comma-separated category filter + * --skip-download Use cached videos only + * --out DIR Results directory + * --no-open Don't auto-open report + */ + +const fs = require('fs'); +const path = require('path'); +const os = require('os'); +const { execSync, spawnSync } = require('child_process'); + +// ─── Config: Aegis env vars β†’ CLI args β†’ defaults ──────────────────────────── + +const args = process.argv.slice(2); +function getArg(name, defaultVal) { + const idx = args.indexOf(`--${name}`); + if (idx === -1) return defaultVal; + return args[idx + 1] || defaultVal; +} + +// ─── Help ───────────────────────────────────────────────────────────────────── +if (args.includes('--help') || args.includes('-h')) { + console.log(` +SmartHome-Bench β€” Video Anomaly Detection Benchmark β€’ DeepCamera / SharpAI + +Usage: node scripts/run-benchmark.cjs [options] + +Options: + --vlm URL VLM server base URL (required) + --max-videos N Max videos to evaluate (default: 50) + --mode MODE subset or full (default: subset) + --categories L Comma-separated filter (default: all) + --skip-download Use cached videos only + --out DIR Results output directory (default: ~/.aegis-ai/smarthome-bench) + --no-open Don't auto-open report in browser + --report Force report generation + -h, 
--help Show this help message + +Environment Variables (set by Aegis): + AEGIS_VLM_URL VLM server base URL + AEGIS_SKILL_ID Skill identifier (enables skill mode) + AEGIS_SKILL_PARAMS JSON params from skill config + +Categories: Wildlife, Senior Care, Baby Monitoring, Pet Monitoring, + Home Security, Package Delivery, General Activity + `.trim()); + process.exit(0); +} + +// Parse skill parameters if running as Aegis skill +let skillParams = {}; +try { skillParams = JSON.parse(process.env.AEGIS_SKILL_PARAMS || '{}'); } catch { } + +const VLM_URL = process.env.AEGIS_VLM_URL || getArg('vlm', ''); +const VLM_MODEL = process.env.AEGIS_VLM_MODEL || ''; +const RESULTS_DIR = getArg('out', path.join(os.homedir(), '.aegis-ai', 'smarthome-bench')); +const VIDEO_CACHE_DIR = path.join(os.homedir(), '.aegis-ai', 'smarthome-bench', 'videos'); +const FRAMES_DIR = path.join(os.homedir(), '.aegis-ai', 'smarthome-bench', 'frames'); +const IS_SKILL_MODE = !!process.env.AEGIS_SKILL_ID; +const NO_OPEN = args.includes('--no-open') || skillParams.noOpen || false; +const SKIP_DOWNLOAD = args.includes('--skip-download'); +const FIXTURES_DIR = path.join(__dirname, '..', 'fixtures'); +const IDLE_TIMEOUT_MS = 60000; // VLM inference can be slow for multi-image + +// Mode & limits +const TEST_MODE = skillParams.mode || getArg('mode', 'subset'); +const MAX_VIDEOS = parseInt(skillParams.maxVideos || getArg('max-videos', '50'), 10) || 50; +const CATEGORIES_FILTER = (skillParams.categories || getArg('categories', 'all') || 'all').toLowerCase(); +const FRAMES_PER_VIDEO = 6; + +// ─── OpenAI SDK Client ────────────────────────────────────────────────────── +const OpenAI = require('openai'); + +const strip = (u) => u.replace(/\/v1\/?$/, ''); +const vlmClient = VLM_URL ? 
new OpenAI({ + apiKey: 'not-needed', + baseURL: `${strip(VLM_URL)}/v1`, +}) : null; + +// ─── Skill Protocol: JSON lines on stdout, human text on stderr ────────────── + +function emit(event) { + process.stdout.write(JSON.stringify(event) + '\n'); +} + +function log(msg) { + process.stderr.write(msg + '\n'); +} + +// ─── Test Framework ─────────────────────────────────────────────────────────── + +const suites = []; +let currentSuite = null; + +function suite(name, fn) { + suites.push({ name, fn, tests: [] }); +} + +const results = { + timestamp: new Date().toISOString(), + vlm: VLM_URL || null, + system: {}, + model: {}, + suites: [], + totals: { passed: 0, failed: 0, skipped: 0, total: 0, timeMs: 0 }, + tokenTotals: { prompt: 0, completion: 0, total: 0 }, + metrics: {}, +}; + +async function vlmCall(messages, opts = {}) { + if (!vlmClient) { + throw new Error('VLM client not configured β€” pass --vlm URL'); + } + + const model = opts.model || VLM_MODEL || undefined; + + const params = { + messages, + stream: true, + ...(model && { model }), + ...(opts.temperature !== undefined && { temperature: opts.temperature }), + max_completion_tokens: opts.maxTokens || 512, + }; + + const controller = new AbortController(); + const idleMs = opts.timeout || IDLE_TIMEOUT_MS; + let idleTimer = setTimeout(() => controller.abort(), idleMs); + const resetIdle = () => { clearTimeout(idleTimer); idleTimer = setTimeout(() => controller.abort(), idleMs); }; + + try { + const stream = await vlmClient.chat.completions.create(params, { + signal: controller.signal, + }); + + let content = ''; + let reasoningContent = ''; + let model = ''; + let usage = {}; + let tokenCount = 0; + + for await (const chunk of stream) { + resetIdle(); + if (chunk.model) model = chunk.model; + const delta = chunk.choices?.[0]?.delta; + if (delta?.content) content += delta.content; + if (delta?.reasoning_content) reasoningContent += delta.reasoning_content; + if (delta?.content || delta?.reasoning_content) { + 
tokenCount++; + if (tokenCount % 100 === 0) { + log(` … ${tokenCount} tokens received`); + } + } + if (chunk.usage) usage = chunk.usage; + } + + if (!content && reasoningContent) { + content = reasoningContent; + } + + results.tokenTotals.prompt += usage.prompt_tokens || 0; + results.tokenTotals.completion += usage.completion_tokens || 0; + results.tokenTotals.total += usage.total_tokens || 0; + + if (!results.model.vlm && model) results.model.vlm = model; + + return { content, usage, model }; + } finally { + clearTimeout(idleTimer); + } +} + +function stripThink(text) { + return text.replace(/[\s\S]*?<\/think>\s*/gi, '').trim(); +} + +function parseJSON(text) { + const cleaned = stripThink(text); + let jsonStr = cleaned; + const codeBlock = cleaned.match(/```(?:json)?\s*([\s\S]*?)\s*```/); + if (codeBlock) jsonStr = codeBlock[1]; + else { + const idx = cleaned.search(/[{[]/); + if (idx > 0) jsonStr = cleaned.slice(idx); + } + return JSON.parse(jsonStr.trim()); +} + +function assert(condition, msg) { + if (!condition) throw new Error(msg || 'Assertion failed'); +} + +async function runSuites() { + for (const s of suites) { + currentSuite = { name: s.name, tests: [], passed: 0, failed: 0, skipped: 0, timeMs: 0 }; + log(`\n${'─'.repeat(60)}`); + log(` ${s.name}`); + log(`${'─'.repeat(60)}`); + emit({ event: 'suite_start', suite: s.name }); + + await s.fn(); + + results.suites.push(currentSuite); + results.totals.passed += currentSuite.passed; + results.totals.failed += currentSuite.failed; + results.totals.skipped += currentSuite.skipped; + results.totals.total += currentSuite.tests.length; + + emit({ event: 'suite_end', suite: s.name, passed: currentSuite.passed, failed: currentSuite.failed, skipped: currentSuite.skipped, timeMs: currentSuite.timeMs }); + } +} + +async function test(name, fn) { + const testResult = { name, status: 'pass', timeMs: 0, detail: '', tokens: {} }; + const start = Date.now(); + try { + const detail = await fn(); + testResult.timeMs = 
Date.now() - start; + testResult.detail = detail || ''; + currentSuite.passed++; + log(` βœ… ${name} (${testResult.timeMs}ms)${detail ? ` β€” ${detail}` : ''}`); + } catch (err) { + testResult.timeMs = Date.now() - start; + testResult.status = 'fail'; + testResult.detail = err.message; + currentSuite.failed++; + log(` ❌ ${name} (${testResult.timeMs}ms) β€” ${err.message}`); + } + currentSuite.timeMs += testResult.timeMs; + currentSuite.tests.push(testResult); + emit({ event: 'test_result', suite: currentSuite.name, test: name, status: testResult.status, timeMs: testResult.timeMs, detail: testResult.detail.slice(0, 120) }); +} + +function skip(name, reason) { + currentSuite.skipped++; + currentSuite.tests.push({ name, status: 'skip', timeMs: 0, detail: reason }); + log(` ⏭️ ${name} β€” ${reason}`); + emit({ event: 'test_result', suite: currentSuite.name, test: name, status: 'skip', timeMs: 0, detail: reason }); +} + +// ═══════════════════════════════════════════════════════════════════════════════ +// DISK SPACE CHECK +// ═══════════════════════════════════════════════════════════════════════════════ + +function checkDiskSpace(targetDir, requiredGB) { + try { + fs.mkdirSync(targetDir, { recursive: true }); + // Use df to check available space on the partition + const dfOutput = execSync(`df -k "${targetDir}"`, { encoding: 'utf8' }); + const lines = dfOutput.trim().split('\n'); + if (lines.length >= 2) { + const parts = lines[1].split(/\s+/); + const availableKB = parseInt(parts[3], 10); + if (!isNaN(availableKB)) { + const availableGB = availableKB / (1024 * 1024); + if (availableGB < requiredGB) { + log(` ❌ Insufficient disk space`); + log(` Required: ${requiredGB.toFixed(1)} GB`); + log(` Available: ${availableGB.toFixed(1)} GB`); + log(` Location: ${targetDir}`); + emit({ event: 'error', message: `Insufficient disk space: need ${requiredGB}GB, have ${availableGB.toFixed(1)}GB` }); + process.exit(1); + } + log(` πŸ’Ύ Disk: ${availableGB.toFixed(1)} GB available 
/**
 * Extract up to FRAMES_PER_VIDEO evenly-spaced JPEG frames from a video.
 *
 * Frames are cached under FRAMES_DIR/<videoId>/; a complete cached set is
 * reused. Duration is probed from ffmpeg's stderr "Duration:" banner; on
 * parse failure a 30 s default keeps extraction usable for short clips.
 *
 * @param {string} videoFile absolute path to the downloaded video file
 * @param {string} videoId   annotation id, used as the cache key
 * @returns {string[]} sorted frame paths, or [] on failure (caller skips clip)
 */
function extractFrames(videoFile, videoId) {
  const frameDir = path.join(FRAMES_DIR, videoId);

  // Cache hit: a previous run already extracted enough frames.
  if (fs.existsSync(frameDir)) {
    const existing = fs.readdirSync(frameDir).filter(f => f.endsWith('.jpg'));
    if (existing.length >= FRAMES_PER_VIDEO) {
      return existing.sort().map(f => path.join(frameDir, f));
    }
  }

  fs.mkdirSync(frameDir, { recursive: true });

  try {
    // Probe duration: ffmpeg prints "Duration: HH:MM:SS.ff" on stderr.
    const probeResult = spawnSync('ffmpeg', [
      '-i', videoFile,
      '-f', 'null', '-',
    ], { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'], timeout: 30000 });

    const durationMatch = (probeResult.stderr || '').match(/Duration:\s*(\d{2}):(\d{2}):(\d{2})\.(\d+)/);
    let durationSec = 30; // default if the banner can't be parsed
    if (durationMatch) {
      // FIX: scale the fractional part by its digit count instead of always
      // dividing by 100 — the old code misread ".5" as 0.05s and ".456" as 4.56s.
      const fracDigits = durationMatch[4];
      const frac = parseInt(fracDigits, 10) / Math.pow(10, fracDigits.length);
      durationSec = parseInt(durationMatch[1], 10) * 3600
        + parseInt(durationMatch[2], 10) * 60
        + parseInt(durationMatch[3], 10)
        + frac;
    }

    // Evenly-spaced sample points, clamped to >= 0.5 s apart.
    const interval = Math.max(0.5, durationSec / (FRAMES_PER_VIDEO + 1));
    const framePaths = [];

    for (let i = 1; i <= FRAMES_PER_VIDEO; i++) {
      const timestamp = (interval * i).toFixed(2);
      const framePath = path.join(frameDir, `frame_${String(i).padStart(2, '0')}.jpg`);

      const extractResult = spawnSync('ffmpeg', [
        '-ss', timestamp,   // seek before -i: fast keyframe-accurate seek
        '-i', videoFile,
        '-frames:v', '1',
        '-q:v', '2',        // high JPEG quality
        '-y',
        framePath,
      ], { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'], timeout: 15000 });

      if (extractResult.status === 0 && fs.existsSync(framePath)) {
        framePaths.push(framePath);
      }
    }

    if (framePaths.length === 0) {
      throw new Error('No frames extracted');
    }

    return framePaths;
  } catch (err) {
    log(`  ⚠️  Frame extraction failed for ${videoId}: ${err.message}`);
    return [];
  }
}
+// VLM EVALUATION +// ═══════════════════════════════════════════════════════════════════════════════ + +const ANOMALY_DETECTION_PROMPT = `You are a smart home security camera AI analyst. You are evaluating video footage from a home camera. + +You will be shown multiple frames sampled from a video clip captured by a smart home security camera. + +## Task +Analyze the frames and determine whether the footage shows a NORMAL event or an ABNORMAL (anomalous) event. + +## Anomaly Categories +- **Wildlife**: Unusual wild animal presence (bears, coyotes, snakes β€” NOT birds at feeder) +- **Senior Care**: Falls, wandering, inactivity, forgotten appliances +- **Baby Monitoring**: Unsupervised children near hazards, equipment failures +- **Pet Monitoring**: Escaped pets, property damage by pets, pet distress +- **Home Security**: Break-ins, theft attempts, trespassing, suspicious behavior +- **Package Delivery**: Package theft, misdelivery, rough handling +- **General Activity**: Unusual time activity, equipment malfunction, property damage + +## Response Format +Respond with ONLY valid JSON (no markdown, no explanation): +{"result": 0, "reasoning": "brief 1-sentence explanation"} + +Where: +- result: 0 = NORMAL (no anomaly), 1 = ABNORMAL (anomaly detected) +- reasoning: Brief explanation of your determination`; + +async function evaluateClip(annotation, framePaths) { + const base64Frames = framesToBase64(framePaths); + + // Build multi-image message content + const imageContent = base64Frames.map((b64, i) => ({ + type: 'image_url', + image_url: { + url: `data:image/jpeg;base64,${b64}`, + detail: 'low', + }, + })); + + const messages = [ + { role: 'system', content: ANOMALY_DETECTION_PROMPT }, + { + role: 'user', + content: [ + { type: 'text', text: `Analyze these ${base64Frames.length} frames from a smart home camera video. 
/**
 * Build one test suite per category. Each annotation becomes a test that
 * downloads the clip, samples frames, asks the VLM for a verdict, and
 * compares it against the ground-truth anomaly tag.
 *
 * FIX: a clip whose video cannot be acquired is now recorded exactly once
 * as skipped. Previously skip() was invoked *inside* the test body and the
 * body then threw, so the same clip inflated both the skipped and failed
 * counters (and was pushed twice into currentSuite.tests).
 */
function buildSuites(annotationsByCategory) {
  for (const [category, annotations] of Object.entries(annotationsByCategory)) {
    const emoji = CATEGORY_EMOJIS[category] || 'πŸ“‹';
    suite(`${emoji} ${category}`, async () => {
      for (const annotation of annotations) {
        const expectedTag = annotation.anomaly_tag;
        const expectedLabel = expectedTag === 0 ? 'Normal' : 'Abnormal';

        // Step 1: acquire the video BEFORE registering the test so that an
        // unavailable clip counts as skipped, never as failed.
        const videoFile = SKIP_DOWNLOAD
          ? path.join(VIDEO_CACHE_DIR, `${annotation.id}.mp4`)
          : downloadVideo(annotation);

        if (!videoFile || !fs.existsSync(videoFile)) {
          skip(`${annotation.id} β†’ ${expectedLabel}`, 'Video not available');
          continue;
        }

        await test(`${annotation.id} β†’ ${expectedLabel}`, async () => {
          // Step 2: extract frames
          const framePaths = extractFrames(videoFile, annotation.id);
          if (framePaths.length === 0) {
            throw new Error('No frames extracted from video');
          }

          // Step 3: VLM evaluation
          const response = await evaluateClip(annotation, framePaths);
          const parsed = parseJSON(response.content);

          // Step 4: compare prediction vs ground truth
          const predicted = parsed.result;
          assert(predicted === 0 || predicted === 1, `Invalid result: ${predicted}`);
          assert(predicted === expectedTag,
            `Expected ${expectedLabel} (${expectedTag}), got ${predicted === 0 ? 'Normal' : 'Abnormal'} (${predicted}). VLM: "${(parsed.reasoning || '').slice(0, 80)}"`);

          return `${predicted === 0 ? 'Normal' : 'Abnormal'} βœ“ β€” "${(parsed.reasoning || '').slice(0, 60)}"`;
        });
      }
    });
  }
}
/**
 * Build per-category and overall confusion matrices from suite results,
 * storing them in results.metrics.
 *
 * The expected class is recovered from the test-name suffix
 * ("… β†’ Normal" / "… β†’ Abnormal").
 *
 * Fixes:
 *  - "Abnormal".includes("Normal") is true, so the old code classified
 *    correctly only because the abnormal branch happened to be checked
 *    first. The normal flag is now derived explicitly.
 *  - Skipped tests carry no prediction; they were previously counted as
 *    FP/FN and deflated accuracy. They are now excluded.
 */
function computeMetrics() {
  const perCategory = {};
  let totalTP = 0, totalFP = 0, totalTN = 0, totalFN = 0;

  for (const s of results.suites) {
    // Strip the emoji prefix to recover the plain category name.
    const catName = s.name.replace(/^[^\w]+\s*/, '').trim();
    let tp = 0, fp = 0, tn = 0, fn = 0;

    for (const t of s.tests) {
      if (t.status === 'skip') continue; // no prediction was made

      const expectAbnormal = t.name.includes('Abnormal');
      const expectNormal = !expectAbnormal && t.name.includes('Normal');
      const passed = t.status === 'pass';

      if (expectAbnormal && passed) tp++;        // anomaly correctly flagged
      else if (expectNormal && passed) tn++;     // normal correctly cleared
      else if (expectAbnormal && !passed) fn++;  // missed anomaly
      else if (expectNormal && !passed) fp++;    // false alarm
    }

    const accuracy = (tp + tn) / Math.max(1, tp + fp + tn + fn);
    const precision = tp / Math.max(1, tp + fp);
    const recall = tp / Math.max(1, tp + fn);
    const f1 = precision + recall > 0 ? 2 * (precision * recall) / (precision + recall) : 0;

    perCategory[catName] = { tp, fp, tn, fn, accuracy, precision, recall, f1, total: tp + fp + tn + fn };
    totalTP += tp; totalFP += fp; totalTN += tn; totalFN += fn;
  }

  const overall = {
    tp: totalTP, fp: totalFP, tn: totalTN, fn: totalFN,
    accuracy: (totalTP + totalTN) / Math.max(1, totalTP + totalFP + totalTN + totalFN),
    precision: totalTP / Math.max(1, totalTP + totalFP),
    recall: totalTP / Math.max(1, totalTP + totalFN),
  };
  overall.f1 = overall.precision + overall.recall > 0
    ? 2 * (overall.precision * overall.recall) / (overall.precision + overall.recall) : 0;

  results.metrics = { perCategory, overall };
}
15 : 2; + checkDiskSpace(VIDEO_CACHE_DIR, requiredGB); + + // Ensure cache dirs + fs.mkdirSync(VIDEO_CACHE_DIR, { recursive: true }); + fs.mkdirSync(FRAMES_DIR, { recursive: true }); + + // Load and build suites + const annotationsByCategory = loadAnnotations(); + const totalClips = Object.values(annotationsByCategory).reduce((n, arr) => n + arr.length, 0); + log(`\n πŸ“Š Loaded ${totalClips} clips across ${Object.keys(annotationsByCategory).length} categories\n`); + + buildSuites(annotationsByCategory); + + // Run + const suiteStart = Date.now(); + await runSuites(); + results.totals.timeMs = Date.now() - suiteStart; + + // Compute metrics + computeMetrics(); + + // Summary + const { passed, failed, skipped, total, timeMs } = results.totals; + const tokPerSec = timeMs > 0 ? ((results.tokenTotals.total / (timeMs / 1000)).toFixed(1)) : '?'; + const overallAcc = (results.metrics.overall?.accuracy * 100 || 0).toFixed(1); + const overallF1 = (results.metrics.overall?.f1 * 100 || 0).toFixed(1); + + log(`\n${'═'.repeat(66)}`); + log(` RESULTS: ${passed}/${total} passed, ${failed} failed, ${skipped} skipped (${(timeMs / 1000).toFixed(1)}s)`); + log(` ACCURACY: ${overallAcc}% | F1: ${overallF1}%`); + log(` TOKENS: ${results.tokenTotals.total} total (${tokPerSec} tok/s)`); + log(` MODEL: ${results.model.vlm || 'unknown'}`); + log(`${'═'.repeat(66)}`); + + // Per-category breakdown + if (results.metrics.perCategory) { + log('\n Per-Category Breakdown:'); + log(` ${'Category'.padEnd(22)} ${'Acc'.padStart(6)} ${'Prec'.padStart(6)} ${'Rec'.padStart(6)} ${'F1'.padStart(6)} ${'TP'.padStart(4)} ${'FP'.padStart(4)} ${'TN'.padStart(4)} ${'FN'.padStart(4)}`); + log(` ${'─'.repeat(72)}`); + for (const [cat, m] of Object.entries(results.metrics.perCategory)) { + log(` ${cat.padEnd(22)} ${(m.accuracy * 100).toFixed(1).padStart(5)}% ${(m.precision * 100).toFixed(1).padStart(5)}% ${(m.recall * 100).toFixed(1).padStart(5)}% ${(m.f1 * 100).toFixed(1).padStart(5)}% 
${String(m.tp).padStart(4)} ${String(m.fp).padStart(4)} ${String(m.tn).padStart(4)} ${String(m.fn).padStart(4)}`); + } + } + + if (failed > 0) { + log('\n Failures:'); + for (const s of results.suites) { + for (const t of s.tests) { + if (t.status === 'fail') log(` ❌ ${s.name} > ${t.name}: ${t.detail}`); + } + } + } + + // Save results + fs.mkdirSync(RESULTS_DIR, { recursive: true }); + const modelSlug = (results.model.vlm || 'unknown').replace(/[^a-zA-Z0-9_.-]/g, '_'); + const ts = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19); + const resultFile = path.join(RESULTS_DIR, `${modelSlug}_${ts}.json`); + fs.writeFileSync(resultFile, JSON.stringify(results, null, 2)); + log(`\n Results saved: ${resultFile}`); + + // Update index + const indexFile = path.join(RESULTS_DIR, 'index.json'); + let index = []; + try { index = JSON.parse(fs.readFileSync(indexFile, 'utf8')); } catch { } + index.push({ + file: path.basename(resultFile), + model: results.model.vlm || 'unknown', + timestamp: results.timestamp, + passed, failed, total, + accuracy: results.metrics.overall?.accuracy || 0, + f1: results.metrics.overall?.f1 || 0, + timeMs, + tokens: results.tokenTotals.total, + }); + fs.writeFileSync(indexFile, JSON.stringify(index, null, 2)); + + // Generate report + let reportPath = null; + log('\n Generating HTML report...'); + try { + const reportScript = path.join(__dirname, 'generate-report.cjs'); + reportPath = require(reportScript).generateReport(RESULTS_DIR); + log(` βœ… Report: ${reportPath}`); + + if (!NO_OPEN && !IS_SKILL_MODE && reportPath) { + try { + const openCmd = process.platform === 'darwin' ? 
'open' : 'xdg-open'; + execSync(`${openCmd} "${reportPath}"`, { stdio: 'ignore' }); + log(` πŸ“‚ Opened in browser`); + } catch { + log(` ℹ️ Open manually: ${reportPath}`); + } + } + } catch (err) { + log(` ⚠️ Report generation failed: ${err.message}`); + } + + // Emit completion + emit({ + event: 'complete', + model: results.model.vlm, + passed, failed, skipped, total, + timeMs, + accuracy: results.metrics.overall?.accuracy || 0, + f1: results.metrics.overall?.f1 || 0, + tokens: results.tokenTotals.total, + tokPerSec: parseFloat(tokPerSec) || 0, + resultFile, + reportPath, + }); + + log(''); + process.exit(failed > 0 ? 1 : 0); +} + +// Run when executed directly +const isDirectRun = require.main === module || + (process.argv[1] && require('path').resolve(process.argv[1]) === __filename); + +if (isDirectRun) { + main().catch(err => { + log(`Fatal: ${err.message}`); + emit({ event: 'error', message: err.message }); + process.exit(1); + }); +} + +module.exports = { main }; diff --git a/skills/analysis/vlm-scene-analysis/SKILL.md b/skills/analysis/vlm-scene-analysis/SKILL.md deleted file mode 100644 index 5809a86..0000000 --- a/skills/analysis/vlm-scene-analysis/SKILL.md +++ /dev/null @@ -1,86 +0,0 @@ ---- -name: vlm-scene-analysis -description: "Offline scene understanding of recorded clips using vision language models" -version: 1.0.0 - -parameters: - - name: model - label: "VLM Model" - type: select - options: ["smolvlm2-500m", "qwen2.5-vl-3b", "gemma-3-4b", "llava-1.6-7b"] - default: "smolvlm2-500m" - group: Model - - - name: prompt - label: "Analysis Prompt" - type: string - default: "Describe what is happening in this security camera footage. Focus on people, vehicles, and any unusual activity." 
- group: Model - - - name: auto_analyze - label: "Auto-analyze New Clips" - type: boolean - default: true - group: Behavior - - - name: max_frames - label: "Frames per Clip" - type: number - min: 1 - max: 16 - default: 4 - description: "Number of frames sampled from each clip for analysis" - group: Performance - - - name: device - label: "Device" - type: select - options: ["auto", "cpu", "cuda", "mps"] - default: "auto" - group: Performance - -capabilities: - offline_analysis: - script: scripts/analyze.py - description: "VLM analysis of recorded video clips" ---- - -# VLM Scene Analysis - -Offline scene understanding using vision language models. Analyzes recorded clips to generate natural language descriptions of what happened β€” people, vehicles, activities, and anomalies. - -## What You Get - -- **Clip descriptions** β€” "Delivery driver places package at front door, rings doorbell, walks back to van" -- **Searchable** β€” search your clips by description: "find clips with dogs in the backyard" -- **Timeline badges** β€” analysis results shown on the timeline in SharpAI Aegis - -## Models - -| Model | Size | Speed | Quality | VRAM | -|-------|------|-------|---------|------| -| SmolVLM2 500M | 1 GB | ⚑ Fast | Good | 2 GB | -| Qwen2.5-VL 3B | 6 GB | Medium | Very Good | 6 GB | -| Gemma-3 4B | 8 GB | Medium | Very Good | 8 GB | -| LLaVA 1.6 7B | 14 GB | Slow | Excellent | 12 GB | - -## Protocol - -### Aegis β†’ Skill (stdin) -```jsonl -{"event": "clip_ready", "clip_id": "blink_403785_1709312400", "video_path": "/path/to/clip.mp4", "camera_id": "front_door", "camera_name": "Front Door", "duration_seconds": 15} -``` - -### Skill β†’ Aegis (stdout) -```jsonl -{"event": "ready", "model": "smolvlm2-500m", "device": "mps"} -{"event": "analysis_result", "clip_id": "blink_403785_1709312400", "description": "A delivery driver in a brown uniform approaches the front door carrying a medium-sized package. 
They place it on the porch, ring the doorbell, and return to their van parked in the driveway.", "objects": ["person", "package", "van"], "confidence": 0.9} -``` - -## Setup - -```bash -python3 -m venv .venv && source .venv/bin/activate -pip install -r requirements.txt -python scripts/download_model.py --model smolvlm2-500m -``` diff --git a/skills/analysis/vlm-scene-analysis/requirements.txt b/skills/analysis/vlm-scene-analysis/requirements.txt deleted file mode 100644 index 92b3055..0000000 --- a/skills/analysis/vlm-scene-analysis/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -# VLM Scene Analysis -llama-cpp-python>=0.3.0 -numpy>=1.24.0 -opencv-python-headless>=4.8.0 -Pillow>=10.0.0 diff --git a/skills/analysis/vlm-scene-analysis/scripts/analyze.py b/skills/analysis/vlm-scene-analysis/scripts/analyze.py deleted file mode 100644 index 46f8e23..0000000 --- a/skills/analysis/vlm-scene-analysis/scripts/analyze.py +++ /dev/null @@ -1,149 +0,0 @@ -#!/usr/bin/env python3 -""" -VLM Scene Analysis Skill β€” Offline clip understanding via vision language models. - -Analyzes recorded video clips and generates natural language descriptions. -""" - -import sys -import json -import argparse -import signal -from pathlib import Path - - -def parse_args(): - parser = argparse.ArgumentParser(description="VLM Scene Analysis Skill") - parser.add_argument("--config", type=str) - parser.add_argument("--model", type=str, default="smolvlm2-500m") - parser.add_argument("--prompt", type=str, - default="Describe what is happening in this security camera footage. 
Focus on people, vehicles, and any unusual activity.") - parser.add_argument("--max-frames", type=int, default=4) - parser.add_argument("--device", type=str, default="auto") - return parser.parse_args() - - -def load_config(args): - if args.config and Path(args.config).exists(): - with open(args.config) as f: - return json.load(f) - return { - "model": args.model, - "prompt": args.prompt, - "max_frames": args.max_frames, - "device": args.device, - } - - -def emit(event): - print(json.dumps(event), flush=True) - - -def extract_frames(video_path, max_frames=4): - """Extract evenly spaced frames from a video clip.""" - import cv2 - cap = cv2.VideoCapture(video_path) - total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - if total <= 0: - cap.release() - return [] - - indices = [int(i * total / max_frames) for i in range(max_frames)] - frames = [] - for idx in indices: - cap.set(cv2.CAP_PROP_POS_FRAMES, idx) - ret, frame = cap.read() - if ret: - frames.append(frame) - cap.release() - return frames - - -def main(): - args = parse_args() - config = load_config(args) - - try: - from llama_cpp import Llama - from llama_cpp.llama_chat_format import MiniCPMv26ChatHandler - import cv2 - import base64 - - model_path = Path(f"models/{config['model']}.gguf") - if not model_path.exists(): - emit({"event": "error", "message": f"Model not found: {model_path}. 
Run: python scripts/download_model.py --model {config['model']}", "retriable": False}) - sys.exit(1) - - chat_handler = MiniCPMv26ChatHandler(clip_model_path=str(model_path.with_suffix(".mmproj"))) - llm = Llama(model_path=str(model_path), chat_handler=chat_handler, n_ctx=4096) - - emit({"event": "ready", "model": config["model"], "device": config.get("device", "cpu")}) - except Exception as e: - emit({"event": "error", "message": f"Failed to load model: {e}", "retriable": False}) - sys.exit(1) - - running = True - def handle_signal(s, f): - nonlocal running - running = False - signal.signal(signal.SIGTERM, handle_signal) - signal.signal(signal.SIGINT, handle_signal) - - for line in sys.stdin: - if not running: - break - line = line.strip() - if not line: - continue - try: - msg = json.loads(line) - except json.JSONDecodeError: - continue - - if msg.get("command") == "stop": - break - - if msg.get("event") == "clip_ready": - video_path = msg.get("video_path") - clip_id = msg.get("clip_id", "unknown") - camera_id = msg.get("camera_id", "unknown") - - if not video_path or not Path(video_path).exists(): - emit({"event": "error", "message": f"Video not found: {video_path}", "retriable": True}) - continue - - try: - frames = extract_frames(video_path, config.get("max_frames", 4)) - if not frames: - emit({"event": "error", "message": "No frames extracted", "retriable": True}) - continue - - # Encode frames as base64 for VLM - images = [] - for frame in frames: - _, buf = cv2.imencode(".jpg", frame, [cv2.IMWRITE_JPEG_QUALITY, 85]) - images.append(f"data:image/jpeg;base64,{base64.b64encode(buf).decode()}") - - content = [{"type": "text", "text": config["prompt"]}] - for img in images: - content.append({"type": "image_url", "image_url": {"url": img}}) - - result = llm.create_chat_completion(messages=[ - {"role": "user", "content": content} - ]) - - description = result["choices"][0]["message"]["content"] - emit({ - "event": "analysis_result", - "clip_id": clip_id, - 
"camera_id": camera_id, - "description": description, - "objects": [], # Could be extracted from description - "confidence": 0.9, - }) - except Exception as e: - emit({"event": "error", "message": f"Analysis error: {e}", "retriable": True}) - - -if __name__ == "__main__": - main() diff --git a/skills/detection/dinov3-grounding/SKILL.md b/skills/detection/dinov3-grounding/SKILL.md deleted file mode 100644 index 13ad2cd..0000000 --- a/skills/detection/dinov3-grounding/SKILL.md +++ /dev/null @@ -1,84 +0,0 @@ ---- -name: dinov3-grounding -description: "Open-vocabulary object detection using DINOv3 visual grounding" -version: 1.0.0 - -parameters: - - name: model - label: "Model" - type: select - options: ["dinov3-base", "dinov3-large"] - default: "dinov3-base" - group: Model - - - name: prompt - label: "Detection Prompt" - type: string - default: "person . car . dog . cat" - description: "Dot-separated object names to detect (open vocabulary)" - group: Model - - - name: box_threshold - label: "Box Threshold" - type: number - min: 0.1 - max: 1.0 - default: 0.3 - group: Model - - - name: text_threshold - label: "Text Threshold" - type: number - min: 0.1 - max: 1.0 - default: 0.25 - group: Model - - - name: device - label: "Device" - type: select - options: ["auto", "cpu", "cuda", "mps"] - default: "auto" - group: Performance - -capabilities: - live_detection: - script: scripts/ground.py - description: "Open-vocabulary grounding detection on live frames" - feature_extraction: - script: scripts/extract_features.py - description: "Patch-level DINOv3 feature extraction for similarity search" ---- - -# DINOv3 Visual Grounding - -Open-vocabulary object detection β€” describe what you want to find in natural language, and DINOv3 locates it. Unlike YOLO (fixed 80 classes), DINOv3 can detect **anything you describe**. - -## Use Cases - -- "person carrying a package" β†’ bounding box around delivery driver -- "red car" β†’ detects only red cars, ignores others -- "dog . cat . 
bird" β†’ multi-class open-vocabulary detection -- Annotation assistance β€” click a region, get patch-level feature similarity - -## Protocol - -### Aegis β†’ Skill (stdin) -```jsonl -{"event": "frame", "camera_id": "front_door", "frame_path": "/tmp/frame.jpg", "timestamp": "..."} -``` - -### Skill β†’ Aegis (stdout) -```jsonl -{"event": "ready", "model": "dinov3-base", "device": "mps"} -{"event": "detections", "camera_id": "front_door", "objects": [ - {"class": "person carrying package", "confidence": 0.87, "bbox": [100, 50, 300, 400]} -]} -``` - -## Setup - -```bash -python3 -m venv .venv && source .venv/bin/activate -pip install -r requirements.txt -``` diff --git a/skills/detection/dinov3-grounding/requirements.txt b/skills/detection/dinov3-grounding/requirements.txt deleted file mode 100644 index d0f78f2..0000000 --- a/skills/detection/dinov3-grounding/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -# DINOv3 Visual Grounding -groundingdino>=0.1.0 -torch>=2.0.0 -torchvision>=0.15.0 -numpy>=1.24.0 -opencv-python-headless>=4.8.0 -Pillow>=10.0.0 -supervision>=0.18.0 diff --git a/skills/detection/dinov3-grounding/scripts/ground.py b/skills/detection/dinov3-grounding/scripts/ground.py deleted file mode 100644 index 9812d44..0000000 --- a/skills/detection/dinov3-grounding/scripts/ground.py +++ /dev/null @@ -1,140 +0,0 @@ -#!/usr/bin/env python3 -""" -DINOv3 Visual Grounding Skill β€” Open-vocabulary object detection. - -Detects objects based on natural language prompts. -Communicates via JSON lines over stdin/stdout. -""" - -import sys -import json -import argparse -import signal -from pathlib import Path - - -def parse_args(): - parser = argparse.ArgumentParser(description="DINOv3 Grounding Skill") - parser.add_argument("--config", type=str, help="Config JSON path") - parser.add_argument("--model", type=str, default="dinov3-base", choices=["dinov3-base", "dinov3-large"]) - parser.add_argument("--prompt", type=str, default="person . car . dog . 
cat") - parser.add_argument("--box-threshold", type=float, default=0.3) - parser.add_argument("--text-threshold", type=float, default=0.25) - parser.add_argument("--device", type=str, default="auto") - return parser.parse_args() - - -def load_config(args): - if args.config and Path(args.config).exists(): - with open(args.config) as f: - return json.load(f) - return { - "model": args.model, - "prompt": args.prompt, - "box_threshold": args.box_threshold, - "text_threshold": args.text_threshold, - "device": args.device, - } - - -def select_device(pref): - if pref != "auto": - return pref - try: - import torch - if torch.cuda.is_available(): return "cuda" - if hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): return "mps" - except ImportError: - pass - return "cpu" - - -def emit(event): - print(json.dumps(event), flush=True) - - -def main(): - args = parse_args() - config = load_config(args) - device = select_device(config.get("device", "auto")) - - try: - from groundingdino.util.inference import load_model, predict - import cv2 - import numpy as np - - model = load_model( - "groundingdino/config/GroundingDINO_SwinT_OGC.py", - f"weights/{config['model']}.pth" - ) - emit({"event": "ready", "model": config["model"], "device": device}) - except Exception as e: - emit({"event": "error", "message": f"Failed to load model: {e}", "retriable": False}) - sys.exit(1) - - running = True - def handle_signal(s, f): - nonlocal running - running = False - signal.signal(signal.SIGTERM, handle_signal) - signal.signal(signal.SIGINT, handle_signal) - - prompt = config.get("prompt", "person . 
car") - box_thresh = config.get("box_threshold", 0.3) - text_thresh = config.get("text_threshold", 0.25) - - for line in sys.stdin: - if not running: - break - line = line.strip() - if not line: - continue - try: - msg = json.loads(line) - except json.JSONDecodeError: - continue - - if msg.get("command") == "stop": - break - - if msg.get("event") == "frame": - frame_path = msg.get("frame_path") - if not frame_path or not Path(frame_path).exists(): - emit({"event": "error", "message": f"Frame not found: {frame_path}", "retriable": True}) - continue - - try: - import cv2 - image = cv2.imread(frame_path) - boxes, logits, phrases = predict( - model=model, - image=image, - caption=prompt, - box_threshold=box_thresh, - text_threshold=text_thresh, - ) - h, w = image.shape[:2] - objects = [] - for box, logit, phrase in zip(boxes, logits, phrases): - cx, cy, bw, bh = box.tolist() - x1 = int((cx - bw / 2) * w) - y1 = int((cy - bh / 2) * h) - x2 = int((cx + bw / 2) * w) - y2 = int((cy + bh / 2) * h) - objects.append({ - "class": phrase, - "confidence": round(float(logit), 3), - "bbox": [x1, y1, x2, y2], - }) - emit({ - "event": "detections", - "camera_id": msg.get("camera_id", "unknown"), - "timestamp": msg.get("timestamp", ""), - "objects": objects, - }) - except Exception as e: - emit({"event": "error", "message": f"Inference error: {e}", "retriable": True}) - - -if __name__ == "__main__": - main() diff --git a/skills/detection/person-recognition/SKILL.md b/skills/detection/person-recognition/SKILL.md deleted file mode 100644 index 606b9b9..0000000 --- a/skills/detection/person-recognition/SKILL.md +++ /dev/null @@ -1,79 +0,0 @@ ---- -name: person-recognition -description: "Person re-identification β€” track and recognize individuals across cameras" -version: 1.0.0 - -parameters: - - name: model - label: "ReID Model" - type: select - options: ["mgn-r50", "osnet-ain", "bot-r50"] - default: "mgn-r50" - group: Model - - - name: similarity_threshold - label: "Match Threshold" - 
type: number - min: 0.3 - max: 0.95 - default: 0.7 - group: Model - - - name: gallery_size - label: "Max Gallery Size" - type: number - min: 10 - max: 1000 - default: 100 - description: "Maximum number of known identities to track" - group: Model - - - name: device - label: "Device" - type: select - options: ["auto", "cpu", "cuda", "mps"] - default: "auto" - group: Performance - -capabilities: - live_detection: - script: scripts/detect.py - description: "Person re-identification on detected crops" ---- - -# Person Recognition (ReID) - -Tracks and re-identifies individuals across cameras and over time. When a person is detected, this skill extracts an appearance embedding and matches it against a gallery of known identities. - -## What You Get - -- **Cross-camera tracking** β€” recognize the same person across different cameras -- **Identity gallery** β€” save and label known individuals -- **Re-appearance alerts** β€” "Person X was last seen 2 hours ago at the front door" - -## Wraps - -This skill builds on DeepCamera's original `src/yolov7_reid/` module. 
- -## Protocol - -### Aegis β†’ Skill (stdin) -```jsonl -{"event": "frame", "camera_id": "...", "frame_path": "/tmp/frame.jpg", "detections": [{"class": "person", "bbox": [100, 50, 300, 400]}]} -``` - -### Skill β†’ Aegis (stdout) -```jsonl -{"event": "ready", "model": "mgn-r50", "gallery_size": 0} -{"event": "detections", "camera_id": "...", "objects": [ - {"class": "person", "bbox": [100, 50, 300, 400], "identity": "delivery_driver", "confidence": 0.85, "track_id": "p1"} -]} -{"event": "new_identity", "identity_id": "unknown_003", "snapshot_path": "/tmp/crop.jpg"} -``` - -## Setup - -```bash -python3 -m venv .venv && source .venv/bin/activate -pip install -r requirements.txt -``` diff --git a/skills/detection/person-recognition/requirements.txt b/skills/detection/person-recognition/requirements.txt deleted file mode 100644 index 934a4a5..0000000 --- a/skills/detection/person-recognition/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Person Recognition (ReID) -torchreid>=0.2.5 -torch>=2.0.0 -torchvision>=0.15.0 -numpy>=1.24.0 -opencv-python-headless>=4.8.0 -Pillow>=10.0.0 -scikit-learn>=1.3.0 diff --git a/skills/detection/person-recognition/scripts/detect.py b/skills/detection/person-recognition/scripts/detect.py deleted file mode 100644 index d4d7c20..0000000 --- a/skills/detection/person-recognition/scripts/detect.py +++ /dev/null @@ -1,183 +0,0 @@ -#!/usr/bin/env python3 -""" -Person Recognition (ReID) Skill β€” Track individuals across cameras. - -Extracts appearance embeddings from detected person crops and matches -against a gallery of known identities. 
-""" - -import sys -import json -import argparse -import signal -from pathlib import Path - - -def parse_args(): - parser = argparse.ArgumentParser(description="Person Recognition Skill") - parser.add_argument("--config", type=str) - parser.add_argument("--model", type=str, default="mgn-r50") - parser.add_argument("--threshold", type=float, default=0.7) - parser.add_argument("--gallery-size", type=int, default=100) - parser.add_argument("--device", type=str, default="auto") - return parser.parse_args() - - -def load_config(args): - if args.config and Path(args.config).exists(): - with open(args.config) as f: - return json.load(f) - return { - "model": args.model, - "similarity_threshold": args.threshold, - "gallery_size": args.gallery_size, - "device": args.device, - } - - -def select_device(pref): - if pref != "auto": - return pref - try: - import torch - if torch.cuda.is_available(): return "cuda" - if hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): return "mps" - except ImportError: - pass - return "cpu" - - -def emit(event): - print(json.dumps(event), flush=True) - - -class IdentityGallery: - """Simple in-memory gallery of known person embeddings.""" - - def __init__(self, max_size=100, threshold=0.7): - self.embeddings = {} # identity_id -> embedding - self.labels = {} # identity_id -> label - self.max_size = max_size - self.threshold = threshold - self._next_id = 0 - - def match(self, embedding): - """Find the closest matching identity, or create new one.""" - import numpy as np - - best_id = None - best_sim = 0.0 - - for identity_id, stored_emb in self.embeddings.items(): - sim = float(np.dot(embedding, stored_emb) / - (np.linalg.norm(embedding) * np.linalg.norm(stored_emb) + 1e-8)) - if sim > best_sim: - best_sim = sim - best_id = identity_id - - if best_sim >= self.threshold and best_id is not None: - return best_id, self.labels.get(best_id, best_id), best_sim - - # New identity - if len(self.embeddings) < self.max_size: - new_id = 
f"person_{self._next_id:04d}" - self._next_id += 1 - self.embeddings[new_id] = embedding - self.labels[new_id] = new_id - return new_id, new_id, 1.0 - - return None, "unknown", 0.0 - - -def main(): - args = parse_args() - config = load_config(args) - device = select_device(config.get("device", "auto")) - - try: - import torchreid - import torch - import cv2 - import numpy as np - - extractor = torchreid.utils.FeatureExtractor( - model_name="osnet_ain_x1_0", - device=device, - ) - gallery = IdentityGallery( - max_size=config.get("gallery_size", 100), - threshold=config.get("similarity_threshold", 0.7), - ) - emit({"event": "ready", "model": config["model"], "device": device, "gallery_size": 0}) - except Exception as e: - emit({"event": "error", "message": f"Failed to load model: {e}", "retriable": False}) - sys.exit(1) - - running = True - def handle_signal(s, f): - nonlocal running - running = False - signal.signal(signal.SIGTERM, handle_signal) - signal.signal(signal.SIGINT, handle_signal) - - for line in sys.stdin: - if not running: - break - line = line.strip() - if not line: - continue - try: - msg = json.loads(line) - except json.JSONDecodeError: - continue - - if msg.get("command") == "stop": - break - - if msg.get("event") == "frame": - frame_path = msg.get("frame_path") - detections = msg.get("detections", []) - camera_id = msg.get("camera_id", "unknown") - - if not frame_path or not Path(frame_path).exists(): - continue - - try: - image = cv2.imread(frame_path) - results = [] - - for det in detections: - if det.get("class") != "person": - results.append(det) - continue - - x1, y1, x2, y2 = det["bbox"] - crop = image[max(0, y1):y2, max(0, x1):x2] - if crop.size == 0: - continue - - crop_rgb = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB) - features = extractor([crop_rgb]) - embedding = features[0].cpu().numpy() - - identity_id, label, confidence = gallery.match(embedding) - results.append({ - **det, - "identity": label, - "identity_id": identity_id, - "confidence": 
round(confidence, 3), - "track_id": identity_id, - }) - - emit({ - "event": "detections", - "camera_id": camera_id, - "timestamp": msg.get("timestamp", ""), - "objects": results, - }) - except Exception as e: - emit({"event": "error", "message": f"ReID error: {e}", "retriable": True}) - - -if __name__ == "__main__": - main() diff --git a/skills/detection/yolo-detection-2026/SKILL.md b/skills/detection/yolo-detection-2026/SKILL.md index 95ff307..278d924 100644 --- a/skills/detection/yolo-detection-2026/SKILL.md +++ b/skills/detection/yolo-detection-2026/SKILL.md @@ -6,6 +6,12 @@ icon: assets/icon.png entry: scripts/detect.py deploy: deploy.sh +requirements: + python: ">=3.9" + ultralytics: ">=8.3.0" + torch: ">=2.4.0" + platforms: ["linux", "macos", "windows"] + parameters: - name: auto_start label: "Auto Start" @@ -27,7 +33,7 @@ parameters: type: number min: 0.1 max: 1.0 - default: 0.5 + default: 0.8 group: Model - name: classes diff --git a/skills/detection/yolo-detection-2026/config.yaml b/skills/detection/yolo-detection-2026/config.yaml index 742146f..62f8225 100644 --- a/skills/detection/yolo-detection-2026/config.yaml +++ b/skills/detection/yolo-detection-2026/config.yaml @@ -23,7 +23,7 @@ params: - key: confidence label: Confidence Threshold type: number - default: 0.5 + default: 0.8 description: "Minimum detection confidence (0.1–1.0)" - key: fps diff --git a/skills/detection/yolo-detection-2026/deploy.sh b/skills/detection/yolo-detection-2026/deploy.sh index a56dfdf..4cd7840 100755 --- a/skills/detection/yolo-detection-2026/deploy.sh +++ b/skills/detection/yolo-detection-2026/deploy.sh @@ -84,15 +84,30 @@ PIP="$VENV_DIR/bin/pip" emit '{"event": "progress", "stage": "venv", "message": "Virtual environment ready"}' +# ─── Step 2.5: Bundle env_config.py alongside detect.py ───────────────────── + +if [ -n "$LIB_DIR" ] && [ -f "$LIB_DIR/env_config.py" ]; then + cp "$LIB_DIR/env_config.py" "$SKILL_DIR/scripts/env_config.py" + log "Bundled env_config.py into 
scripts/" +fi + # ─── Step 3: Detect hardware via env_config ───────────────────────────────── BACKEND="cpu" -if [ -n "$LIB_DIR" ] && [ -f "$LIB_DIR/env_config.py" ]; then +# Find env_config.py β€” bundled copy or repo lib/ +ENV_CONFIG_DIR="" +if [ -f "$SKILL_DIR/scripts/env_config.py" ]; then + ENV_CONFIG_DIR="$SKILL_DIR/scripts" +elif [ -n "$LIB_DIR" ] && [ -f "$LIB_DIR/env_config.py" ]; then + ENV_CONFIG_DIR="$LIB_DIR" +fi + +if [ -n "$ENV_CONFIG_DIR" ]; then log "Detecting hardware via env_config.py..." DETECT_OUTPUT=$("$VENV_DIR/bin/python" -c " import sys -sys.path.insert(0, '$LIB_DIR') +sys.path.insert(0, '$ENV_CONFIG_DIR') from env_config import HardwareEnv env = HardwareEnv.detect() print(env.backend) @@ -114,14 +129,14 @@ print(env.backend) else log "env_config.py not found, using heuristic detection..." - # Fallback: inline GPU detection (same as before) + # Fallback: inline GPU detection if command -v nvidia-smi &>/dev/null; then cuda_ver=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null | head -1) if [ -n "$cuda_ver" ]; then BACKEND="cuda" log "Detected NVIDIA GPU (driver: $cuda_ver)" fi - elif command -v rocm-smi &>/dev/null || [ -d "/opt/rocm" ]; then + elif command -v amd-smi &>/dev/null || command -v rocm-smi &>/dev/null || [ -d "/opt/rocm" ]; then BACKEND="rocm" log "Detected AMD ROCm" elif [ "$(uname)" = "Darwin" ] && [ "$(uname -m)" = "arm64" ]; then @@ -145,7 +160,59 @@ fi log "Installing dependencies from $REQ_FILE ..." 
emit "{\"event\": \"progress\", \"stage\": \"install\", \"message\": \"Installing $BACKEND dependencies...\"}" -"$PIP" install -r "$REQ_FILE" -q 2>&1 | tail -5 >&2 +if [ "$BACKEND" = "rocm" ]; then + # ROCm: detect installed version for correct PyTorch index URL + ROCM_VER="" + if [ -f /opt/rocm/.info/version ]; then + ROCM_VER=$(head -1 /opt/rocm/.info/version | grep -oE '[0-9]+\.[0-9]+') + elif command -v amd-smi &>/dev/null; then + ROCM_VER=$(amd-smi version 2>/dev/null | grep -oE '[0-9]+\.[0-9]+' | head -1) + elif command -v rocminfo &>/dev/null; then + ROCM_VER=$(rocminfo 2>/dev/null | grep -i "HSA Runtime" | grep -oE '[0-9]+\.[0-9]+' | head -1) + fi + ROCM_VER="${ROCM_VER:-6.2}" # fallback if detection fails + log "Detected ROCm version: $ROCM_VER" + + # Build list of ROCm versions to try (detected β†’ step down β†’ previous major) + ROCM_MAJOR=$(echo "$ROCM_VER" | cut -d. -f1) + ROCM_MINOR=$(echo "$ROCM_VER" | cut -d. -f2) + ROCM_CANDIDATES="$ROCM_VER" + m=$((ROCM_MINOR - 1)) + while [ "$m" -ge 0 ]; do + ROCM_CANDIDATES="$ROCM_CANDIDATES ${ROCM_MAJOR}.${m}" + m=$((m - 1)) + done + # Also try previous major version (e.g., 6.4, 6.2 if on 7.x) + prev_major=$((ROCM_MAJOR - 1)) + for pm in 4 3 2 1 0; do + ROCM_CANDIDATES="$ROCM_CANDIDATES ${prev_major}.${pm}" + done + + # Phase 1: Try each candidate until PyTorch installs successfully + TORCH_INSTALLED=false + for ver in $ROCM_CANDIDATES; do + log "Trying PyTorch for ROCm $ver ..." + if "$PIP" install torch torchvision --index-url "https://download.pytorch.org/whl/rocm${ver}" -q 2>&1; then + log "Installed PyTorch with ROCm $ver support" + TORCH_INSTALLED=true + break + fi + done + + if [ "$TORCH_INSTALLED" = false ]; then + log "WARNING: No PyTorch ROCm wheels found, installing CPU PyTorch from PyPI" + "$PIP" install torch torchvision -q 2>&1 | tail -3 >&2 + fi + + # Phase 2: remaining packages (ultralytics, onnxruntime-rocm, etc.) 
+ "$PIP" install ultralytics onnxruntime-rocm 'onnx>=1.12.0,<2.0.0' 'onnxslim>=0.1.71' \ + 'numpy>=1.24.0' 'opencv-python-headless>=4.8.0' 'Pillow>=10.0.0' -q 2>&1 | tail -3 >&2 + + # Prevent ultralytics from auto-installing CPU onnxruntime during export + export YOLO_AUTOINSTALL=0 +else + "$PIP" install -r "$REQ_FILE" -q 2>&1 | tail -5 >&2 +fi # ─── Step 5: Pre-convert model to optimized format ─────────────────────────── @@ -155,7 +222,7 @@ if [ "$BACKEND" != "cpu" ] || [ -f "$SKILL_DIR/requirements_cpu.txt" ]; then "$VENV_DIR/bin/python" -c " import sys -sys.path.insert(0, '$LIB_DIR') +sys.path.insert(0, '$ENV_CONFIG_DIR') from env_config import HardwareEnv env = HardwareEnv.detect() @@ -184,7 +251,7 @@ fi log "Verifying installation..." "$VENV_DIR/bin/python" -c " import sys -sys.path.insert(0, '$LIB_DIR') +sys.path.insert(0, '$ENV_CONFIG_DIR') from env_config import HardwareEnv import json diff --git a/skills/detection/yolo-detection-2026/requirements.txt b/skills/detection/yolo-detection-2026/requirements.txt index 641ea23..77ef373 100644 --- a/skills/detection/yolo-detection-2026/requirements.txt +++ b/skills/detection/yolo-detection-2026/requirements.txt @@ -2,6 +2,10 @@ # Install: pip install -r requirements.txt ultralytics>=8.3.0 # YOLOv11/v10/v8 inference -numpy>=1.24.0 +numpy>=1.24.0,<2.0.0 opencv-python-headless>=4.8.0 Pillow>=10.0.0 +# GPU inference β€” install ONE of these (not both!): +# AMD ROCm: pip install onnxruntime-rocm (do NOT install onnxruntime alongside) +# NVIDIA: handled by ultralytics (tensorrt) +# CPU only: pip install onnxruntime diff --git a/skills/detection/yolo-detection-2026/requirements_mps.txt b/skills/detection/yolo-detection-2026/requirements_mps.txt index eb018ea..a9e282f 100644 --- a/skills/detection/yolo-detection-2026/requirements_mps.txt +++ b/skills/detection/yolo-detection-2026/requirements_mps.txt @@ -4,7 +4,7 @@ torch>=2.4.0 torchvision>=0.19.0 ultralytics>=8.3.0 coremltools>=8.0 -numpy>=1.24.0 +numpy>=1.24.0,<2.0.0 
opencv-python-headless>=4.8.0 Pillow>=10.0.0 diff --git a/skills/detection/yolo-detection-2026/requirements_rocm.txt b/skills/detection/yolo-detection-2026/requirements_rocm.txt index 0d0ca7f..00a2909 100644 --- a/skills/detection/yolo-detection-2026/requirements_rocm.txt +++ b/skills/detection/yolo-detection-2026/requirements_rocm.txt @@ -1,10 +1,12 @@ # YOLO 2026 β€” ROCm (AMD GPU) requirements -# Installs PyTorch with ROCm 6.2 support ---extra-index-url https://download.pytorch.org/whl/rocm6.2 +# NOTE: deploy.sh auto-detects the installed ROCm version and installs +# PyTorch from the matching index URL. This file is a reference manifest. torch>=2.4.0 torchvision>=0.19.0 ultralytics>=8.3.0 onnxruntime-rocm>=1.18 +onnx>=1.12.0,<2.0.0 # pre-install: prevents ultralytics from auto-installing CPU onnxruntime +onnxslim>=0.1.71 # pre-install: same reason numpy>=1.24.0 opencv-python-headless>=4.8.0 Pillow>=10.0.0 diff --git a/skills/detection/yolo-detection-2026/scripts/detect.py b/skills/detection/yolo-detection-2026/scripts/detect.py index 5608b64..d149374 100644 --- a/skills/detection/yolo-detection-2026/scripts/detect.py +++ b/skills/detection/yolo-detection-2026/scripts/detect.py @@ -15,15 +15,73 @@ """ import sys +import os import json import argparse import signal import time from pathlib import Path -# Add skills/lib to path for shared modules -sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent.parent / "lib")) -from env_config import HardwareEnv # noqa: E402 +# Prevent ultralytics from auto-installing packages (e.g. onnxruntime-gpu on ROCm) +os.environ.setdefault("YOLO_AUTOINSTALL", "0") + +# Import env_config β€” try multiple locations: +# 1. Same directory as detect.py (bundled copy) +# 2. DeepCamera repo: skills/lib/ +# 3. 
Inline fallback (basic PyTorch-only mode) +_script_dir = Path(__file__).resolve().parent +_lib_candidates = [ + _script_dir, # bundled alongside detect.py + _script_dir.parent.parent.parent.parent / "lib", # repo: skills/lib/ + _script_dir.parent / "lib", # skill-level lib/ +] +_env_config_loaded = False +for _lib_path in _lib_candidates: + if (_lib_path / "env_config.py").exists(): + sys.path.insert(0, str(_lib_path)) + from env_config import HardwareEnv # noqa: E402 + _env_config_loaded = True + break + +if not _env_config_loaded: + # Minimal fallback β€” PyTorch only, no optimization + import types + _msg = "[YOLO-2026] WARNING: env_config.py not found, using PyTorch-only fallback" + print(_msg, file=sys.stderr, flush=True) + + class HardwareEnv: + def __init__(self): + self.backend = "cpu" + self.device = "cpu" + self.export_format = "none" + self.gpu_name = "" + self.gpu_memory_mb = 0 + self.driver_version = "" + self.framework_ok = False + self.export_ms = 0.0 + self.load_ms = 0.0 + + @staticmethod + def detect(): + import torch + env = HardwareEnv() + if torch.cuda.is_available(): + env.backend = "cuda"; env.device = "cuda" + elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): + env.backend = "mps"; env.device = "mps" + return env + + def load_optimized(self, model_name, use_optimized=True): + import time + from ultralytics import YOLO + t0 = time.perf_counter() + model = YOLO(f"{model_name}.pt") + model.to(self.device) + self.load_ms = (time.perf_counter() - t0) * 1000 + return model, "pytorch" + + def to_dict(self): + return {"backend": self.backend, "device": self.device} # Model size β†’ ultralytics model name mapping (YOLO26, released Jan 2026) @@ -118,7 +176,7 @@ def parse_args(): parser.add_argument("--config", type=str, help="Path to config JSON file") parser.add_argument("--model-size", type=str, default="nano", choices=["nano", "small", "medium", "large"]) - parser.add_argument("--confidence", type=float, default=0.5) + 
parser.add_argument("--confidence", type=float, default=0.8) parser.add_argument("--classes", type=str, default="person,car,dog,cat") parser.add_argument("--device", type=str, default="auto", choices=["auto", "cpu", "cuda", "mps", "rocm"]) @@ -169,7 +227,7 @@ def main(): config = load_config(args) model_size = config.get("model_size", "nano") - confidence = config.get("confidence", 0.5) + confidence = config.get("confidence", 0.8) fps = config.get("fps", 5) use_optimized = config.get("use_optimized", config.get("use_coreml", True)) if isinstance(use_optimized, str): diff --git a/skills/detection/yolo-detection-2026/scripts/env_config.py b/skills/detection/yolo-detection-2026/scripts/env_config.py new file mode 100644 index 0000000..ff42e6f --- /dev/null +++ b/skills/detection/yolo-detection-2026/scripts/env_config.py @@ -0,0 +1,528 @@ +""" +env_config.py β€” Shared hardware environment detection and model optimization. + +Provides a single entry point for any DeepCamera skill to: + 1. Detect available compute hardware (NVIDIA, AMD, Apple, Intel, CPU) + 2. Auto-export models to the optimal inference format + 3. 
Load cached optimized models with PyTorch fallback + +Usage: + from lib.env_config import HardwareEnv + + env = HardwareEnv.detect() + model, fmt = env.load_optimized("yolo26n") +""" + +import json +import os +import platform +import shutil +import subprocess +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Optional + + +def _log(msg: str): + """Log to stderr.""" + print(f"[env_config] {msg}", file=sys.stderr, flush=True) + + +# ─── Backend definitions ──────────────────────────────────────────────────── + +@dataclass +class BackendSpec: + """Specification for a compute backend's optimized export.""" + name: str # "cuda", "rocm", "mps", "intel", "cpu" + export_format: str # ultralytics export format string + model_suffix: str # file extension/dir to look for cached model + half: bool = True # use FP16 + extra_export_args: dict = field(default_factory=dict) + + +BACKEND_SPECS = { + "cuda": BackendSpec( + name="cuda", + export_format="engine", + model_suffix=".engine", + half=True, + ), + "rocm": BackendSpec( + name="rocm", + export_format="pytorch", # PyTorch + HIP β€” ultralytics ONNX doesn't support ROCMExecutionProvider + model_suffix=".pt", + half=False, + ), + "mps": BackendSpec( + name="mps", + export_format="coreml", + model_suffix=".mlpackage", + half=True, + extra_export_args={"nms": False}, + ), + "intel": BackendSpec( + name="intel", + export_format="openvino", + model_suffix="_openvino_model", + half=True, + ), + "cpu": BackendSpec( + name="cpu", + export_format="onnx", + model_suffix=".onnx", + half=False, + ), +} + + +# ─── Hardware detection ────────────────────────────────────────────────────── + +@dataclass +class HardwareEnv: + """Detected hardware environment with model optimization capabilities.""" + + backend: str = "cpu" # "cuda" | "rocm" | "mps" | "intel" | "cpu" + device: str = "cpu" # torch device string + export_format: str = "onnx" # optimal export format + gpu_name: str = "" 
# human-readable GPU name + gpu_memory_mb: int = 0 # GPU memory in MB + driver_version: str = "" # GPU driver version + framework_ok: bool = False # True if optimized runtime is importable + detection_details: dict = field(default_factory=dict) # raw detection info + + # Timing (populated by export/load) + export_ms: float = 0.0 + load_ms: float = 0.0 + + @staticmethod + def detect() -> "HardwareEnv": + """Probe the system and return a populated HardwareEnv.""" + env = HardwareEnv() + + # Try each backend in priority order + if env._try_cuda(): + pass + elif env._try_rocm(): + pass + elif env._try_mps(): + pass + elif env._try_intel(): + pass + else: + env._fallback_cpu() + + # Set export format from backend spec + spec = BACKEND_SPECS.get(env.backend, BACKEND_SPECS["cpu"]) + env.export_format = spec.export_format + + # Check if optimized runtime is available + env.framework_ok = env._check_framework() + + _log(f"Detected: backend={env.backend}, device={env.device}, " + f"gpu={env.gpu_name or 'none'}, " + f"format={env.export_format}, " + f"framework_ok={env.framework_ok}") + + return env + + def _try_cuda(self) -> bool: + """Detect NVIDIA GPU via nvidia-smi and torch.""" + if not shutil.which("nvidia-smi"): + return False + try: + result = subprocess.run( + ["nvidia-smi", "--query-gpu=name,memory.total,driver_version", + "--format=csv,noheader,nounits"], + capture_output=True, text=True, timeout=10, + ) + if result.returncode != 0: + return False + + line = result.stdout.strip().split("\n")[0] + parts = [p.strip() for p in line.split(",")] + if len(parts) >= 3: + self.backend = "cuda" + self.device = "cuda" + self.gpu_name = parts[0] + self.gpu_memory_mb = int(float(parts[1])) + self.driver_version = parts[2] + self.detection_details["nvidia_smi"] = line + _log(f"NVIDIA GPU: {self.gpu_name} ({self.gpu_memory_mb}MB, driver {self.driver_version})") + return True + except (subprocess.TimeoutExpired, FileNotFoundError, ValueError) as e: + _log(f"nvidia-smi probe 
failed: {e}") + return False + + def _try_rocm(self) -> bool: + """Detect AMD GPU via amd-smi (preferred) or rocm-smi.""" + has_amd_smi = shutil.which("amd-smi") is not None + has_rocm_smi = shutil.which("rocm-smi") is not None + has_rocm_dir = Path("/opt/rocm").is_dir() + + if not (has_amd_smi or has_rocm_smi or has_rocm_dir): + return False + + self.backend = "rocm" + # ROCm exposes as CUDA in PyTorch β€” but only if PyTorch-ROCm is installed + try: + import torch + if torch.cuda.is_available(): + self.device = "cuda" + else: + self.device = "cpu" + _log("PyTorch CUDA/ROCm not available, using CPU for PyTorch fallback") + except ImportError: + self.device = "cpu" + + # Strategy 1: amd-smi static --json (ROCm 6.3+/7.x, richest output) + if has_amd_smi: + try: + result = subprocess.run( + ["amd-smi", "static", "--json"], + capture_output=True, text=True, timeout=10, + ) + if result.returncode == 0: + import json as _json + data = _json.loads(result.stdout) + # amd-smi may return {"gpu_data": [...]} or a bare list + gpu_list = data.get("gpu_data", data) if isinstance(data, dict) else data + if isinstance(gpu_list, list) and len(gpu_list) > 0: + # Pick GPU with most VRAM (discrete > iGPU) + def _vram_mb(g): + vram = g.get("vram", {}).get("size", {}) + if isinstance(vram, dict): + return int(vram.get("value", 0)) + return 0 + + best_gpu = max(gpu_list, key=_vram_mb) + best_idx = gpu_list.index(best_gpu) + asic = best_gpu.get("asic", {}) + vram = best_gpu.get("vram", {}).get("size", {}) + + self.gpu_name = asic.get("market_name", "AMD GPU") + self.gpu_memory_mb = int(vram.get("value", 0)) if isinstance(vram, dict) else 0 + self.detection_details["amd_smi"] = { + "gpu_index": best_idx, + "gfx_version": asic.get("target_graphics_version", ""), + "total_gpus": len(gpu_list), + } + + # Pin to discrete GPU if multiple GPUs present + if len(gpu_list) > 1: + os.environ["HIP_VISIBLE_DEVICES"] = str(best_idx) + os.environ["ROCR_VISIBLE_DEVICES"] = str(best_idx) + 
_log(f"Multi-GPU: pinned to GPU {best_idx} ({self.gpu_name})") + except (subprocess.TimeoutExpired, FileNotFoundError, ValueError, Exception) as e: + _log(f"amd-smi probe failed: {e}") + + # Strategy 2: rocm-smi fallback (legacy ROCm <6.3) + if not self.gpu_name and has_rocm_smi: + try: + result = subprocess.run( + ["rocm-smi", "--showproductname", "--csv"], + capture_output=True, text=True, timeout=10, + ) + if result.returncode == 0: + lines = result.stdout.strip().split("\n") + if len(lines) > 1: + self.gpu_name = lines[1].split(",")[0].strip() + self.detection_details["rocm_smi"] = result.stdout.strip() + except (subprocess.TimeoutExpired, FileNotFoundError): + pass + + try: + result = subprocess.run( + ["rocm-smi", "--showmeminfo", "vram", "--csv"], + capture_output=True, text=True, timeout=10, + ) + if result.returncode == 0: + for line in result.stdout.strip().split("\n")[1:]: + parts = line.split(",") + if len(parts) >= 2: + try: + self.gpu_memory_mb = int(float(parts[0].strip()) / (1024 * 1024)) + except ValueError: + pass + break + except (subprocess.TimeoutExpired, FileNotFoundError): + pass + + _log(f"AMD ROCm GPU: {self.gpu_name or 'detected'} ({self.gpu_memory_mb}MB)") + return True + + def _try_mps(self) -> bool: + """Detect Apple Silicon via uname + sysctl.""" + if platform.system() != "Darwin" or platform.machine() != "arm64": + return False + + self.backend = "mps" + self.device = "mps" + + # Get chip name + try: + result = subprocess.run( + ["sysctl", "-n", "machdep.cpu.brand_string"], + capture_output=True, text=True, timeout=5, + ) + if result.returncode == 0: + self.gpu_name = result.stdout.strip() + except (subprocess.TimeoutExpired, FileNotFoundError): + self.gpu_name = "Apple Silicon" + + # Get total memory (shared with GPU on Apple Silicon) + try: + result = subprocess.run( + ["sysctl", "-n", "hw.memsize"], + capture_output=True, text=True, timeout=5, + ) + if result.returncode == 0: + self.gpu_memory_mb = int(int(result.stdout.strip()) / 
(1024 * 1024)) + except (subprocess.TimeoutExpired, FileNotFoundError, ValueError): + pass + + _log(f"Apple Silicon: {self.gpu_name} ({self.gpu_memory_mb}MB unified)") + return True + + def _try_intel(self) -> bool: + """Detect Intel OpenVINO-capable hardware.""" + # Check for OpenVINO installation + has_openvino = False + try: + import openvino # noqa: F401 + has_openvino = True + except ImportError: + # Check for system install + has_openvino = Path("/opt/intel/openvino").is_dir() + + if not has_openvino: + # Check CPU flags for Intel-specific features (AVX-512, AMX) + try: + if platform.system() == "Linux": + with open("/proc/cpuinfo") as f: + cpuinfo = f.read() + if "GenuineIntel" in cpuinfo: + self.backend = "intel" + self.device = "cpu" + self.gpu_name = "Intel CPU" + _log("Intel CPU detected (no OpenVINO installed)") + return True + except FileNotFoundError: + pass + return False + + self.backend = "intel" + self.device = "cpu" # OpenVINO handles device selection internally + self.gpu_name = "Intel (OpenVINO)" + + # Check for Intel GPU / NPU + try: + from openvino.runtime import Core + core = Core() + devices = core.available_devices + self.detection_details["openvino_devices"] = devices + if "GPU" in devices: + self.gpu_name = "Intel GPU (OpenVINO)" + if "NPU" in devices: + self.gpu_name = "Intel NPU (OpenVINO)" + _log(f"OpenVINO devices: {devices}") + except Exception: + pass + + _log(f"Intel: {self.gpu_name}") + return True + + def _fallback_cpu(self): + """CPU-only fallback.""" + self.backend = "cpu" + self.device = "cpu" + self.gpu_name = "" + + # Report CPU info + try: + self.detection_details["cpu"] = platform.processor() or "unknown" + except Exception: + pass + + _log("No GPU detected, using CPU backend") + + def _check_rocm_runtime(self): + """Verify onnxruntime has ROCm provider, not just CPU.""" + import onnxruntime + providers = onnxruntime.get_available_providers() + if "ROCmExecutionProvider" in providers or "MIGraphXExecutionProvider" in 
providers: + _log(f"onnxruntime ROCm providers: {providers}") + return True + _log(f"onnxruntime providers: {providers} β€” ROCmExecutionProvider not found") + _log("Fix: pip uninstall onnxruntime && pip install onnxruntime-rocm") + raise ImportError("ROCmExecutionProvider not available") + + def _check_framework(self) -> bool: + """Check if the optimized inference runtime is importable.""" + checks = { + "cuda": lambda: __import__("tensorrt"), + "rocm": lambda: self._check_rocm_runtime(), + "mps": lambda: __import__("coremltools"), + "intel": lambda: __import__("openvino"), + "cpu": lambda: __import__("onnxruntime"), + } + + check = checks.get(self.backend) + if not check: + return False + try: + check() + return True + except ImportError: + _log(f"Optimized runtime not installed for {self.backend}, " + f"will use PyTorch fallback") + return False + + # ─── Model export & loading ────────────────────────────────────────── + + def get_optimized_path(self, model_name: str) -> Path: + """Get the expected path for the optimized model.""" + spec = BACKEND_SPECS.get(self.backend, BACKEND_SPECS["cpu"]) + return Path(f"{model_name}{spec.model_suffix}") + + def export_model(self, model, model_name: str) -> Optional[Path]: + """Export PyTorch model to optimal format. 
Returns path or None.""" + if not self.framework_ok: + _log(f"Skipping export β€” {self.backend} runtime not available") + return None + + spec = BACKEND_SPECS.get(self.backend, BACKEND_SPECS["cpu"]) + optimized_path = self.get_optimized_path(model_name) + + # Already exported + if optimized_path.exists(): + _log(f"Cached model found: {optimized_path}") + return optimized_path + + # Guard: numpy 2.x breaks coremltools PyTorchβ†’MIL converter + # (TypeError: only 0-dimensional arrays can be converted to Python scalars) + if spec.export_format == "coreml": + try: + import numpy as np + np_major = int(np.__version__.split('.')[0]) + if np_major >= 2: + _log(f"numpy {np.__version__} detected β€” CoreML export " + f"requires numpy<2.0.0 (coremltools incompatibility)") + _log("Fix: pip install 'numpy>=1.24,<2.0'") + return None + except Exception: + pass # If numpy check fails, try export anyway + + try: + _log(f"Exporting {model_name}.pt β†’ {spec.export_format} " + f"(one-time, may take 30-120s)...") + t0 = time.perf_counter() + + export_kwargs = { + "format": spec.export_format, + "half": spec.half, + } + export_kwargs.update(spec.extra_export_args) + + exported = model.export(**export_kwargs) + self.export_ms = (time.perf_counter() - t0) * 1000 + + exported_path = Path(exported) + if exported_path.exists(): + _log(f"Export complete: {exported_path} ({self.export_ms:.0f}ms)") + return exported_path + + _log(f"Export returned {exported} but path not found") + except Exception as e: + _log(f"Export failed ({spec.export_format}): {e}") + + return None + + def load_optimized(self, model_name: str, use_optimized: bool = True): + """ + Load the best available model for this hardware. 
+ + Returns: + (model, format_str) β€” the YOLO model and its format name + """ + from ultralytics import YOLO + + t0 = time.perf_counter() + + if use_optimized and self.framework_ok: + # Try loading from cache first (no export needed) + optimized_path = self.get_optimized_path(model_name) + if optimized_path.exists(): + try: + model = YOLO(str(optimized_path)) + self.load_ms = (time.perf_counter() - t0) * 1000 + _log(f"Loaded {self.export_format} model ({self.load_ms:.0f}ms)") + return model, self.export_format + except Exception as e: + _log(f"Failed to load cached model: {e}") + + # Try exporting then loading + pt_model = YOLO(f"{model_name}.pt") + exported = self.export_model(pt_model, model_name) + if exported: + try: + model = YOLO(str(exported)) + self.load_ms = (time.perf_counter() - t0) * 1000 + _log(f"Loaded freshly exported {self.export_format} model ({self.load_ms:.0f}ms)") + return model, self.export_format + except Exception as e: + _log(f"Failed to load exported model: {e}") + + # Fallback: use the PT model we already loaded + _log("Falling back to PyTorch model") + fallback_device = self.device + if fallback_device == "cuda": + try: + import torch + if not torch.cuda.is_available(): + fallback_device = "cpu" + _log("torch.cuda not available, falling back to CPU") + except ImportError: + fallback_device = "cpu" + pt_model.to(fallback_device) + self.device = fallback_device + self.load_ms = (time.perf_counter() - t0) * 1000 + return pt_model, "pytorch" + + # No optimization requested or framework missing + model = YOLO(f"{model_name}.pt") + fallback_device = self.device + if fallback_device == "cuda": + try: + import torch + if not torch.cuda.is_available(): + fallback_device = "cpu" + _log("torch.cuda not available, falling back to CPU") + except ImportError: + fallback_device = "cpu" + model.to(fallback_device) + self.device = fallback_device + self.load_ms = (time.perf_counter() - t0) * 1000 + return model, "pytorch" + + def to_dict(self) -> dict: 
+ """Serialize environment info for JSON output.""" + return { + "backend": self.backend, + "device": self.device, + "export_format": self.export_format, + "gpu_name": self.gpu_name, + "gpu_memory_mb": self.gpu_memory_mb, + "driver_version": self.driver_version, + "framework_ok": self.framework_ok, + "export_ms": round(self.export_ms, 1), + "load_ms": round(self.load_ms, 1), + } + + +# ─── CLI: run standalone for diagnostics ───────────────────────────────────── + +if __name__ == "__main__": + env = HardwareEnv.detect() + print(json.dumps(env.to_dict(), indent=2)) diff --git a/skills/lib/env_config.py b/skills/lib/env_config.py index 1676e21..ff42e6f 100644 --- a/skills/lib/env_config.py +++ b/skills/lib/env_config.py @@ -51,9 +51,9 @@ class BackendSpec: ), "rocm": BackendSpec( name="rocm", - export_format="onnx", - model_suffix=".onnx", - half=False, # ONNX Runtime ROCm handles precision internally + export_format="pytorch", # PyTorch + HIP β€” ultralytics ONNX doesn't support ROCMExecutionProvider + model_suffix=".pt", + half=False, ), "mps": BackendSpec( name="mps", @@ -156,17 +156,69 @@ def _try_cuda(self) -> bool: return False def _try_rocm(self) -> bool: - """Detect AMD GPU via rocm-smi or /opt/rocm.""" + """Detect AMD GPU via amd-smi (preferred) or rocm-smi.""" + has_amd_smi = shutil.which("amd-smi") is not None has_rocm_smi = shutil.which("rocm-smi") is not None has_rocm_dir = Path("/opt/rocm").is_dir() - if not (has_rocm_smi or has_rocm_dir): + if not (has_amd_smi or has_rocm_smi or has_rocm_dir): return False self.backend = "rocm" - self.device = "cuda" # ROCm exposes as CUDA in PyTorch + # ROCm exposes as CUDA in PyTorch β€” but only if PyTorch-ROCm is installed + try: + import torch + if torch.cuda.is_available(): + self.device = "cuda" + else: + self.device = "cpu" + _log("PyTorch CUDA/ROCm not available, using CPU for PyTorch fallback") + except ImportError: + self.device = "cpu" - if has_rocm_smi: + # Strategy 1: amd-smi static --json (ROCm 6.3+/7.x, 
richest output) + if has_amd_smi: + try: + result = subprocess.run( + ["amd-smi", "static", "--json"], + capture_output=True, text=True, timeout=10, + ) + if result.returncode == 0: + import json as _json + data = _json.loads(result.stdout) + # amd-smi may return {"gpu_data": [...]} or a bare list + gpu_list = data.get("gpu_data", data) if isinstance(data, dict) else data + if isinstance(gpu_list, list) and len(gpu_list) > 0: + # Pick GPU with most VRAM (discrete > iGPU) + def _vram_mb(g): + vram = g.get("vram", {}).get("size", {}) + if isinstance(vram, dict): + return int(vram.get("value", 0)) + return 0 + + best_gpu = max(gpu_list, key=_vram_mb) + best_idx = gpu_list.index(best_gpu) + asic = best_gpu.get("asic", {}) + vram = best_gpu.get("vram", {}).get("size", {}) + + self.gpu_name = asic.get("market_name", "AMD GPU") + self.gpu_memory_mb = int(vram.get("value", 0)) if isinstance(vram, dict) else 0 + self.detection_details["amd_smi"] = { + "gpu_index": best_idx, + "gfx_version": asic.get("target_graphics_version", ""), + "total_gpus": len(gpu_list), + } + + # Pin to discrete GPU if multiple GPUs present + if len(gpu_list) > 1: + os.environ["HIP_VISIBLE_DEVICES"] = str(best_idx) + os.environ["ROCR_VISIBLE_DEVICES"] = str(best_idx) + _log(f"Multi-GPU: pinned to GPU {best_idx} ({self.gpu_name})") + except (subprocess.TimeoutExpired, FileNotFoundError, ValueError, Exception) as e: + _log(f"amd-smi probe failed: {e}") + + # Strategy 2: rocm-smi fallback (legacy ROCm <6.3) + if not self.gpu_name and has_rocm_smi: try: result = subprocess.run( ["rocm-smi", "--showproductname", "--csv"], @@ -186,7 +238,6 @@ def _try_rocm(self) -> bool: capture_output=True, text=True, timeout=10, ) if result.returncode == 0: - # Parse total VRAM for line in result.stdout.strip().split("\n")[1:]: parts = line.split(",") if len(parts) >= 2: @@ -296,11 +347,22 @@ def _fallback_cpu(self): _log("No GPU detected, using CPU backend") + def _check_rocm_runtime(self): + """Verify onnxruntime has 
ROCm provider, not just CPU.""" + import onnxruntime + providers = onnxruntime.get_available_providers() + if "ROCmExecutionProvider" in providers or "MIGraphXExecutionProvider" in providers: + _log(f"onnxruntime ROCm providers: {providers}") + return True + _log(f"onnxruntime providers: {providers} β€” ROCmExecutionProvider not found") + _log("Fix: pip uninstall onnxruntime && pip install onnxruntime-rocm") + raise ImportError("ROCmExecutionProvider not available") + def _check_framework(self) -> bool: """Check if the optimized inference runtime is importable.""" checks = { "cuda": lambda: __import__("tensorrt"), - "rocm": lambda: __import__("onnxruntime"), + "rocm": lambda: self._check_rocm_runtime(), "mps": lambda: __import__("coremltools"), "intel": lambda: __import__("openvino"), "cpu": lambda: __import__("onnxruntime"), @@ -338,6 +400,20 @@ def export_model(self, model, model_name: str) -> Optional[Path]: _log(f"Cached model found: {optimized_path}") return optimized_path + # Guard: numpy 2.x breaks coremltools PyTorchβ†’MIL converter + # (TypeError: only 0-dimensional arrays can be converted to Python scalars) + if spec.export_format == "coreml": + try: + import numpy as np + np_major = int(np.__version__.split('.')[0]) + if np_major >= 2: + _log(f"numpy {np.__version__} detected β€” CoreML export " + f"requires numpy<2.0.0 (coremltools incompatibility)") + _log("Fix: pip install 'numpy>=1.24,<2.0'") + return None + except Exception: + pass # If numpy check fails, try export anyway + try: _log(f"Exporting {model_name}.pt β†’ {spec.export_format} " f"(one-time, may take 30-120s)...") @@ -400,13 +476,33 @@ def load_optimized(self, model_name: str, use_optimized: bool = True): # Fallback: use the PT model we already loaded _log("Falling back to PyTorch model") - pt_model.to(self.device) + fallback_device = self.device + if fallback_device == "cuda": + try: + import torch + if not torch.cuda.is_available(): + fallback_device = "cpu" + _log("torch.cuda not 
available, falling back to CPU") + except ImportError: + fallback_device = "cpu" + pt_model.to(fallback_device) + self.device = fallback_device self.load_ms = (time.perf_counter() - t0) * 1000 return pt_model, "pytorch" # No optimization requested or framework missing model = YOLO(f"{model_name}.pt") - model.to(self.device) + fallback_device = self.device + if fallback_device == "cuda": + try: + import torch + if not torch.cuda.is_available(): + fallback_device = "cpu" + _log("torch.cuda not available, falling back to CPU") + except ImportError: + fallback_device = "cpu" + model.to(fallback_device) + self.device = fallback_device self.load_ms = (time.perf_counter() - t0) * 1000 return model, "pytorch" diff --git a/skills/lib/test_env_config_rocm.py b/skills/lib/test_env_config_rocm.py new file mode 100644 index 0000000..b17d92a --- /dev/null +++ b/skills/lib/test_env_config_rocm.py @@ -0,0 +1,303 @@ +#!/usr/bin/env python3 +""" +Unit tests for ROCm GPU detection in env_config.py. + +Tests amd-smi parsing, rocm-smi fallback, provider verification, +and multi-GPU selection β€” all mocked, no ROCm hardware required. 
+ +Run: python -m pytest skills/lib/test_env_config_rocm.py -v +""" + +import json +import os +import subprocess +import sys +from pathlib import Path +from unittest import mock + +import pytest + +# Ensure env_config is importable from skills/lib/ +sys.path.insert(0, str(Path(__file__).resolve().parent)) +from env_config import HardwareEnv, _log # noqa: E402 + + +# ── Sample amd-smi JSON (dual-GPU: discrete R9700 + iGPU) ───────────────── + +AMD_SMI_DUAL_GPU = json.dumps([ + { + "asic": { + "market_name": "AMD Radeon AI PRO R9700", + "vendor_id": "0x1002", + "target_graphics_version": "gfx1201", + }, + "vram": { + "size": {"value": 32624, "unit": "MB"}, + }, + }, + { + "asic": { + "market_name": "AMD Radeon Graphics", + "vendor_id": "0x1002", + "target_graphics_version": "gfx1036", + }, + "vram": { + "size": {"value": 2048, "unit": "MB"}, + }, + }, +]) + +AMD_SMI_SINGLE_GPU = json.dumps([ + { + "asic": { + "market_name": "AMD Radeon RX 7900 XTX", + "target_graphics_version": "gfx1100", + }, + "vram": { + "size": {"value": 24576, "unit": "MB"}, + }, + }, +]) + +# Wrapped in gpu_data key (some amd-smi versions do this) +AMD_SMI_WRAPPED = json.dumps({ + "gpu_data": json.loads(AMD_SMI_SINGLE_GPU), +}) + +ROCM_SMI_PRODUCTNAME = "device,Card Series\ncard0,AMD Radeon RX 7900 XTX\n" +ROCM_SMI_MEMINFO = "GPU,vram Total Memory (B)\n25769803776,25769803776\n" + + +# ── Helpers ──────────────────────────────────────────────────────────────── + +def _make_run_result(stdout="", returncode=0): + return subprocess.CompletedProcess(args=[], returncode=returncode, stdout=stdout, stderr="") + + +def _mock_which(available_tools): + """Return a shutil.which mock that only finds tools in available_tools.""" + def _which(name): + return f"/usr/bin/{name}" if name in available_tools else None + return _which + + +# ── Tests: _try_rocm ────────────────────────────────────────────────────── + +class TestTryRocmAmdSmi: + """amd-smi primary strategy.""" + + 
@mock.patch("env_config.shutil.which", _mock_which({"amd-smi"})) + @mock.patch("env_config.Path.is_dir", return_value=False) + @mock.patch("env_config.subprocess.run") + def test_dual_gpu_picks_discrete(self, mock_run, _mock_dir): + """With 2 GPUs, picks the R9700 (32 GB) over iGPU (2 GB).""" + mock_run.return_value = _make_run_result(AMD_SMI_DUAL_GPU) + + mock_torch = mock.MagicMock() + mock_torch.cuda.is_available.return_value = True + with mock.patch.dict("sys.modules", {"torch": mock_torch}): + env = HardwareEnv() + result = env._try_rocm() + + assert result is True + assert env.backend == "rocm" + assert env.device == "cuda" + assert env.gpu_name == "AMD Radeon AI PRO R9700" + assert env.gpu_memory_mb == 32624 + assert env.detection_details["amd_smi"]["gpu_index"] == 0 + assert env.detection_details["amd_smi"]["gfx_version"] == "gfx1201" + assert env.detection_details["amd_smi"]["total_gpus"] == 2 + + @mock.patch("env_config.shutil.which", _mock_which({"amd-smi"})) + @mock.patch("env_config.Path.is_dir", return_value=False) + @mock.patch("env_config.subprocess.run") + def test_dual_gpu_sets_env_vars(self, mock_run, _mock_dir): + """Multi-GPU: HIP_VISIBLE_DEVICES and ROCR_VISIBLE_DEVICES are set.""" + mock_run.return_value = _make_run_result(AMD_SMI_DUAL_GPU) + + # Clean env + for var in ("HIP_VISIBLE_DEVICES", "ROCR_VISIBLE_DEVICES"): + os.environ.pop(var, None) + + env = HardwareEnv() + env._try_rocm() + + assert os.environ.get("HIP_VISIBLE_DEVICES") == "0" + assert os.environ.get("ROCR_VISIBLE_DEVICES") == "0" + + # Cleanup + os.environ.pop("HIP_VISIBLE_DEVICES", None) + os.environ.pop("ROCR_VISIBLE_DEVICES", None) + + @mock.patch("env_config.shutil.which", _mock_which({"amd-smi"})) + @mock.patch("env_config.Path.is_dir", return_value=False) + @mock.patch("env_config.subprocess.run") + def test_single_gpu_no_env_vars(self, mock_run, _mock_dir): + """Single GPU: HIP_VISIBLE_DEVICES NOT set.""" + mock_run.return_value = _make_run_result(AMD_SMI_SINGLE_GPU) + + 
for var in ("HIP_VISIBLE_DEVICES", "ROCR_VISIBLE_DEVICES"): + os.environ.pop(var, None) + + env = HardwareEnv() + env._try_rocm() + + assert env.gpu_name == "AMD Radeon RX 7900 XTX" + assert env.gpu_memory_mb == 24576 + assert "HIP_VISIBLE_DEVICES" not in os.environ + + @mock.patch("env_config.shutil.which", _mock_which({"amd-smi"})) + @mock.patch("env_config.Path.is_dir", return_value=False) + @mock.patch("env_config.subprocess.run") + def test_wrapped_gpu_data_format(self, mock_run, _mock_dir): + """amd-smi returning {\"gpu_data\": [...]} wrapper.""" + mock_run.return_value = _make_run_result(AMD_SMI_WRAPPED) + + env = HardwareEnv() + env._try_rocm() + + assert env.gpu_name == "AMD Radeon RX 7900 XTX" + assert env.gpu_memory_mb == 24576 + + @mock.patch("env_config.shutil.which", _mock_which({"amd-smi"})) + @mock.patch("env_config.Path.is_dir", return_value=False) + @mock.patch("env_config.subprocess.run") + def test_amd_smi_failure_returns_true_with_defaults(self, mock_run, _mock_dir): + """amd-smi fails β†’ still returns True (ROCm detected), empty gpu_name.""" + mock_run.return_value = _make_run_result("", returncode=1) + + env = HardwareEnv() + result = env._try_rocm() + + assert result is True + assert env.backend == "rocm" + assert env.gpu_name == "" # No name parsed, but backend detected + + @mock.patch("env_config.shutil.which", _mock_which({"amd-smi"})) + @mock.patch("env_config.Path.is_dir", return_value=False) + @mock.patch("env_config.subprocess.run") + def test_no_pytorch_rocm_falls_back_to_cpu_device(self, mock_run, _mock_dir): + """When torch.cuda.is_available() is False, device stays 'cpu'.""" + mock_run.return_value = _make_run_result(AMD_SMI_SINGLE_GPU) + + mock_torch = mock.MagicMock() + mock_torch.cuda.is_available.return_value = False + with mock.patch.dict("sys.modules", {"torch": mock_torch}): + env = HardwareEnv() + env._try_rocm() + + assert env.backend == "rocm" + assert env.device == "cpu" # No PyTorch-ROCm β†’ CPU fallback + assert 
env.gpu_name == "AMD Radeon RX 7900 XTX" # GPU still detected + + +class TestTryRocmFallback: + """rocm-smi fallback (amd-smi not available).""" + + @mock.patch("env_config.shutil.which", _mock_which({"rocm-smi"})) + @mock.patch("env_config.Path.is_dir", return_value=False) + @mock.patch("env_config.subprocess.run") + def test_rocm_smi_parses_name_and_vram(self, mock_run, _mock_dir): + """Legacy rocm-smi fallback parses product name and VRAM.""" + def side_effect(cmd, **kwargs): + if "--showproductname" in cmd: + return _make_run_result(ROCM_SMI_PRODUCTNAME) + elif "--showmeminfo" in cmd: + return _make_run_result(ROCM_SMI_MEMINFO) + return _make_run_result("", returncode=1) + + mock_run.side_effect = side_effect + + env = HardwareEnv() + result = env._try_rocm() + + assert result is True + # NOTE: rocm-smi --showproductname CSV puts device ID in col 0 ("card0"), + # which is why amd-smi is the preferred strategy. This is the known + # limitation documented in the original bug report. + assert env.gpu_name == "card0" + # 25769803776 / (1024*1024) = 24576 + assert env.gpu_memory_mb == 24576 + + @mock.patch("env_config.shutil.which", _mock_which(set())) + @mock.patch("env_config.Path.is_dir", return_value=True) + def test_only_opt_rocm_dir(self, _mock_dir): + """Only /opt/rocm exists β€” detects ROCm with no GPU info.""" + env = HardwareEnv() + result = env._try_rocm() + + assert result is True + assert env.backend == "rocm" + assert env.gpu_name == "" + + @mock.patch("env_config.shutil.which", _mock_which(set())) + @mock.patch("env_config.Path.is_dir", return_value=False) + def test_no_rocm_at_all(self, _mock_dir): + """No amd-smi, no rocm-smi, no /opt/rocm β†’ returns False.""" + env = HardwareEnv() + result = env._try_rocm() + + assert result is False + assert env.backend == "cpu" # unchanged default + + +# ── Tests: _check_rocm_runtime ──────────────────────────────────────────── + +class TestCheckRocmRuntime: + """Verify ONNX Runtime provider check.""" + + def 
test_rocm_provider_present(self): + """ROCmExecutionProvider in list β†’ returns True.""" + env = HardwareEnv() + with mock.patch.dict("sys.modules", {"onnxruntime": mock.MagicMock()}): + ort = sys.modules["onnxruntime"] + ort.get_available_providers.return_value = [ + "ROCmExecutionProvider", "CPUExecutionProvider", + ] + assert env._check_rocm_runtime() is True + + def test_migraphx_provider_present(self): + """MIGraphXExecutionProvider also accepted.""" + env = HardwareEnv() + with mock.patch.dict("sys.modules", {"onnxruntime": mock.MagicMock()}): + ort = sys.modules["onnxruntime"] + ort.get_available_providers.return_value = [ + "MIGraphXExecutionProvider", "CPUExecutionProvider", + ] + assert env._check_rocm_runtime() is True + + def test_cpu_only_raises(self): + """CPU-only onnxruntime β†’ raises ImportError.""" + env = HardwareEnv() + with mock.patch.dict("sys.modules", {"onnxruntime": mock.MagicMock()}): + ort = sys.modules["onnxruntime"] + ort.get_available_providers.return_value = [ + "AzureExecutionProvider", "CPUExecutionProvider", + ] + with pytest.raises(ImportError, match="ROCmExecutionProvider not available"): + env._check_rocm_runtime() + + def test_onnxruntime_missing_raises(self): + """onnxruntime not installed β†’ ImportError from import.""" + env = HardwareEnv() + with mock.patch.dict("sys.modules", {"onnxruntime": None}): + with pytest.raises((ImportError, ModuleNotFoundError)): + env._check_rocm_runtime() + + +# ── Tests: _check_framework integration ─────────────────────────────────── + +class TestCheckFrameworkRocm: + """_check_framework uses _check_rocm_runtime for ROCm backend.""" + + def test_rocm_framework_ok_when_provider_present(self): + env = HardwareEnv() + env.backend = "rocm" + with mock.patch.object(env, "_check_rocm_runtime", return_value=True): + assert env._check_framework() is True + + def test_rocm_framework_not_ok_when_provider_missing(self): + env = HardwareEnv() + env.backend = "rocm" + with mock.patch.object(env, 
"_check_rocm_runtime", side_effect=ImportError("no ROCm")): + assert env._check_framework() is False