diff --git a/README.md b/README.md index b821942..a833fdd 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,8 @@ Each skill is a self-contained module with its own model, parameters, and [commu | **Detection** | [`yolo-detection-2026`](skills/detection/yolo-detection-2026/) | Real-time 80+ class detection β€” auto-accelerated via TensorRT / CoreML / OpenVINO / ONNX | βœ…| | | [`dinov3-grounding`](skills/detection/dinov3-grounding/) | Open-vocabulary detection β€” describe what to find | πŸ“ | | | [`person-recognition`](skills/detection/person-recognition/) | Re-identify individuals across cameras | πŸ“ | -| **Analysis** | [`home-security-benchmark`](skills/analysis/home-security-benchmark/) | [131-test evaluation suite](#-homesec-bench--how-secure-is-your-local-ai) for LLM & VLM security performance | βœ… | +| **Analysis** | [`home-security-benchmark`](skills/analysis/home-security-benchmark/) | [143-test evaluation suite](#-homesec-bench--how-secure-is-your-local-ai) for LLM & VLM security performance | βœ… | +| | [`smarthome-bench`](skills/analysis/smarthome-bench/) | Video anomaly detection benchmark β€” 105 clips across 7 smart home categories | βœ… | | | [`vlm-scene-analysis`](skills/analysis/vlm-scene-analysis/) | Describe what happened in recorded clips | πŸ“ | | | [`sam2-segmentation`](skills/analysis/sam2-segmentation/) | Click-to-segment with pixel-perfect masks | πŸ“ | | **Transformation** | [`depth-estimation`](skills/transformation/depth-estimation/) | Monocular depth maps with Depth Anything v2 | πŸ“ | @@ -140,7 +141,7 @@ Camera β†’ Frame Governor β†’ detect.py (JSONL) β†’ Aegis IPC β†’ Live Overlay ## πŸ“Š HomeSec-Bench β€” How Secure Is Your Local AI? -**HomeSec-Bench** is a 131-test security benchmark that measures how well your local AI performs as a security guard. It tests what matters: Can it detect a person in fog? Classify a break-in vs. a delivery? Resist prompt injection? Route alerts correctly at 3 AM? 
+**HomeSec-Bench** is a 143-test security benchmark that measures how well your local AI performs as a security guard. It tests what matters: Can it detect a person in fog? Classify a break-in vs. a delivery? Resist prompt injection? Route alerts correctly at 3 AM? Run it on your own hardware to know exactly where your setup stands. diff --git a/skills.json b/skills.json index 50f50d6..11ab31a 100644 --- a/skills.json +++ b/skills.json @@ -96,6 +96,75 @@ "medium", "large" ] + }, + { + "id": "smarthome-bench", + "name": "SmartHome Video Anomaly Benchmark", + "description": "VLM evaluation suite for video anomaly detection in smart home camera footage — 7 categories, 105 curated clips from SmartHome-Bench.", + "version": "1.0.0", + "category": "analysis", + "path": "skills/analysis/smarthome-bench", + "tags": [ + "benchmark", + "vlm", + "video", + "anomaly-detection", + "smart-home" + ], + "platforms": [ + "linux-x64", + "linux-arm64", + "darwin-arm64", + "darwin-x64", + "win-x64" + ], + "requirements": { + "node": ">=18", + "ram_gb": 2, + "system_deps": [ + "yt-dlp", + "ffmpeg" + ] + }, + "capabilities": [ + "benchmark", + "report_generation" + ], + "ui_unlocks": [ + "benchmark_report" + ] + }, + { + "id": "homesafe-bench", + "name": "HomeSafe Indoor Safety Benchmark", + "description": "VLM evaluation suite for indoor home safety hazard detection — 40 tests across 5 categories: fire/smoke, electrical, trip/fall, child safety, falling objects.", + "version": "1.0.0", + "category": "analysis", + "path": "skills/analysis/homesafe-bench", + "tags": [ + "benchmark", + "vlm", + "safety", + "hazard", + "indoor" + ], + "platforms": [ + "linux-x64", + "linux-arm64", + "darwin-arm64", + "darwin-x64", + "win-x64" + ], + "requirements": { + "node": ">=18", + "ram_gb": 2 + }, + "capabilities": [ + "benchmark" + ], + "ui_unlocks": [ + "benchmark_report" + ] } ] } \ No newline at end of file diff --git a/skills/analysis/home-security-benchmark/SKILL.md
b/skills/analysis/home-security-benchmark/SKILL.md index 5b859a1..03ccafb 100644 --- a/skills/analysis/home-security-benchmark/SKILL.md +++ b/skills/analysis/home-security-benchmark/SKILL.md @@ -1,7 +1,7 @@ --- name: Home Security AI Benchmark description: LLM & VLM evaluation suite for home security AI applications -version: 2.0.0 +version: 2.1.0 category: analysis runtime: node entry: scripts/run-benchmark.cjs @@ -15,7 +15,7 @@ requirements: # Home Security AI Benchmark -Comprehensive benchmark suite evaluating LLM and VLM models on **131 tests** across **16 suites** — context preprocessing, tool use, security classification, prompt injection resistance, alert routing, knowledge injection, VLM-to-alert triage, and scene analysis. +Comprehensive benchmark suite evaluating LLM and VLM models on **143 tests** across **16 suites** — context preprocessing, tool use, security classification, prompt injection resistance, alert routing, knowledge injection, VLM-to-alert triage, and scene analysis. ## Setup @@ -76,7 +76,7 @@ This skill includes a [`config.yaml`](config.yaml) that defines user-configurabl | Parameter | Type | Default | Description | |-----------|------|---------|-------------| -| `mode` | select | `llm` | Which suites to run: `llm` (96 tests), `vlm` (35 tests), or `full` (131 tests) | +| `mode` | select | `llm` | Which suites to run: `llm` (96 tests), `vlm` (47 tests), or `full` (143 tests) | | `noOpen` | boolean | `false` | Skip auto-opening the HTML report in browser | Platform parameters like `AEGIS_GATEWAY_URL` and `AEGIS_VLM_URL` are auto-injected by Aegis — they are **not** in `config.yaml`. See [Aegis Skill Platform Parameters](../../../docs/skill-params.md) for the full platform contract. @@ -112,7 +112,7 @@ AEGIS_SKILL_PARAMS={} Human-readable output goes to **stderr** (visible in Aegis console tab).
-## Test Suites (131 Tests) +## Test Suites (143 Tests) | Suite | Tests | Domain | |-------|-------|--------| @@ -131,7 +131,7 @@ Human-readable output goes to **stderr** (visible in Aegis console tab). | Alert Routing & Subscription | 5 | Channel targeting, schedule CRUD | | Knowledge Injection to Dialog | 5 | KI-personalized responses | | VLM-to-Alert Triage | 5 | Urgency classification from VLM | -| VLM Scene Analysis | 35 | Frame entity detection & description | +| VLM Scene Analysis | 47 | Frame entity detection & description (outdoor + indoor safety) | ## Results @@ -142,4 +142,4 @@ Results are saved to `~/.aegis-ai/benchmarks/` as JSON. An HTML report with cros - Node.js ≥ 18 - `npm install` (for `openai` SDK dependency) - Running LLM server (llama-server, OpenAI API, or any OpenAI-compatible endpoint) -- Optional: Running VLM server for scene analysis tests (35 tests) +- Optional: Running VLM server for scene analysis tests (47 tests) diff --git a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_blocked_exit.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_blocked_exit.png new file mode 100644 index 0000000..fdca045 Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_blocked_exit.png differ diff --git a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_child_cabinet.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_child_cabinet.png new file mode 100644 index 0000000..3a4e1e6 Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_child_cabinet.png differ diff --git a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_elec_cord.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_elec_cord.png new file mode 100644 index 0000000..4c7a552 Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_elec_cord.png differ diff --git
a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_elec_powerstrip.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_elec_powerstrip.png new file mode 100644 index 0000000..666c48a Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_elec_powerstrip.png differ diff --git a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fall_person.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fall_person.png new file mode 100644 index 0000000..c074920 Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fall_person.png differ diff --git a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fall_shelf.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fall_shelf.png new file mode 100644 index 0000000..582ad2c Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fall_shelf.png differ diff --git a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_candle.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_candle.png new file mode 100644 index 0000000..e200ca0 Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_candle.png differ diff --git a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_heater.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_heater.png new file mode 100644 index 0000000..5f2845a Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_heater.png differ diff --git a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_iron.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_iron.png new file mode 100644 index 0000000..f3f7c78 Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_iron.png differ diff --git 
a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_stove.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_stove.png new file mode 100644 index 0000000..de54fbe Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_fire_stove.png differ diff --git a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_trip_stairs.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_trip_stairs.png new file mode 100644 index 0000000..736c3d1 Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_trip_stairs.png differ diff --git a/skills/analysis/home-security-benchmark/fixtures/frames/indoor_trip_wetfloor.png b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_trip_wetfloor.png new file mode 100644 index 0000000..7ac3309 Binary files /dev/null and b/skills/analysis/home-security-benchmark/fixtures/frames/indoor_trip_wetfloor.png differ diff --git a/skills/analysis/home-security-benchmark/scripts/run-benchmark.cjs b/skills/analysis/home-security-benchmark/scripts/run-benchmark.cjs index b45306c..96dd71c 100644 --- a/skills/analysis/home-security-benchmark/scripts/run-benchmark.cjs +++ b/skills/analysis/home-security-benchmark/scripts/run-benchmark.cjs @@ -1704,6 +1704,68 @@ suite('πŸ“Έ VLM Scene Analysis', async () => { prompt: 'Describe this outdoor area. Are there any people present? What objects are visible?', expect: ['patio', 'furniture', 'table', 'chair', 'grill', 'empty', 'no one', 'no people'] }, + + // Category E: Indoor Safety Hazards (12) + { + name: 'Stove smoke β†’ kitchen fire hazard', file: 'indoor_fire_stove.png', + prompt: 'Describe this indoor security camera scene. Are there any fire or smoke hazards visible?', + expect: ['smoke', 'fire', 'stove', 'kitchen', 'cook', 'pot', 'steam'] + }, + { + name: 'Candle near curtain β†’ fire risk', file: 'indoor_fire_candle.png', + prompt: 'Describe this indoor scene. 
Is there any fire risk from open flames near flammable materials?', + expect: ['candle', 'fire', 'curtain', 'flame', 'drape', 'fabric', 'risk'] + }, + { + name: 'Overloaded power strip β†’ electrical hazard', file: 'indoor_elec_powerstrip.png', + prompt: 'Describe what you see. Are there any electrical safety hazards?', + expect: ['overload', 'power', 'electrical', 'plug', 'strip', 'cable', 'cord', 'outlet'] + }, + { + name: 'Frayed cord β†’ electrical fire risk', file: 'indoor_elec_cord.png', + prompt: 'Describe the condition of the electrical cord. Is there any damage that could be dangerous?', + expect: ['fray', 'cord', 'damage', 'wire', 'worn', 'exposed', 'cable'] + }, + { + name: 'Toys on stairs β†’ trip hazard', file: 'indoor_trip_stairs.png', + prompt: 'Describe this scene. Are there any trip or fall hazards on the staircase?', + expect: ['toy', 'stair', 'trip', 'hazard', 'ball', 'fall', 'step'] + }, + { + name: 'Wet floor β†’ slip hazard', file: 'indoor_trip_wetfloor.png', + prompt: 'Describe the floor condition in this scene. Is there any slip hazard?', + expect: ['wet', 'slip', 'water', 'floor', 'puddle', 'spill'] + }, + { + name: 'Person on floor β†’ medical emergency', file: 'indoor_fall_person.png', + prompt: 'Describe what you see. Is anyone in distress or in need of medical help?', + expect: ['person', 'fall', 'lying', 'floor', 'down', 'help', 'cane', 'elder'] + }, + { + name: 'Open cabinet chemicals β†’ child safety', file: 'indoor_child_cabinet.png', + prompt: 'Describe this kitchen scene. Are there any child safety concerns with accessible chemicals?', + expect: ['cabinet', 'chemical', 'clean', 'open', 'bottle', 'danger', 'safety'] + }, + { + name: 'Cluttered exit β†’ blocked fire exit', file: 'indoor_blocked_exit.png', + prompt: 'Describe this scene. 
Is the exit or doorway clear or obstructed?', + expect: ['block', 'exit', 'clutter', 'door', 'box', 'obstruct', 'furniture'] + }, + { + name: 'Space heater near drape β†’ fire ignition risk', file: 'indoor_fire_heater.png', + prompt: 'Describe this bedroom scene. Is the space heater positioned safely?', + expect: ['heater', 'drape', 'fire', 'curtain', 'close', 'fabric', 'risk'] + }, + { + name: 'Items on high shelf β†’ falling object risk', file: 'indoor_fall_shelf.png', + prompt: 'Describe the shelf and items on it. Are there any falling object hazards?', + expect: ['shelf', 'fall', 'heavy', 'unstable', 'box', 'stack', 'top'] + }, + { + name: 'Iron left face-down β†’ burn/fire risk', file: 'indoor_fire_iron.png', + prompt: 'Describe this laundry scene. Is the iron being used safely?', + expect: ['iron', 'burn', 'fire', 'left', 'hot', 'steam', 'unattended', 'board'] + }, ]; // ─── Run all VLM tests ────────────────────────────────────────────── diff --git a/skills/analysis/homesafe-bench/SKILL.md b/skills/analysis/homesafe-bench/SKILL.md new file mode 100644 index 0000000..8dd6bcf --- /dev/null +++ b/skills/analysis/homesafe-bench/SKILL.md @@ -0,0 +1,123 @@ +--- +name: HomeSafe-Bench +description: VLM indoor safety hazard detection benchmark inspired by HomeSafeBench (arXiv 2509.23690) +version: 1.0.0 +category: analysis +runtime: node +entry: scripts/run-benchmark.cjs +install: npm + +requirements: + node: ">=18" + npm_install: true + platforms: ["linux", "macos", "windows"] +--- + +# HomeSafe-Bench β€” Indoor Safety Hazard Detection + +VLM benchmark evaluating indoor home safety hazard detection from static camera frames. Inspired by [HomeSafeBench (arXiv 2509.23690)](https://arxiv.org/abs/2509.23690), adapted for **fixed indoor security cameras** with AI-generated fixture images. 
+ +**40 tests** across **5 hazard categories**: + +| Category | Tests | Example Hazards | +|----------|-------|-----------------| +| πŸ”₯ Fire / Smoke | 8 | Stove smoke, candle near curtain, iron left face-down | +| ⚑ Electrical | 8 | Overloaded power strip, exposed wire, frayed cord | +| 🚢 Trip / Fall | 8 | Toys on staircase, wet floor, loose rug edge | +| πŸ‘Ά Child Safety | 8 | Open cabinet with chemicals, uncovered outlet, blind cord | +| πŸ“¦ Falling Objects | 8 | Heavy items on high shelf, leaning bookshelf, loose ceiling fan | + +## Setup + +**Requires `npm install`.** This skill has a `package.json` with dependencies (e.g. `openai` SDK). The deployment agent should check for `package.json` and run `npm install` in the skill directory before launching. + +Entry script: `scripts/run-benchmark.cjs` + +### Verification + +```bash +node scripts/run-benchmark.cjs --help +``` + +## Quick Start + +### As an Aegis Skill (automatic) + +When spawned by Aegis, all configuration is injected via environment variables. The benchmark discovers your VLM server automatically, generates an HTML report, and opens it when complete. + +### Standalone + +```bash +# Run all 40 tests +node scripts/run-benchmark.cjs --vlm http://localhost:5405 + +# Quick mode (2 tests per category = 10 total) +node scripts/run-benchmark.cjs --vlm http://localhost:5405 --mode quick + +# Skip report auto-open +node scripts/run-benchmark.cjs --vlm http://localhost:5405 --no-open +``` + +## Configuration + +### Environment Variables (set by Aegis) + +| Variable | Default | Description | +|----------|---------|-------------| +| `AEGIS_VLM_URL` | *(required)* | VLM server base URL | +| `AEGIS_VLM_MODEL` | β€” | Loaded VLM model ID | +| `AEGIS_SKILL_ID` | β€” | Skill identifier (enables skill mode) | +| `AEGIS_SKILL_PARAMS` | `{}` | JSON params from skill config | + +> **Note**: URLs should be base URLs (e.g. `http://localhost:5405`). The benchmark appends `/v1/chat/completions` automatically. 
+ +### User Configuration (config.yaml) + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `mode` | select | `full` | Which mode: `full` (40 tests) or `quick` (10 tests — 2 per category) | +| `noOpen` | boolean | `false` | Skip auto-opening the HTML report in browser | + +### CLI Arguments (standalone fallback) + +| Argument | Default | Description | +|----------|---------|-------------| +| `--vlm URL` | *(required)* | VLM server base URL | +| `--mode MODE` | `full` | Test mode: `full` or `quick` | +| `--out DIR` | `~/.aegis-ai/homesafe-benchmarks` | Results directory | +| `--no-open` | — | Don't auto-open report in browser | + +## Protocol + +### Aegis → Skill (env vars) +``` +AEGIS_VLM_URL=http://localhost:5405 +AEGIS_SKILL_ID=homesafe-bench +AEGIS_SKILL_PARAMS={} +``` + +### Skill → Aegis (stdout, JSON lines) +```jsonl +{"event": "ready", "vlm": "SmolVLM-500M", "system": "Apple M3"} +{"event": "suite_start", "suite": "🔥 Fire / Smoke"} +{"event": "test_result", "suite": "...", "test": "...", "status": "pass", "timeMs": 4500} +{"event": "suite_end", "suite": "...", "passed": 7, "failed": 1} +{"event": "complete", "passed": 36, "total": 40, "timeMs": 180000, "reportPath": "/path/to/report.html"} +``` + +Human-readable output goes to **stderr** (visible in Aegis console tab). + +## Citation + +This benchmark is inspired by: + +> **HomeSafeBench: Towards Measuring the Proficiency of Home Safety for Embodied AI Agents** +> arXiv:2509.23690 +> +> Unlike the academic benchmark (embodied agent + navigation in simulated 3D environments), our version uses **static indoor camera frames** — matching real-world indoor security camera deployment (fixed wall/ceiling mount). All fixture images are **AI-generated**, consistent with DeepCamera's privacy-first approach.
+ +## Requirements + +- Node.js β‰₯ 18 +- `npm install` (for `openai` SDK dependency) +- Running VLM server (llama-server with vision model, or OpenAI-compatible VLM endpoint) diff --git a/skills/analysis/homesafe-bench/config.yaml b/skills/analysis/homesafe-bench/config.yaml new file mode 100644 index 0000000..cc01d76 --- /dev/null +++ b/skills/analysis/homesafe-bench/config.yaml @@ -0,0 +1,13 @@ +params: + - key: mode + label: Test Mode + type: select + options: [full, quick] + default: full + description: "Which test mode: full (40 tests) or quick (10 tests β€” 2 per category)" + + - key: noOpen + label: Don't auto-open report + type: boolean + default: false + description: Skip opening the HTML report in browser after completion diff --git a/skills/analysis/homesafe-bench/deploy.sh b/skills/analysis/homesafe-bench/deploy.sh new file mode 100755 index 0000000..05d27e3 --- /dev/null +++ b/skills/analysis/homesafe-bench/deploy.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# HomeSafe-Bench deployment script +# Runs npm install to fetch openai SDK dependency + +set -e +cd "$(dirname "$0")" +npm install +echo "βœ… HomeSafe-Bench dependencies installed" diff --git a/skills/analysis/homesafe-bench/fixtures/frames/child_01_chemicals_cabinet.png b/skills/analysis/homesafe-bench/fixtures/frames/child_01_chemicals_cabinet.png new file mode 100644 index 0000000..2e6d077 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/child_01_chemicals_cabinet.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/child_02_uncovered_outlet.png b/skills/analysis/homesafe-bench/fixtures/frames/child_02_uncovered_outlet.png new file mode 100644 index 0000000..787a0ba Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/child_02_uncovered_outlet.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/child_03_blind_cord.png b/skills/analysis/homesafe-bench/fixtures/frames/child_03_blind_cord.png new file mode 100644 index 
0000000..13c0335 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/child_03_blind_cord.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/child_04_sharp_corner.png b/skills/analysis/homesafe-bench/fixtures/frames/child_04_sharp_corner.png new file mode 100644 index 0000000..d919a09 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/child_04_sharp_corner.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/child_05_choking_hazard.png b/skills/analysis/homesafe-bench/fixtures/frames/child_05_choking_hazard.png new file mode 100644 index 0000000..c2b346e Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/child_05_choking_hazard.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/child_06_unsecured_bookshelf.png b/skills/analysis/homesafe-bench/fixtures/frames/child_06_unsecured_bookshelf.png new file mode 100644 index 0000000..516b035 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/child_06_unsecured_bookshelf.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/child_07_stove_knobs.png b/skills/analysis/homesafe-bench/fixtures/frames/child_07_stove_knobs.png new file mode 100644 index 0000000..c0fb60f Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/child_07_stove_knobs.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/child_08_open_window.png b/skills/analysis/homesafe-bench/fixtures/frames/child_08_open_window.png new file mode 100644 index 0000000..cc14e6b Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/child_08_open_window.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/electrical_01_overloaded_strip.png b/skills/analysis/homesafe-bench/fixtures/frames/electrical_01_overloaded_strip.png new file mode 100644 index 0000000..1269551 Binary files /dev/null and 
b/skills/analysis/homesafe-bench/fixtures/frames/electrical_01_overloaded_strip.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/electrical_02_exposed_wire_sink.png b/skills/analysis/homesafe-bench/fixtures/frames/electrical_02_exposed_wire_sink.png new file mode 100644 index 0000000..d1139ad Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/electrical_02_exposed_wire_sink.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/electrical_03_frayed_cord.png b/skills/analysis/homesafe-bench/fixtures/frames/electrical_03_frayed_cord.png new file mode 100644 index 0000000..e433bec Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/electrical_03_frayed_cord.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/electrical_04_cord_under_rug.png b/skills/analysis/homesafe-bench/fixtures/frames/electrical_04_cord_under_rug.png new file mode 100644 index 0000000..cdeee7d Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/electrical_04_cord_under_rug.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/electrical_05_wet_hands_outlet.png b/skills/analysis/homesafe-bench/fixtures/frames/electrical_05_wet_hands_outlet.png new file mode 100644 index 0000000..8fc0c0e Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/electrical_05_wet_hands_outlet.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/electrical_06_ungrounded_adapter.png b/skills/analysis/homesafe-bench/fixtures/frames/electrical_06_ungrounded_adapter.png new file mode 100644 index 0000000..34e27bd Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/electrical_06_ungrounded_adapter.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/electrical_07_damaged_outlet.png b/skills/analysis/homesafe-bench/fixtures/frames/electrical_07_damaged_outlet.png new file mode 100644 index 
0000000..4a555dc Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/electrical_07_damaged_outlet.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/electrical_08_tangled_cords.png b/skills/analysis/homesafe-bench/fixtures/frames/electrical_08_tangled_cords.png new file mode 100644 index 0000000..2c45ac3 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/electrical_08_tangled_cords.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/falling_01_heavy_high_shelf.png b/skills/analysis/homesafe-bench/fixtures/frames/falling_01_heavy_high_shelf.png new file mode 100644 index 0000000..7291d2c Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/falling_01_heavy_high_shelf.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/falling_02_stacked_boxes.png b/skills/analysis/homesafe-bench/fixtures/frames/falling_02_stacked_boxes.png new file mode 100644 index 0000000..579c59a Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/falling_02_stacked_boxes.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/falling_03_leaning_bookshelf.png b/skills/analysis/homesafe-bench/fixtures/frames/falling_03_leaning_bookshelf.png new file mode 100644 index 0000000..9c1d8d3 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/falling_03_leaning_bookshelf.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/falling_04_fridge_top.png b/skills/analysis/homesafe-bench/fixtures/frames/falling_04_fridge_top.png new file mode 100644 index 0000000..5e65828 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/falling_04_fridge_top.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/falling_05_overhead_tools.png b/skills/analysis/homesafe-bench/fixtures/frames/falling_05_overhead_tools.png new file mode 100644 index 0000000..b223a1d Binary files 
/dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/falling_05_overhead_tools.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/falling_06_unsecured_tv.png b/skills/analysis/homesafe-bench/fixtures/frames/falling_06_unsecured_tv.png new file mode 100644 index 0000000..f1f3fef Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/falling_06_unsecured_tv.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/falling_07_overloaded_coatrack.png b/skills/analysis/homesafe-bench/fixtures/frames/falling_07_overloaded_coatrack.png new file mode 100644 index 0000000..ca1bb90 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/falling_07_overloaded_coatrack.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/falling_08_loose_fan_blade.png b/skills/analysis/homesafe-bench/fixtures/frames/falling_08_loose_fan_blade.png new file mode 100644 index 0000000..1dd651a Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/falling_08_loose_fan_blade.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/fire_01_stove_smoke.png b/skills/analysis/homesafe-bench/fixtures/frames/fire_01_stove_smoke.png new file mode 100644 index 0000000..d354fd4 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/fire_01_stove_smoke.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/fire_02_candle_curtain.png b/skills/analysis/homesafe-bench/fixtures/frames/fire_02_candle_curtain.png new file mode 100644 index 0000000..9d5d958 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/fire_02_candle_curtain.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/fire_03_fireplace_ember.png b/skills/analysis/homesafe-bench/fixtures/frames/fire_03_fireplace_ember.png new file mode 100644 index 0000000..f6492c4 Binary files /dev/null and 
b/skills/analysis/homesafe-bench/fixtures/frames/fire_03_fireplace_ember.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/fire_04_iron_facedown.png b/skills/analysis/homesafe-bench/fixtures/frames/fire_04_iron_facedown.png new file mode 100644 index 0000000..855ebf0 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/fire_04_iron_facedown.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/fire_05_heater_drapes.png b/skills/analysis/homesafe-bench/fixtures/frames/fire_05_heater_drapes.png new file mode 100644 index 0000000..0c9a358 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/fire_05_heater_drapes.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/fire_06_missing_detector.png b/skills/analysis/homesafe-bench/fixtures/frames/fire_06_missing_detector.png new file mode 100644 index 0000000..a97b126 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/fire_06_missing_detector.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/fire_07_grease_fire.png b/skills/analysis/homesafe-bench/fixtures/frames/fire_07_grease_fire.png new file mode 100644 index 0000000..ff85750 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/fire_07_grease_fire.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/fire_08_cigarette_couch.png b/skills/analysis/homesafe-bench/fixtures/frames/fire_08_cigarette_couch.png new file mode 100644 index 0000000..00d2cda Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/fire_08_cigarette_couch.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/trip_01_toys_stairs.png b/skills/analysis/homesafe-bench/fixtures/frames/trip_01_toys_stairs.png new file mode 100644 index 0000000..bcb30c7 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/trip_01_toys_stairs.png differ diff --git 
a/skills/analysis/homesafe-bench/fixtures/frames/trip_02_wet_floor.png b/skills/analysis/homesafe-bench/fixtures/frames/trip_02_wet_floor.png new file mode 100644 index 0000000..4bdd380 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/trip_02_wet_floor.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/trip_03_loose_rug.png b/skills/analysis/homesafe-bench/fixtures/frames/trip_03_loose_rug.png new file mode 100644 index 0000000..f11e40c Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/trip_03_loose_rug.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/trip_04_cluttered_hallway.png b/skills/analysis/homesafe-bench/fixtures/frames/trip_04_cluttered_hallway.png new file mode 100644 index 0000000..bd27104 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/trip_04_cluttered_hallway.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/trip_05_shoes_doorway.png b/skills/analysis/homesafe-bench/fixtures/frames/trip_05_shoes_doorway.png new file mode 100644 index 0000000..efbafbc Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/trip_05_shoes_doorway.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/trip_06_dark_stairwell.png b/skills/analysis/homesafe-bench/fixtures/frames/trip_06_dark_stairwell.png new file mode 100644 index 0000000..9649e8d Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/trip_06_dark_stairwell.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/trip_07_cables_walkway.png b/skills/analysis/homesafe-bench/fixtures/frames/trip_07_cables_walkway.png new file mode 100644 index 0000000..4085591 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/trip_07_cables_walkway.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/frames/trip_08_stool_stairs.png 
b/skills/analysis/homesafe-bench/fixtures/frames/trip_08_stool_stairs.png new file mode 100644 index 0000000..d3cb173 Binary files /dev/null and b/skills/analysis/homesafe-bench/fixtures/frames/trip_08_stool_stairs.png differ diff --git a/skills/analysis/homesafe-bench/fixtures/scenarios.json b/skills/analysis/homesafe-bench/fixtures/scenarios.json new file mode 100644 index 0000000..b685f25 --- /dev/null +++ b/skills/analysis/homesafe-bench/fixtures/scenarios.json @@ -0,0 +1,730 @@ +{ + "version": "1.0.0", + "description": "HomeSafe-Bench test scenarios β€” 40 indoor safety hazard tests across 5 categories", + "citation": "Inspired by HomeSafeBench (arXiv 2509.23690)", + "categories": [ + { + "id": "fire", + "name": "Fire / Smoke", + "emoji": "πŸ”₯", + "description": "Fire hazards, smoke sources, and heat-related dangers in the home" + }, + { + "id": "electrical", + "name": "Electrical", + "emoji": "⚑", + "description": "Electrical hazards including overloaded circuits, exposed wiring, and water proximity" + }, + { + "id": "trip_fall", + "name": "Trip / Fall", + "emoji": "🚢", + "description": "Tripping hazards, fall risks, and pathway obstructions" + }, + { + "id": "child_safety", + "name": "Child Safety", + "emoji": "πŸ‘Ά", + "description": "Hazards specifically dangerous to children β€” chemicals, choking, sharp edges" + }, + { + "id": "falling_objects", + "name": "Falling Objects", + "emoji": "πŸ“¦", + "description": "Unstable items, precariously placed objects, and overhead risks" + } + ], + "scenarios": [ + { + "id": "fire_01", + "category": "fire", + "name": "Smoke rising from stove", + "file": "fire_01_stove_smoke.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any safety hazards visible in this image. 
Focus on fire, heat, and smoke risks.", + "expectedKeywords": [ + "smoke", + "stove", + "fire", + "cook", + "burn", + "hazard", + "kitchen" + ] + }, + { + "id": "fire_02", + "category": "fire", + "name": "Candle near curtain", + "file": "fire_02_candle_curtain.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any safety hazards visible in this image. Focus on fire risks from open flames.", + "expectedKeywords": [ + "candle", + "curtain", + "fire", + "flame", + "fabric", + "hazard", + "drape" + ] + }, + { + "id": "fire_03", + "category": "fire", + "name": "Fireplace ember on rug", + "file": "fire_03_fireplace_ember.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any safety hazards visible in this image, especially near the fireplace.", + "expectedKeywords": [ + "fireplace", + "ember", + "rug", + "spark", + "fire", + "carpet", + "burn", + "flame" + ] + }, + { + "id": "fire_04", + "category": "fire", + "name": "Iron left face-down on board", + "file": "fire_04_iron_facedown.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any safety hazards visible in this image. Look for heat-producing appliances.", + "expectedKeywords": [ + "iron", + "burn", + "hot", + "face-down", + "ironing", + "board", + "fire", + "heat", + "scorch" + ] + }, + { + "id": "fire_05", + "category": "fire", + "name": "Space heater near drapes", + "file": "fire_05_heater_drapes.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. 
Identify any safety hazards, especially heating appliances near flammable materials.", + "expectedKeywords": [ + "heater", + "drape", + "curtain", + "fire", + "space heater", + "fabric", + "hazard", + "close" + ] + }, + { + "id": "fire_06", + "category": "fire", + "name": "Missing smoke detector", + "file": "fire_06_missing_detector.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Check for smoke and fire detection equipment. Is there a smoke detector visible on the ceiling?", + "expectedKeywords": [ + "smoke detector", + "alarm", + "missing", + "ceiling", + "no", + "absent", + "detector", + "safety" + ] + }, + { + "id": "fire_07", + "category": "fire", + "name": "Grease fire on cooktop", + "file": "fire_07_grease_fire.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any active fires or dangerous cooking situations.", + "expectedKeywords": [ + "grease", + "fire", + "flame", + "cook", + "pan", + "kitchen", + "burn", + "oil", + "stove" + ] + }, + { + "id": "fire_08", + "category": "fire", + "name": "Cigarette on couch arm", + "file": "fire_08_cigarette_couch.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any fire hazards, including smoking materials near upholstery.", + "expectedKeywords": [ + "cigarette", + "couch", + "smoke", + "fire", + "sofa", + "ash", + "burn", + "fabric" + ] + }, + { + "id": "electrical_01", + "category": "electrical", + "name": "Overloaded power strip", + "file": "electrical_01_overloaded_strip.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. 
Identify any electrical safety hazards, especially around power outlets and strips.", + "expectedKeywords": [ + "overload", + "power strip", + "plug", + "outlet", + "electric", + "cord", + "too many", + "surge" + ] + }, + { + "id": "electrical_02", + "category": "electrical", + "name": "Exposed wire near sink", + "file": "electrical_02_exposed_wire_sink.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any electrical hazards, especially wires near water sources.", + "expectedKeywords": [ + "wire", + "exposed", + "sink", + "water", + "electric", + "shock", + "danger", + "bare" + ] + }, + { + "id": "electrical_03", + "category": "electrical", + "name": "Frayed cord on appliance", + "file": "electrical_03_frayed_cord.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Check for damaged electrical cords or cables.", + "expectedKeywords": [ + "fray", + "cord", + "damage", + "wire", + "electric", + "worn", + "cable", + "appliance" + ] + }, + { + "id": "electrical_04", + "category": "electrical", + "name": "Extension cord under rug", + "file": "electrical_04_cord_under_rug.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any fire or electrical hazards related to cord placement.", + "expectedKeywords": [ + "extension", + "cord", + "rug", + "under", + "carpet", + "fire", + "hazard", + "hidden" + ] + }, + { + "id": "electrical_05", + "category": "electrical", + "name": "Wet hands near outlet", + "file": "electrical_05_wet_hands_outlet.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. 
Identify any electrical shock hazards, especially involving water and electricity.", + "expectedKeywords": [ + "wet", + "water", + "outlet", + "hand", + "electric", + "shock", + "danger", + "plug" + ] + }, + { + "id": "electrical_06", + "category": "electrical", + "name": "Ungrounded 3-to-2 adapter", + "file": "electrical_06_ungrounded_adapter.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Check for improper electrical connections or adapters.", + "expectedKeywords": [ + "adapter", + "ground", + "plug", + "outlet", + "prong", + "electric", + "unsafe", + "converter" + ] + }, + { + "id": "electrical_07", + "category": "electrical", + "name": "Damaged outlet cover", + "file": "electrical_07_damaged_outlet.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Check the condition of electrical outlets and covers.", + "expectedKeywords": [ + "outlet", + "cover", + "damage", + "broken", + "crack", + "expose", + "plate", + "electric" + ] + }, + { + "id": "electrical_08", + "category": "electrical", + "name": "Tangled cords behind desk", + "file": "electrical_08_tangled_cords.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any cable management hazards behind furniture.", + "expectedKeywords": [ + "tangle", + "cord", + "cable", + "wire", + "mess", + "desk", + "behind", + "fire", + "hazard" + ] + }, + { + "id": "trip_01", + "category": "trip_fall", + "name": "Toys scattered on staircase", + "file": "trip_01_toys_stairs.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. 
Identify any tripping or falling hazards, especially on stairs.", + "expectedKeywords": [ + "toy", + "stair", + "trip", + "fall", + "hazard", + "block", + "step", + "obstruct" + ] + }, + { + "id": "trip_02", + "category": "trip_fall", + "name": "Wet floor without sign", + "file": "trip_02_wet_floor.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any slip or fall hazards on the floor surface.", + "expectedKeywords": [ + "wet", + "floor", + "slip", + "water", + "spill", + "puddle", + "slippery", + "fall" + ] + }, + { + "id": "trip_03", + "category": "trip_fall", + "name": "Loose rug edge curled up", + "file": "trip_03_loose_rug.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Check for tripping hazards related to floor coverings.", + "expectedKeywords": [ + "rug", + "loose", + "curl", + "trip", + "edge", + "fold", + "carpet", + "hazard", + "bunched" + ] + }, + { + "id": "trip_04", + "category": "trip_fall", + "name": "Cluttered hallway", + "file": "trip_04_cluttered_hallway.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any pathway obstructions or tripping hazards.", + "expectedKeywords": [ + "clutter", + "hallway", + "obstruct", + "trip", + "box", + "item", + "block", + "path", + "narrow" + ] + }, + { + "id": "trip_05", + "category": "trip_fall", + "name": "Shoes piled in doorway", + "file": "trip_05_shoes_doorway.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any tripping hazards near doorways and entries.", + "expectedKeywords": [ + "shoe", + "door", + "trip", + "entry", + "pile", + "obstruct", + "hazard", + "block" + ] + }, + { + "id": "trip_06", + "category": "trip_fall", + "name": "Unlit stairwell", + "file": "trip_06_dark_stairwell.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. 
Assess lighting conditions and visibility hazards, especially on stairs.", + "expectedKeywords": [ + "dark", + "stair", + "light", + "dim", + "unlit", + "visibility", + "shadow", + "hazard" + ] + }, + { + "id": "trip_07", + "category": "trip_fall", + "name": "Cables across walkway", + "file": "trip_07_cables_walkway.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any cords or cables creating tripping hazards.", + "expectedKeywords": [ + "cable", + "cord", + "trip", + "wire", + "floor", + "across", + "walk", + "hazard" + ] + }, + { + "id": "trip_08", + "category": "trip_fall", + "name": "Step stool near top of stairs", + "file": "trip_08_stool_stairs.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any unstable or misplaced furniture near stairs.", + "expectedKeywords": [ + "stool", + "step", + "stair", + "fall", + "top", + "unstable", + "hazard", + "ladder" + ] + }, + { + "id": "child_01", + "category": "child_safety", + "name": "Open cabinet with cleaning chemicals", + "file": "child_01_chemicals_cabinet.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any child safety hazards, especially toxic substances within reach.", + "expectedKeywords": [ + "cabinet", + "chemical", + "clean", + "open", + "child", + "poison", + "toxic", + "reach", + "bottle" + ] + }, + { + "id": "child_02", + "category": "child_safety", + "name": "Uncovered electrical outlet", + "file": "child_02_uncovered_outlet.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. 
Check for child safety hazards related to electrical outlets.", + "expectedKeywords": [ + "outlet", + "uncovered", + "child", + "electric", + "plug", + "cover", + "safety", + "exposed" + ] + }, + { + "id": "child_03", + "category": "child_safety", + "name": "Blind cord hanging in reach", + "file": "child_03_blind_cord.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify strangulation or entanglement hazards for children.", + "expectedKeywords": [ + "blind", + "cord", + "string", + "hang", + "child", + "strangle", + "window", + "reach", + "loop" + ] + }, + { + "id": "child_04", + "category": "child_safety", + "name": "Sharp corner on coffee table", + "file": "child_04_sharp_corner.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any sharp edges or corners that could injure a child.", + "expectedKeywords": [ + "sharp", + "corner", + "table", + "edge", + "child", + "bump", + "injury", + "point" + ] + }, + { + "id": "child_05", + "category": "child_safety", + "name": "Small objects choking hazard", + "file": "child_05_choking_hazard.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any small objects that could be choking hazards for young children.", + "expectedKeywords": [ + "small", + "choke", + "child", + "toy", + "marble", + "button", + "piece", + "swallow", + "hazard" + ] + }, + { + "id": "child_06", + "category": "child_safety", + "name": "Unsecured tall bookshelf", + "file": "child_06_unsecured_bookshelf.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. 
Check for furniture tip-over risks that could endanger children.", + "expectedKeywords": [ + "bookshelf", + "unsecured", + "tip", + "fall", + "heavy", + "child", + "anchor", + "lean", + "unstable" + ] + }, + { + "id": "child_07", + "category": "child_safety", + "name": "Stove knobs accessible to children", + "file": "child_07_stove_knobs.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any kitchen hazards accessible to small children.", + "expectedKeywords": [ + "stove", + "knob", + "child", + "reach", + "gas", + "burner", + "kitchen", + "turn", + "accessible" + ] + }, + { + "id": "child_08", + "category": "child_safety", + "name": "Open window without guard", + "file": "child_08_open_window.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Check for fall hazards from windows, especially for children.", + "expectedKeywords": [ + "window", + "open", + "guard", + "child", + "fall", + "screen", + "height", + "safety" + ] + }, + { + "id": "falling_01", + "category": "falling_objects", + "name": "Heavy items on high shelf", + "file": "falling_01_heavy_high_shelf.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any heavy objects stored unsafely at height.", + "expectedKeywords": [ + "heavy", + "shelf", + "high", + "fall", + "weight", + "above", + "stack", + "danger", + "top" + ] + }, + { + "id": "falling_02", + "category": "falling_objects", + "name": "Precariously stacked boxes", + "file": "falling_02_stacked_boxes.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. 
Check for unstable stacking that could collapse.", + "expectedKeywords": [ + "stack", + "box", + "unstable", + "fall", + "lean", + "collapse", + "precarious", + "pile" + ] + }, + { + "id": "falling_03", + "category": "falling_objects", + "name": "Leaning bookshelf", + "file": "falling_03_leaning_bookshelf.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Check for furniture stability issues.", + "expectedKeywords": [ + "bookshelf", + "lean", + "tilt", + "unstable", + "fall", + "tip", + "heavy", + "book" + ] + }, + { + "id": "falling_04", + "category": "falling_objects", + "name": "Items on fridge top near edge", + "file": "falling_04_fridge_top.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Identify any objects that could fall from on top of appliances.", + "expectedKeywords": [ + "fridge", + "top", + "edge", + "fall", + "item", + "refrigerator", + "above", + "precarious" + ] + }, + { + "id": "falling_05", + "category": "falling_objects", + "name": "Tools on overhead rack", + "file": "falling_05_overhead_tools.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Check for overhead storage hazards.", + "expectedKeywords": [ + "tool", + "overhead", + "rack", + "hang", + "fall", + "heavy", + "above", + "mount" + ] + }, + { + "id": "falling_06", + "category": "falling_objects", + "name": "Unsecured wall-mount TV", + "file": "falling_06_unsecured_tv.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. 
Check for improperly mounted electronics that could fall.", + "expectedKeywords": [ + "tv", + "mount", + "wall", + "unsecured", + "television", + "fall", + "hang", + "loose", + "tilt" + ] + }, + { + "id": "falling_07", + "category": "falling_objects", + "name": "Overloaded coat rack", + "file": "falling_07_overloaded_coatrack.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Check for overloaded freestanding furniture.", + "expectedKeywords": [ + "coat", + "rack", + "overload", + "heavy", + "tip", + "fall", + "hang", + "lean", + "weight" + ] + }, + { + "id": "falling_08", + "category": "falling_objects", + "name": "Ceiling fan with loose blade", + "file": "falling_08_loose_fan_blade.png", + "prompt": "You are a home safety inspector analyzing an indoor security camera frame. Check the ceiling fan for any maintenance issues.", + "expectedKeywords": [ + "fan", + "blade", + "ceiling", + "loose", + "wobble", + "hang", + "detach", + "danger" + ] + } + ] +} \ No newline at end of file diff --git a/skills/analysis/homesafe-bench/package-lock.json b/skills/analysis/homesafe-bench/package-lock.json new file mode 100644 index 0000000..a774f40 --- /dev/null +++ b/skills/analysis/homesafe-bench/package-lock.json @@ -0,0 +1,37 @@ +{ + "name": "homesafe-bench", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "homesafe-bench", + "version": "1.0.0", + "license": "ISC", + "dependencies": { + "openai": "^6.27.0" + } + }, + "node_modules/openai": { + "version": "6.27.0", + "resolved": "https://registry.npmjs.org/openai/-/openai-6.27.0.tgz", + "integrity": "sha512-osTKySlrdYrLYTt0zjhY8yp0JUBmWDCN+Q+QxsV4xMQnnoVFpylgKGgxwN8sSdTNw0G4y+WUXs4eCMWpyDNWZQ==", + "license": "Apache-2.0", + "bin": { + "openai": "bin/cli" + }, + "peerDependencies": { + "ws": "^8.18.0", + "zod": "^3.25 || ^4.0" + }, + "peerDependenciesMeta": { + "ws": { + "optional": true + }, + "zod": { + "optional": true + } + } + } 
+ } +} diff --git a/skills/analysis/homesafe-bench/package.json b/skills/analysis/homesafe-bench/package.json new file mode 100644 index 0000000..1d96d2b --- /dev/null +++ b/skills/analysis/homesafe-bench/package.json @@ -0,0 +1,21 @@ +{ + "name": "homesafe-bench", + "version": "1.0.0", + "description": "VLM indoor safety hazard detection benchmark", + "main": "scripts/run-benchmark.cjs", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "keywords": [ + "benchmark", + "vlm", + "safety", + "hazard" + ], + "author": "SharpAI", + "license": "ISC", + "type": "commonjs", + "dependencies": { + "openai": "^6.27.0" + } +} \ No newline at end of file diff --git a/skills/analysis/homesafe-bench/scripts/run-benchmark.cjs b/skills/analysis/homesafe-bench/scripts/run-benchmark.cjs new file mode 100644 index 0000000..450b39c --- /dev/null +++ b/skills/analysis/homesafe-bench/scripts/run-benchmark.cjs @@ -0,0 +1,545 @@ +#!/usr/bin/env node +/** + * HomeSafe-Bench β€” Indoor Safety Hazard Detection Benchmark + * + * Evaluates VLM models on indoor home safety hazard detection across 5 categories: + * - Fire/Smoke, Electrical, Trip/Fall, Child Safety, Falling Objects + * + * Inspired by HomeSafeBench (arXiv 2509.23690), adapted for static indoor cameras. + * + * ## Skill Protocol (when spawned by Aegis) + * + * Aegis β†’ Skill (env vars): + * AEGIS_VLM_URL β€” VLM server URL (e.g. 
http://localhost:5405) + * AEGIS_SKILL_PARAMS β€” JSON params from skill config + * AEGIS_SKILL_ID β€” Skill ID + * + * Skill β†’ Aegis (stdout, JSON lines): + * {"event": "ready", "vlm": "SmolVLM-500M"} + * {"event": "suite_start", "suite": "πŸ”₯ Fire / Smoke"} + * {"event": "test_result", "suite": "...", "test": "...", "status": "pass", "timeMs": 4500} + * {"event": "suite_end", "suite": "...", "passed": 7, "failed": 1} + * {"event": "complete", "passed": 36, "total": 40, "timeMs": 180000} + * + * Standalone usage: + * node run-benchmark.cjs [options] + * --vlm URL VLM server (required) + * --mode MODE full or quick (default: full) + * --out DIR Results directory + * --no-open Don't auto-open report + */ + +const fs = require('fs'); +const path = require('path'); +const os = require('os'); +const { execSync } = require('child_process'); + +// ─── Config: Aegis env vars β†’ CLI args β†’ defaults ──────────────────────────── + +const args = process.argv.slice(2); +function getArg(name, defaultVal) { + const idx = args.indexOf(`--${name}`); + if (idx === -1) return defaultVal; + return args[idx + 1] || defaultVal; +} + +// ─── Help ───────────────────────────────────────────────────────────────────── +if (args.includes('--help') || args.includes('-h')) { + console.log(` +HomeSafe-Bench β€” Indoor Safety Hazard Detection Benchmark β€’ DeepCamera / SharpAI + +Inspired by HomeSafeBench (arXiv 2509.23690) + +Usage: node scripts/run-benchmark.cjs [options] + +Options: + --vlm URL VLM server base URL (required) + --mode MODE full or quick (default: full) + --out DIR Results output directory (default: ~/.aegis-ai/homesafe-benchmarks) + --no-open Don't auto-open report in browser + -h, --help Show this help message + +Environment Variables (set by Aegis): + AEGIS_VLM_URL VLM server base URL + AEGIS_SKILL_ID Skill identifier (enables skill mode) + AEGIS_SKILL_PARAMS JSON params from skill config + +Categories: Fire/Smoke, Electrical, Trip/Fall, Child Safety, Falling Objects 
+ `.trim()); + process.exit(0); +} + +// Parse skill parameters if running as Aegis skill +let skillParams = {}; +try { skillParams = JSON.parse(process.env.AEGIS_SKILL_PARAMS || '{}'); } catch { } + +const VLM_URL = process.env.AEGIS_VLM_URL || getArg('vlm', ''); +const VLM_MODEL = process.env.AEGIS_VLM_MODEL || ''; +const RESULTS_DIR = getArg('out', path.join(os.homedir(), '.aegis-ai', 'homesafe-benchmarks')); +const IS_SKILL_MODE = !!process.env.AEGIS_SKILL_ID; +const NO_OPEN = args.includes('--no-open') || skillParams.noOpen || false; +const FIXTURES_DIR = path.join(__dirname, '..', 'fixtures'); +const FRAMES_DIR = path.join(FIXTURES_DIR, 'frames'); +const IDLE_TIMEOUT_MS = 120000; // 2 minutes β€” safety scenarios may need more analysis + +// Mode (full = 40 tests, quick = 10 tests β€” 2 per category) +const TEST_MODE = skillParams.mode || getArg('mode', 'full'); + +// ─── OpenAI SDK Client ────────────────────────────────────────────────────── +const OpenAI = require('openai'); + +const strip = (u) => u.replace(/\/v1\/?$/, ''); +const vlmClient = VLM_URL ? 
new OpenAI({ + apiKey: 'not-needed', + baseURL: `${strip(VLM_URL)}/v1`, +}) : null; + +// ─── Skill Protocol: JSON lines on stdout, human text on stderr ────────────── + +function emit(event) { + process.stdout.write(JSON.stringify(event) + '\n'); +} + +function log(msg) { + process.stderr.write(msg + '\n'); +} + +// ─── Test Framework ─────────────────────────────────────────────────────────── + +const suites = []; +let currentSuite = null; + +function suite(name, fn) { + suites.push({ name, fn, tests: [] }); +} + +const results = { + timestamp: new Date().toISOString(), + vlm: VLM_URL || null, + system: {}, + model: {}, + suites: [], + totals: { passed: 0, failed: 0, skipped: 0, total: 0, timeMs: 0 }, + tokenTotals: { prompt: 0, completion: 0, total: 0 }, +}; + +async function vlmCall(messages, opts = {}) { + if (!vlmClient) { + throw new Error('VLM client not configured β€” pass --vlm URL'); + } + + const model = opts.model || VLM_MODEL || undefined; + + const params = { + messages, + stream: true, + ...(model && { model }), + ...(opts.temperature !== undefined && { temperature: opts.temperature }), + max_completion_tokens: opts.maxTokens || 512, + }; + + const controller = new AbortController(); + const idleMs = opts.timeout || IDLE_TIMEOUT_MS; + let idleTimer = setTimeout(() => controller.abort(), idleMs); + const resetIdle = () => { clearTimeout(idleTimer); idleTimer = setTimeout(() => controller.abort(), idleMs); }; + + try { + const stream = await vlmClient.chat.completions.create(params, { + signal: controller.signal, + }); + + let content = ''; + let reasoningContent = ''; + let streamModel = ''; + let usage = {}; + let tokenCount = 0; + + for await (const chunk of stream) { + resetIdle(); + if (chunk.model) streamModel = chunk.model; + const delta = chunk.choices?.[0]?.delta; + if (delta?.content) content += delta.content; + if (delta?.reasoning_content) reasoningContent += delta.reasoning_content; + if (delta?.content || delta?.reasoning_content) { + 
tokenCount++;
+                if (tokenCount % 100 === 0) {
+                    log(` … ${tokenCount} tokens received`);
+                }
+            }
+            if (chunk.usage) usage = chunk.usage;
+        }
+
+        if (!content && reasoningContent) {
+            content = reasoningContent;
+        }
+
+        results.tokenTotals.prompt += usage.prompt_tokens || 0;
+        results.tokenTotals.completion += usage.completion_tokens || 0;
+        results.tokenTotals.total += usage.total_tokens || 0;
+
+        if (!results.model.vlm && streamModel) results.model.vlm = streamModel;
+
+        return { content, usage, model: streamModel };
+    } finally {
+        clearTimeout(idleTimer);
+    }
+}
+
+/**
+ * Remove model "thinking" sections from a completion and trim the result.
+ *
+ * Two passes:
+ *   1. Every paired <think>...</think> block is removed, wherever it occurs, so
+ *      answer text that precedes a reasoning block is preserved.
+ *   2. If a closing </think> is still present (the opening tag was not part of
+ *      the streamed text), everything up to and including the last such tag is
+ *      dropped.
+ *
+ * @param {string} text Raw completion text.
+ * @returns {string} Text without reasoning sections, trimmed.
+ */
+function stripThink(text) {
+    return text
+        .replace(/<think>[\s\S]*?<\/think>\s*/gi, '')
+        .replace(/^[\s\S]*<\/think>\s*/i, '')
+        .trim();
+}
+
+/**
+ * Throw an Error carrying `msg` (or a generic message) when `condition` is falsy.
+ */
+function assert(condition, msg) {
+    if (!condition) throw new Error(msg || 'Assertion failed');
+}
+
+/**
+ * Run every registered suite in registration order.
+ *
+ * For each suite a fresh `currentSuite` record is created (test() and skip()
+ * write into it), `suite_start` / `suite_end` events are emitted, and the
+ * suite's counters are folded into `results.totals`.
+ */
+async function runSuites() {
+    for (const s of suites) {
+        currentSuite = { name: s.name, tests: [], passed: 0, failed: 0, skipped: 0, timeMs: 0 };
+        log(`\n${'─'.repeat(60)}`);
+        log(` ${s.name}`);
+        log(`${'─'.repeat(60)}`);
+        emit({ event: 'suite_start', suite: s.name });
+
+        await s.fn();
+
+        results.suites.push(currentSuite);
+        results.totals.passed += currentSuite.passed;
+        results.totals.failed += currentSuite.failed;
+        results.totals.skipped += currentSuite.skipped;
+        results.totals.total += currentSuite.tests.length;
+
+        emit({ event: 'suite_end', suite: s.name, passed: currentSuite.passed, failed: currentSuite.failed, skipped: currentSuite.skipped, timeMs: currentSuite.timeMs });
+    }
+}
+
+async function test(name, fn) {
+    const testResult = { name, status: 'pass', timeMs: 0, detail: '', tokens: {} };
+    const start = Date.now();
+    try {
+        const detail = await fn();
+        testResult.timeMs = Date.now() - start;
+        testResult.detail = detail || '';
+        currentSuite.passed++;
+        log(` βœ… ${name} (${testResult.timeMs}ms)${detail ? 
` β€” ${detail}` : ''}`);
+    } catch (err) {
+        testResult.timeMs = Date.now() - start;
+        testResult.status = 'fail';
+        testResult.detail = err.message;
+        currentSuite.failed++;
+        log(` ❌ ${name} (${testResult.timeMs}ms) β€” ${err.message}`);
+    }
+    currentSuite.timeMs += testResult.timeMs;
+    currentSuite.tests.push(testResult);
+    emit({ event: 'test_result', suite: currentSuite.name, test: name, status: testResult.status, timeMs: testResult.timeMs, detail: testResult.detail.slice(0, 120) });
+}
+
+/**
+ * Record a test as skipped in the current suite and report it on both channels
+ * (human-readable log line and `test_result` event with status 'skip').
+ */
+function skip(name, reason) {
+    currentSuite.skipped++;
+    currentSuite.tests.push({ name, status: 'skip', timeMs: 0, detail: reason });
+    log(` ⏭️ ${name} β€” ${reason}`);
+    emit({ event: 'test_result', suite: currentSuite.name, test: name, status: 'skip', timeMs: 0, detail: reason });
+}
+
+// ═══════════════════════════════════════════════════════════════════════════════
+// DISK SPACE CHECK
+// ═══════════════════════════════════════════════════════════════════════════════
+
+/**
+ * Best-effort free-space check for the directory that will receive results.
+ *
+ * Creates `targetDir` if needed, asks `df` for the available space and exits the
+ * process with code 1 (after emitting an `error` event) when less than
+ * `requiredGB` is available. Any failure to run or parse `df` (for example on
+ * platforms without it) is logged and ignored.
+ *
+ * @param {string} targetDir  Directory to check; created recursively if missing.
+ * @param {number} requiredGB Minimum free space, in GiB.
+ * @returns {number} Available space in GiB, or -1 when it could not be determined.
+ */
+function checkDiskSpace(targetDir, requiredGB) {
+    try {
+        fs.mkdirSync(targetDir, { recursive: true });
+        // No shell: targetDir can come from --out, so it is passed as a single
+        // argv entry instead of being interpolated into a command string.
+        // -P forces the POSIX one-line-per-filesystem layout; without it a long
+        // device name can wrap and shift the columns parsed below.
+        const { execFileSync } = require('child_process');
+        const dfOutput = execFileSync('df', ['-P', '-k', targetDir], { encoding: 'utf8' });
+        const lines = dfOutput.trim().split('\n');
+        if (lines.length >= 2) {
+            // POSIX columns: Filesystem, 1024-blocks, Used, Available, Capacity, Mounted on
+            const parts = lines[1].split(/\s+/);
+            const availableKB = parseInt(parts[3], 10);
+            if (!isNaN(availableKB)) {
+                const availableGB = availableKB / (1024 * 1024);
+                if (availableGB < requiredGB) {
+                    log(` ❌ Insufficient disk space`);
+                    log(` Required: ${requiredGB.toFixed(1)} GB`);
+                    log(` Available: ${availableGB.toFixed(1)} GB`);
+                    log(` Location: ${targetDir}`);
+                    emit({ event: 'error', message: `Insufficient disk space: need ${requiredGB}GB, have ${availableGB.toFixed(1)}GB` });
+                    process.exit(1);
+                }
+                log(` πŸ’Ύ Disk: ${availableGB.toFixed(1)} GB available (need ${requiredGB} GB) βœ“`);
+                return availableGB;
+            }
+        }
+    } catch (err) {
+        log(` ⚠️ Could not check disk space: ${err.message} β€” proceeding anyway`);
+    }
+    return -1;
+}
+
+// 
═══════════════════════════════════════════════════════════════════════════════ +// DATASET MANAGEMENT +// ═══════════════════════════════════════════════════════════════════════════════ + +/** + * Check if upstream HomeSafeBench dataset is available for download. + * When the academic dataset becomes publicly available, this function + * will download it to ~/.aegis-ai/datasets/homesafe-bench/. + * + * Until then, the skill uses AI-generated fixture images from fixtures/frames/. + */ +function checkUpstreamDataset() { + const datasetDir = path.join(os.homedir(), '.aegis-ai', 'datasets', 'homesafe-bench'); + const markerFile = path.join(datasetDir, '.downloaded'); + + if (fs.existsSync(markerFile)) { + log(` πŸ“‚ Upstream dataset cached at: ${datasetDir}`); + return datasetDir; + } + + // Upstream not available yet β€” use bundled AI-generated fixtures + log(` ℹ️ Upstream HomeSafeBench dataset not yet public (arXiv 2509.23690)`); + log(` Using bundled AI-generated fixture images`); + return null; +} + +// ═══════════════════════════════════════════════════════════════════════════════ +// VLM EVALUATION +// ═══════════════════════════════════════════════════════════════════════════════ + +async function vlmAnalyze(framePath, prompt) { + const imageData = fs.readFileSync(framePath); + const base64 = imageData.toString('base64'); + const mimeType = framePath.endsWith('.png') ? 
'image/png' : 'image/jpeg';
+
+    const r = await vlmCall([{
+        role: 'user',
+        content: [
+            { type: 'image_url', image_url: { url: `data:${mimeType};base64,${base64}` } },
+            { type: 'text', text: prompt },
+        ],
+    }], { maxTokens: 512 });
+
+    return stripThink(r.content);
+}
+
+// ═══════════════════════════════════════════════════════════════════════════════
+// SUITE BUILDER
+// ═══════════════════════════════════════════════════════════════════════════════
+
+/**
+ * Read fixtures/scenarios.json and group its scenarios by category id.
+ *
+ * Scenarios whose `category` does not match a declared category are ignored.
+ * In quick mode only the first two scenarios of each category are kept.
+ *
+ * @returns {Object} Map of category id -> { name, emoji, scenarios[] }.
+ */
+function loadScenarios() {
+    const data = JSON.parse(fs.readFileSync(path.join(FIXTURES_DIR, 'scenarios.json'), 'utf8'));
+
+    // Group scenarios by category
+    const byCategory = {};
+    for (const cat of data.categories) {
+        byCategory[cat.id] = {
+            name: cat.name,
+            emoji: cat.emoji,
+            scenarios: [],
+        };
+    }
+
+    for (const scenario of data.scenarios) {
+        if (byCategory[scenario.category]) {
+            byCategory[scenario.category].scenarios.push(scenario);
+        }
+    }
+
+    // Apply quick mode — keep 2 per category
+    if (TEST_MODE === 'quick') {
+        for (const cat of Object.values(byCategory)) {
+            cat.scenarios = cat.scenarios.slice(0, 2);
+        }
+    }
+
+    return byCategory;
+}
+
+/**
+ * Register one suite per non-empty category. Each scenario becomes a test that
+ * passes when the VLM's description contains at least one expected keyword
+ * (case-insensitive substring match).
+ *
+ * A scenario whose frame file is missing is recorded exactly once, as a skip.
+ * The existence check runs before test() is entered, because calling skip()
+ * and then throwing inside the test callback would record the scenario twice
+ * (one skip plus one fail) and inflate the suite's test count.
+ *
+ * @param {Object} byCategory Category map as returned by loadScenarios().
+ */
+function buildSuites(byCategory) {
+    for (const cat of Object.values(byCategory)) {
+        if (cat.scenarios.length === 0) continue;
+
+        suite(`${cat.emoji} ${cat.name}`, async () => {
+            for (const scenario of cat.scenarios) {
+                const framePath = path.join(FRAMES_DIR, scenario.file);
+
+                if (!fs.existsSync(framePath)) {
+                    skip(scenario.name, `Frame missing: ${scenario.file}`);
+                    continue;
+                }
+
+                await test(scenario.name, async () => {
+                    const desc = await vlmAnalyze(framePath, scenario.prompt);
+                    const lower = desc.toLowerCase();
+                    const matched = scenario.expectedKeywords.some(kw => lower.includes(kw.toLowerCase()));
+
+                    assert(matched,
+                        `Expected one of [${scenario.expectedKeywords.slice(0, 4).join(', ')}...] 
in: "${desc.slice(0, 80)}"`); + + const hits = scenario.expectedKeywords.filter(kw => lower.includes(kw.toLowerCase())); + return `${desc.length} chars, matched: ${hits.join(', ')} βœ“`; + }); + } + }); + } +} + +// ═══════════════════════════════════════════════════════════════════════════════ +// MAIN +// ═══════════════════════════════════════════════════════════════════════════════ + +async function main() { + log(''); + log(' ╔══════════════════════════════════════════════════════════════╗'); + log(' β•‘ HomeSafe-Bench β€” Indoor Safety Hazard Detection Benchmark β•‘'); + log(' β•‘ Inspired by HomeSafeBench (arXiv 2509.23690) β•‘'); + log(' β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•'); + log(''); + + // Check VLM + if (!VLM_URL) { + log(' ❌ VLM server URL required. Pass --vlm http://localhost:5405'); + log(' This is a VLM-only benchmark (indoor safety image analysis).'); + emit({ event: 'error', message: 'VLM server URL required' }); + process.exit(1); + } + + // Disk space check (minimal β€” bundled frames are <50MB, dataset download ~20GB when available) + checkDiskSpace(RESULTS_DIR, 0.1); + + // Check for upstream dataset (will use bundled fixtures if not available) + checkUpstreamDataset(); + + // System info + results.system = { + platform: `${os.platform()} ${os.arch()}`, + cpus: os.cpus()[0]?.model || 'unknown', + totalRAM_GB: (os.totalmem() / 1073741824).toFixed(1), + node: process.version, + }; + + log(` VLM: ${VLM_URL}`); + log(` Mode: ${TEST_MODE} (${TEST_MODE === 'quick' ? 
'10' : '40'} tests)`); + log(` Frames: ${FRAMES_DIR}`); + log(` Results: ${RESULTS_DIR}`); + log(` System: ${results.system.cpus} (${results.system.totalRAM_GB} GB RAM)`); + + // VLM healthcheck + try { + const ping = await vlmCall([ + { role: 'user', content: 'ping' }, + ], { maxTokens: 5, timeout: 10000 }); + results.model.vlm = ping.model || 'unknown'; + log(` VLM Model: ${results.model.vlm}`); + } catch (err) { + log(`\n ❌ Cannot reach VLM endpoint: ${err.message}`); + log(` URL: ${VLM_URL}`); + log(' Check that the VLM server is running.\n'); + emit({ event: 'error', message: `Cannot reach VLM endpoint: ${err.message}` }); + process.exit(1); + } + + // Emit ready event + emit({ + event: 'ready', + vlm: results.model.vlm, + system: results.system.cpus, + mode: TEST_MODE, + }); + + // Check that fixture frames exist + if (!fs.existsSync(FRAMES_DIR)) { + log(`\n ❌ Frames directory not found: ${FRAMES_DIR}`); + log(' Run the image generation step first.'); + emit({ event: 'error', message: 'Frames directory not found' }); + process.exit(1); + } + + const frameCount = fs.readdirSync(FRAMES_DIR).filter(f => f.endsWith('.png')).length; + log(` Frames: ${frameCount} PNG files loaded`); + + // Load scenarios and build test suites + const byCategory = loadScenarios(); + const totalTests = Object.values(byCategory).reduce((n, cat) => n + cat.scenarios.length, 0); + log(`\n πŸ“Š ${totalTests} tests across ${Object.keys(byCategory).length} categories\n`); + + buildSuites(byCategory); + + // Run all suites + const suiteStart = Date.now(); + await runSuites(); + results.totals.timeMs = Date.now() - suiteStart; + + // Summary + const { passed, failed, skipped, total, timeMs } = results.totals; + const tokPerSec = timeMs > 0 ? 
((results.tokenTotals.total / (timeMs / 1000)).toFixed(1)) : '?'; + + log(`\n${'═'.repeat(66)}`); + log(` RESULTS: ${passed}/${total} passed, ${failed} failed, ${skipped} skipped (${(timeMs / 1000).toFixed(1)}s)`); + log(` TOKENS: ${results.tokenTotals.total} total (${tokPerSec} tok/s)`); + log(` MODEL: ${results.model.vlm || 'unknown'}`); + + // Compare with academic benchmark + log(`\n πŸ“ Academic reference (HomeSafeBench, best model):`); + log(` F1-score: 10.23% β€” current VLMs struggle significantly with safety hazards`); + log(` Your score: ${total > 0 ? ((passed / total * 100).toFixed(1) + '%') : 'N/A'} pass rate`); + log(`${'═'.repeat(66)}`); + + if (failed > 0) { + log('\n Failures:'); + for (const s of results.suites) { + for (const t of s.tests) { + if (t.status === 'fail') log(` ❌ ${s.name} > ${t.name}: ${t.detail}`); + } + } + } + + // Save results + fs.mkdirSync(RESULTS_DIR, { recursive: true }); + const modelSlug = (results.model.vlm || 'unknown').replace(/[^a-zA-Z0-9_.-]/g, '_'); + const ts = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19); + const resultFile = path.join(RESULTS_DIR, `${modelSlug}_${ts}.json`); + fs.writeFileSync(resultFile, JSON.stringify(results, null, 2)); + log(`\n Results saved: ${resultFile}`); + + // Update index + const indexFile = path.join(RESULTS_DIR, 'index.json'); + let index = []; + try { index = JSON.parse(fs.readFileSync(indexFile, 'utf8')); } catch { } + index.push({ + file: path.basename(resultFile), + model: results.model.vlm || 'unknown', + timestamp: results.timestamp, + passed, failed, total, + passRate: total > 0 ? passed / total : 0, + timeMs, + tokens: results.tokenTotals.total, + }); + fs.writeFileSync(indexFile, JSON.stringify(index, null, 2)); + + // Emit completion + emit({ + event: 'complete', + model: results.model.vlm, + passed, failed, skipped, total, + timeMs, + passRate: total > 0 ? 
passed / total : 0, + tokens: results.tokenTotals.total, + tokPerSec: parseFloat(tokPerSec) || 0, + resultFile, + }); + + log(''); + process.exit(failed > 0 ? 1 : 0); +} + +// Run when executed directly +const isDirectRun = require.main === module || + (process.argv[1] && require('path').resolve(process.argv[1]) === __filename); + +if (isDirectRun) { + main().catch(err => { + log(`Fatal: ${err.message}`); + emit({ event: 'error', message: err.message }); + process.exit(1); + }); +} + +module.exports = { main }; diff --git a/skills/analysis/smarthome-bench/SKILL.md b/skills/analysis/smarthome-bench/SKILL.md new file mode 100644 index 0000000..6140bd6 --- /dev/null +++ b/skills/analysis/smarthome-bench/SKILL.md @@ -0,0 +1,159 @@ +--- +name: SmartHome Video Anomaly Benchmark +description: VLM evaluation suite for video anomaly detection in smart home camera footage +version: 1.0.0 +category: analysis +runtime: node +entry: scripts/run-benchmark.cjs +install: npm + +requirements: + node: ">=18" + npm_install: true + system_deps: [yt-dlp, ffmpeg] + platforms: ["linux", "macos", "windows"] +--- + +# SmartHome Video Anomaly Benchmark + +Benchmark suite evaluating VLM models on **video anomaly detection** across **7 smart home categories** β€” based on the [SmartHome-Bench](https://github.com/Xinyi-0724/SmartHome-Bench-LLM) dataset (1,203 YouTube smart home camera clips). + +> **Key differentiator from HomeSec-Bench**: This benchmark requires **video understanding** (multi-frame sampling), not single-frame analysis. + +## Setup + +**Requires `npm install` + system dependencies.** The deployment agent should: +1. Check for `yt-dlp` and `ffmpeg` in PATH +2. Run `npm install` in the skill directory + +Entry script: `scripts/run-benchmark.cjs` + +### Verification + +```bash +node scripts/run-benchmark.cjs --help +``` + +## Quick Start + +### As an Aegis Skill (automatic) + +When spawned by Aegis, configuration is injected via environment variables. 
The benchmark downloads video clips, samples frames, evaluates with VLM, and generates an HTML report. + +### Standalone + +```bash +# Run with local VLM (subset mode, 50 videos) +node scripts/run-benchmark.cjs --vlm http://localhost:5405 + +# Quick test with 10 videos +node scripts/run-benchmark.cjs --vlm http://localhost:5405 --max-videos 10 + +# Full benchmark (all curated clips) +node scripts/run-benchmark.cjs --vlm http://localhost:5405 --mode full + +# Filter by category +node scripts/run-benchmark.cjs --vlm http://localhost:5405 --categories "Wildlife,Security" + +# Skip download (re-evaluate cached videos) +node scripts/run-benchmark.cjs --vlm http://localhost:5405 --skip-download + +# Skip report auto-open +node scripts/run-benchmark.cjs --vlm http://localhost:5405 --no-open +``` + +## Configuration + +### Environment Variables (set by Aegis) + +| Variable | Default | Description | +|----------|---------|-------------| +| `AEGIS_VLM_URL` | *(required)* | VLM server base URL | +| `AEGIS_VLM_MODEL` | — | Loaded VLM model ID | +| `AEGIS_SKILL_ID` | — | Skill identifier (enables skill mode) | +| `AEGIS_SKILL_PARAMS` | `{}` | JSON params from skill config | + +> **Note**: This is a VLM-only benchmark. An LLM gateway is not required. + +### User Configuration (config.yaml) + +This skill includes a [`config.yaml`](config.yaml) that defines user-configurable parameters. Aegis parses this at install time and renders a config panel in the UI. Values are delivered via `AEGIS_SKILL_PARAMS`. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `mode` | select | `subset` | Which clips to evaluate: `subset` (~50 clips) or `full` (all ~105 curated clips) | +| `maxVideos` | number | `50` | Maximum number of videos to evaluate | +| `categories` | text | `all` | Comma-separated category filter (e.g. 
`Wildlife,Security`) | +| `noOpen` | boolean | `false` | Skip auto-opening the HTML report in browser | + +### CLI Arguments (standalone fallback) + +| Argument | Default | Description | +|----------|---------|-------------| +| `--vlm URL` | *(required)* | VLM server base URL | +| `--out DIR` | `~/.aegis-ai/smarthome-bench` | Results directory | +| `--max-videos N` | `50` | Max videos to evaluate | +| `--mode MODE` | `subset` | `subset` or `full` | +| `--categories LIST` | `all` | Comma-separated category filter | +| `--skip-download` | — | Skip video download, use cached | +| `--no-open` | — | Don't auto-open report in browser | +| `--report` | *(auto in skill mode)* | Force report generation | + +## Protocol + +### Aegis → Skill (env vars) +``` +AEGIS_VLM_URL=http://localhost:5405 +AEGIS_SKILL_ID=smarthome-bench +AEGIS_SKILL_PARAMS={} +``` + +### Skill → Aegis (stdout, JSON lines) +```jsonl +{"event": "ready", "model": "SmolVLM2-2.2B", "system": "Apple M3"} +{"event": "suite_start", "suite": "Wildlife"} +{"event": "test_result", "suite": "Wildlife", "test": "smartbench_0003", "status": "pass", "timeMs": 4500} +{"event": "suite_end", "suite": "Wildlife", "passed": 12, "failed": 3} +{"event": "complete", "passed": 78, "total": 105, "timeMs": 480000, "reportPath": "/path/to/report.html"} +``` + +Human-readable output goes to **stderr** (visible in Aegis console tab). 
+ +## Test Suites (7 Categories) + +| Suite | Description | Anomaly Examples | +|-------|-------------|------------------| +| 🦊 Wildlife | Wild animals near home cameras | Bear on porch, deer in garden, coyote at night | +| 👴 Senior Care | Elderly activity monitoring | Falls, wandering, unusual inactivity | +| 👶 Baby Monitoring | Infant/child safety | Stroller rolling, child climbing, unsupervised | +| 🐾 Pet Monitoring | Pet behavior detection | Pet illness, escaped pets, unusual behavior | +| 🔒 Home Security | Intrusion & suspicious activity | Break-ins, trespassing, porch pirates | +| 📦 Package Delivery | Package arrival & theft | Stolen packages, misdelivered, weather damage | +| 🏠 General Activity | General smart home events | Unusual hours activity, appliance issues | + +Each clip is evaluated for **binary anomaly detection**: the VLM predicts normal (0) or abnormal (1), compared against expert annotations. + +## Metrics + +Per-category and overall: +- **Accuracy** — correct predictions / total +- **Precision** — true positives / predicted positives +- **Recall** — true positives / actual positives +- **F1-Score** — harmonic mean of precision & recall +- **Confusion Matrix** — TP, FP, TN, FN breakdown + +## Results + +Results are saved to `~/.aegis-ai/smarthome-bench/` as JSON. An HTML report with per-category breakdown, confusion matrix, and model comparison is auto-generated. + +## Requirements + +- Node.js ≥ 18 +- `npm install` (for `openai` SDK dependency) +- `yt-dlp` (video download from YouTube) +- `ffmpeg` (frame extraction from video clips) +- Running VLM server (must support multi-image input) + +## Citation + +Based on [SmartHome-Bench: A Comprehensive Benchmark for Video Anomaly Detection in Smart Homes Using Multi-Modal Foundation Models](https://arxiv.org/abs/2506.12992). 
diff --git a/skills/analysis/smarthome-bench/config.yaml b/skills/analysis/smarthome-bench/config.yaml new file mode 100644 index 0000000..6b973d9 --- /dev/null +++ b/skills/analysis/smarthome-bench/config.yaml @@ -0,0 +1,25 @@ +params: + - key: mode + label: Evaluation Mode + type: select + options: [subset, full] + default: subset + description: "Which clips to evaluate: subset (~50 videos) or full (all ~105 curated clips)" + + - key: maxVideos + label: Max Videos + type: number + default: 50 + description: Maximum number of videos to evaluate (overrides mode) + + - key: categories + label: Categories + type: text + default: all + description: "Comma-separated category filter, e.g. Wildlife,Security (default: all)" + + - key: noOpen + label: Don't auto-open report + type: boolean + default: false + description: Skip opening the HTML report in browser after completion diff --git a/skills/analysis/smarthome-bench/deploy.sh b/skills/analysis/smarthome-bench/deploy.sh new file mode 100755 index 0000000..166d29b --- /dev/null +++ b/skills/analysis/smarthome-bench/deploy.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# SmartHome-Bench deployment script +# Called by Aegis deployment agent during skill installation + +set -e + +SKILL_DIR="$(cd "$(dirname "$0")" && pwd)" +echo "πŸ“¦ Deploying SmartHome-Bench from: $SKILL_DIR" + +# ── Check system dependencies ───────────────────────────────────────────────── + +echo "πŸ” Checking system dependencies..." + +if ! command -v yt-dlp &>/dev/null; then + echo "⚠️ yt-dlp not found. Attempting install..." + if command -v brew &>/dev/null; then + brew install yt-dlp + elif command -v pip3 &>/dev/null; then + pip3 install yt-dlp + elif command -v apt-get &>/dev/null; then + sudo apt-get install -y yt-dlp 2>/dev/null || pip3 install yt-dlp + else + echo "❌ Cannot install yt-dlp automatically. Please install manually:" + echo " pip install yt-dlp OR brew install yt-dlp" + exit 1 + fi +fi +echo " βœ… yt-dlp: $(yt-dlp --version)" + +if ! 
command -v ffmpeg &>/dev/null; then + echo "⚠️ ffmpeg not found. Attempting install..." + if command -v brew &>/dev/null; then + brew install ffmpeg + elif command -v apt-get &>/dev/null; then + sudo apt-get install -y ffmpeg + else + echo "❌ Cannot install ffmpeg automatically. Please install manually:" + echo " brew install ffmpeg OR apt-get install ffmpeg" + exit 1 + fi +fi +echo " βœ… ffmpeg: $(ffmpeg -version 2>&1 | head -1)" + +# ── Install npm dependencies ────────────────────────────────────────────────── + +echo "πŸ“¦ Installing npm dependencies..." +cd "$SKILL_DIR" +npm install --production + +echo "βœ… SmartHome-Bench deployed successfully" diff --git a/skills/analysis/smarthome-bench/fixtures/annotations.json b/skills/analysis/smarthome-bench/fixtures/annotations.json new file mode 100644 index 0000000..c28f142 --- /dev/null +++ b/skills/analysis/smarthome-bench/fixtures/annotations.json @@ -0,0 +1,794 @@ +[ + { + "id": "smartbench_0001", + "category": "Baby Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=fVVAeFISp2c", + "description": "Child and adult riding tricycles in a suburban driveway. Girl in pink shirt near basketball hoop. Toys scattered. Supervised play.", + "reasoning": "Children playing in a driveway is common. Adult supervision reinforces normalcy." + }, + { + "id": "smartbench_0002", + "category": "Baby Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=ayzKVfJX5Wg", + "description": "Woman pushes stroller into entryway. Stroller reverses and rolls backward down steps. Woman chases and catches stroller.", + "reasoning": "Stroller unexpectedly rolled backward, potentially endangering the baby. Adults intervened quickly." + }, + { + "id": "smartbench_0050", + "category": "Baby Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=whDxqUY7Pok", + "description": "Boy in yellow helmet approaches front door. 
Person on camera tells boy he shrunk inside camera. Boy presses face to lens. Girl also looks. Playful interaction.", + "reasoning": "Fun interaction between camera user and children. Environment calm and safe." + }, + { + "id": "smartbench_0051", + "category": "Baby Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=rGP9bXOrMZU", + "description": "Toddler walks alone near pool area. No adult supervision visible. Child approaches water edge.", + "reasoning": "Unsupervised toddler near water is a safety hazard. This is considered an anomaly." + }, + { + "id": "smartbench_0052", + "category": "Baby Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=Y8ghImmBjlM", + "description": "Children playing in backyard with toys. Parents visible nearby. Normal daytime activity.", + "reasoning": "Standard supervised outdoor play. No concerns." + }, + { + "id": "smartbench_0053", + "category": "Baby Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=jsuqBMVeyeo", + "description": "Child climbs on furniture near window. Reaches for curtain rod. No adult present in frame.", + "reasoning": "Child climbing near window without supervision poses fall risk. Anomaly detected." + }, + { + "id": "smartbench_0054", + "category": "Baby Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=E-KX42Baw18", + "description": "Mother carrying baby walks through living room. Sets baby in playpen. Normal indoor routine.", + "reasoning": "Standard childcare activity. Safe environment." + }, + { + "id": "smartbench_0055", + "category": "Baby Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=Fgq6XQUEK4I", + "description": "Baby crawling toward stairs without gate. No supervision visible.", + "reasoning": "Unsupervised infant near stairs is a fall hazard. This is an anomaly." 
+ }, + { + "id": "smartbench_0003", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=TJ3OahLUahw", + "description": "Wild turkey on back porch railing, facing door. Moves head side to side surveying area. Grill and propane tank in background. Overcast.", + "reasoning": "Wild turkey unexpectedly appeared on porch. No human presence. Not domesticated." + }, + { + "id": "smartbench_0005", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=l5NFMDdYbM0", + "description": "Male mallard duck walks on concrete pathway toward camera. Camera announces recording. Duck startled and walks away.", + "reasoning": "Duck of unknown domestication appears at porch, attempting to enter. Considered anomaly." + }, + { + "id": "smartbench_0006", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=U9qHetMzD4s", + "description": "Coyote face close-up at night in black and white. Camera announces recording. Coyote retreats into snowy yard bordered by forest.", + "reasoning": "Coyote appears at camera, potentially dangerous wildlife near home. Anomaly." + }, + { + "id": "smartbench_0007", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=GfNH24LsSh8", + "description": "Black bear in backyard at night. Pushes over trash can, rummages through spilled contents. Alone on grass.", + "reasoning": "Bear rummaging through trash can cause property damage. Anomaly." + }, + { + "id": "smartbench_0008", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=bIyAGCc3RD8", + "description": "Gray squirrel approaches front entrance. Climbs railing, jumps down steps, scurries away. Overcast, wet streets.", + "reasoning": "Squirrel interacting with doorbell/camera. Minor wildlife anomaly." 
+ }, + { + "id": "smartbench_0047", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=yb9pu5uickk", + "description": "Night vision: raccoon enters from left, approaches fruit on ground, grabs it, turns toward patio.", + "reasoning": "Raccoon carried away food. Property damage potential. Anomaly." + }, + { + "id": "smartbench_0145", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=TNDtycEzUYo", + "description": "Night: four deer approach wooden deck. Two spar playfully on lawn, rearing up. Then all graze calmly.", + "reasoning": "Wild deer around house could damage property. Anomaly." + }, + { + "id": "smartbench_0146", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=XXrIIU-Org0", + "description": "Night: large antlered deer crosses driveway followed by two spotted fawns. Deer walks leisurely.", + "reasoning": "Multiple deer at front door. Wild animals could damage property. Anomaly." + }, + { + "id": "smartbench_0278", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=GqWCH3AcRII", + "description": "Night: mother bear and cub walk across paved driveway. Cub walks ahead, mother follows. Disappear into darkness.", + "reasoning": "Bears near house pose threat to property and safety. Anomaly." + }, + { + "id": "smartbench_0279", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=jX7wSFVuStI", + "description": "Night: snake slithers along stone wall on porch. Camera night vision. Slow movement along wall top.", + "reasoning": "Snake at house entrance. Wild animal could be dangerous. Anomaly." + }, + { + "id": "smartbench_0281", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=Z__Wg1fMXMA", + "description": "Black bear at front door in daytime. 
Stands on hind legs, peers over wall, pushes on door trying to open it. Walks away.", + "reasoning": "Bear attempting to break in. Dangerous wildlife. Anomaly." + }, + { + "id": "smartbench_0046", + "category": "General Activity", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=LLxrNx-Wkq4", + "description": "Night residential street. Bright green meteor streaks across sky left to right. No other movement. Still environment.", + "reasoning": "Natural phenomenon (meteor). Typical night scene. Normal." + }, + { + "id": "smartbench_0056", + "category": "General Activity", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=-k64PIAB3Hw", + "description": "Daytime front porch. Mail carrier delivers package, waves at camera. Normal delivery routine.", + "reasoning": "Standard mail delivery. No concerns." + }, + { + "id": "smartbench_0057", + "category": "General Activity", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=YSlCea9IOt8", + "description": "Late night. Person wanders in front yard appearing disoriented. Stumbles, sits on lawn.", + "reasoning": "Unusual late-night activity. Person appears disoriented. Anomaly." + }, + { + "id": "smartbench_0058", + "category": "General Activity", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=TnjOgP6IvU8", + "description": "Family gathering in backyard. Adults and children around grill. Daytime. Normal BBQ activity.", + "reasoning": "Typical family gathering. No anomalies." + }, + { + "id": "smartbench_0059", + "category": "General Activity", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=pPsjT_lvATQ", + "description": "Garage door opens and closes repeatedly at 3 AM. No person visible. Possible malfunction or unauthorized access.", + "reasoning": "Repeated garage door activation at unusual hours without visible cause. Anomaly." 
+ }, + { + "id": "smartbench_0060", + "category": "General Activity", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=ogx7uoRRpNo", + "description": "Neighbor walking dog past house on sidewalk. Brief stop, dog sniffs lawn. Continue walking.", + "reasoning": "Normal neighborhood activity. No concerns." + }, + { + "id": "smartbench_0061", + "category": "General Activity", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=XuzHCZdubJE", + "description": "Sprinkler system activates at 2 AM flooding walkway. Water pooling near entrance.", + "reasoning": "Unusual water system activation at night. Possible malfunction. Anomaly." + }, + { + "id": "smartbench_0062", + "category": "General Activity", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=mBMPOdZYdb0", + "description": "Car pulls into driveway. Person exits, walks to front door with groceries. Normal arrival.", + "reasoning": "Standard homecoming routine. No anomalies." + }, + { + "id": "smartbench_0063", + "category": "General Activity", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=BnD3b20QZDQ", + "description": "Smoke visible from kitchen window. No fire alarm heard. Grows thicker over time.", + "reasoning": "Possible kitchen fire or smoke event. Safety concern. Anomaly." + }, + { + "id": "smartbench_0064", + "category": "General Activity", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=bXHsotibxNE", + "description": "Kids riding bicycles on sidewalk. Parents watching from porch. Sunny afternoon.", + "reasoning": "Normal supervised outdoor play. Safe environment." + }, + { + "id": "smartbench_0065", + "category": "General Activity", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=ewRg_VpmtRI", + "description": "Tree branch falls on parked car in driveway during storm. Visible damage to windshield.", + "reasoning": "Weather-related property damage. Safety hazard. Anomaly." 
+ }, + { + "id": "smartbench_0004", + "category": "Home Security", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=k9si1UG8uLc", + "description": "Two individuals approach front door. One in red jacket, another in yellow bird costume with hospital foundation text. Mascot dances at camera.", + "reasoning": "Person in eccentric attire acting unusually at front door could frighten. Anomaly." + }, + { + "id": "smartbench_0048", + "category": "Home Security", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=tyBhNSRzOGI", + "description": "Man at front door with McDonald's bag. Calls boy named Cam. Boy appears. Playful interaction through doorbell camera with woman.", + "reasoning": "Fun light-hearted family interaction using doorbell camera. Normal." + }, + { + "id": "smartbench_0049", + "category": "Home Security", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=anH1uE2j7Qw", + "description": "Night: man in dark clothing approaches pickup truck in driveway. Tries driver door. Camera announces recording. Man runs away.", + "reasoning": "Attempted vehicle theft. Person fled after camera warning. Anomaly." + }, + { + "id": "smartbench_0280", + "category": "Home Security", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=YhSSxNNkeL0", + "description": "Night: individual approaches white SUV, opens driver door, searches inside. Voice says 'Hey!' Person closes door and runs.", + "reasoning": "Unauthorized vehicle access. Theft attempt. Anomaly." + }, + { + "id": "smartbench_0364", + "category": "Home Security", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=kAK6qLCbZ4k", + "description": "Night: man in dark clothing approaches blue car with car jack. Alarm goes off, voice yells 'Get out!' Man runs away.", + "reasoning": "Attempted tire theft from vehicle. Man fled when caught. Anomaly." 
+ }, + { + "id": "smartbench_0366", + "category": "Home Security", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=M3zJnxVOUWE", + "description": "Two teenagers playing basketball in driveway. Kicking ball up. Girl makes shot through hoop. Sunny day.", + "reasoning": "Casual game of basketball. Normal everyday scene." + }, + { + "id": "smartbench_0533", + "category": "Home Security", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=0u5DS9-gPHE", + "description": "Night: person examines door on wooden deck. Moves picture frames. Reaches under camera. Suspicious rummaging.", + "reasoning": "Woman suspiciously rummaging at front door. Potential theft. Anomaly." + }, + { + "id": "smartbench_0534", + "category": "Home Security", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=mKRsMtdqUKs", + "description": "Woman in blue shirt approaches porch. Picks up cards, reads them smiling. Picks up flower bouquet in vase. Carries away excitedly.", + "reasoning": "Gift retrieval by resident. Calm, happy behavior. Normal." + }, + { + "id": "smartbench_0535", + "category": "Home Security", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=5-Jd7oUisk4", + "description": "Overcast: two hooded individuals in dark clothing enter patio. One looks inside building. Other approaches teal door, looks through window, makes phone call.", + "reasoning": "Suspicious individuals peering through windows. Potential burglary. Anomaly." + }, + { + "id": "smartbench_0536", + "category": "Home Security", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=JI2NtXQR9to", + "description": "Daytime carport: man adjusting bicycle next to kayaks and motorcycle. Moving bicycle around positioning it.", + "reasoning": "Person attempting to steal bicycle. Theft. Anomaly." 
+ }, + { + "id": "smartbench_0009", + "category": "Pet Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=Ym9pPECNQyY", + "description": "Black squirrel reaches into metal container on lawn. Ginger cat approaches. Squirrel hesitates but resumes. Cat steps closer, squirrel flees.", + "reasoning": "Squirrel foraging near house. Cat may have escaped. Potential animal conflict. Anomaly." + }, + { + "id": "smartbench_0143", + "category": "Pet Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=deamYHz7Sno", + "description": "Two cows enter driveway through metal gate. Brown-white cow stops briefly, exits. White cow hesitates, follows. Gate swings shut.", + "reasoning": "Cows entered yard alone without owner. Possibly escaped. Anomaly." + }, + { + "id": "smartbench_0144", + "category": "Pet Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=kAK6qLCbZ4k", + "description": "Woman in black jacket on porch greeted by two excited dogs. Man in blue sweater enters with green bag. Family arriving home.", + "reasoning": "Normal family returning home with dogs. Standard activity." + }, + { + "id": "smartbench_0363", + "category": "Pet Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=5F2mg0p1EY8", + "description": "Indoor: light brown dog near hallway. Walks to dog bed, rolls on dark grey rug playfully. Well-lit room with sofa. Calm.", + "reasoning": "Normal pet behavior. Dog playing safely indoors. No concerns." + }, + { + "id": "smartbench_0365", + "category": "Pet Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=7KiITgvN5Aw", + "description": "Indoor: brown-white boxer dog enters, walks to kiddie pool with newborn puppies. Owner instructs dog to feed puppies. Dog nurses.", + "reasoning": "Normal pet monitoring. Owner checking on dog and puppies. Safe and calm." 
+ }, + { + "id": "smartbench_0066", + "category": "Pet Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=TKxo96PRJqc", + "description": "Dog alone in backyard digging hole near fence. Appears to be attempting to escape under fence.", + "reasoning": "Dog attempting to escape yard. Could get lost or injured. Anomaly." + }, + { + "id": "smartbench_0067", + "category": "Pet Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=knIyxSHUhnQ", + "description": "Cat sleeping on couch. Stretches, yawns, repositions. Indoor camera. Normal feline behavior.", + "reasoning": "Standard cat resting behavior. No anomalies." + }, + { + "id": "smartbench_0068", + "category": "Pet Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=MamIX1zJMM4", + "description": "Dog approaches kitchen counter. Jumps up, takes food from counter. Knocks plate to floor.", + "reasoning": "Pet getting into food and breaking items. Minor property damage. Anomaly." + }, + { + "id": "smartbench_0069", + "category": "Pet Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=0o6LGpxRhT8", + "description": "Two dogs playing together in living room. Chasing each other around furniture. Tails wagging.", + "reasoning": "Normal playful pet behavior. No safety concerns." + }, + { + "id": "smartbench_0070", + "category": "Pet Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=pOA51YSNlds", + "description": "Cat knocks over vase from shelf. Vase shatters on floor. Cat runs away startled.", + "reasoning": "Pet causing property damage. Broken glass hazard. Anomaly." + }, + { + "id": "smartbench_0071", + "category": "Pet Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=y3O0eItyxDc", + "description": "Dog lying in designated pet bed. Eyes closed, sleeping peacefully. Occasional ear twitch.", + "reasoning": "Normal pet resting. 
Calm environment. No anomalies." + }, + { + "id": "smartbench_0072", + "category": "Pet Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=OE0XSgcYjcA", + "description": "Dog pacing back and forth near door. Whining audibly. Repeated scratching at door.", + "reasoning": "Dog showing signs of distress or needing to go out urgently. Behavioral anomaly." + }, + { + "id": "smartbench_0073", + "category": "Pet Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=E1hMlQ7BfIY", + "description": "Cat using litter box. Normal bathroom behavior. Exits and walks to water bowl.", + "reasoning": "Standard pet care routine. Normal behavior." + }, + { + "id": "smartbench_0277", + "category": "Senior Care", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=jFNyxIdqD1o", + "description": "Boy enters house. Loud noise, boy shouts and falls. Woman in white pants enters concerned. Man in black shirt enters laughing, was hiding to scare. Picks up boy.", + "reasoning": "Man scared child causing fall. Could cause physical harm. Vague anomaly." + }, + { + "id": "smartbench_0074", + "category": "Senior Care", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=bUbXnWYDjmA", + "description": "Elderly woman walks through living room with walker. Moves slowly but steadily. Reaches kitchen, sits down at table.", + "reasoning": "Normal independent mobility. Senior maintaining daily routine safely." + }, + { + "id": "smartbench_0075", + "category": "Senior Care", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=ufjOxeZ-FNw", + "description": "Elderly person takes a fall in hallway. Struggles to get up. Reaches for furniture to pull self up.", + "reasoning": "Fall event for elderly person. Medical concern. Anomaly." 
+ }, + { + "id": "smartbench_0076", + "category": "Senior Care", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=sr1VLgBYMNs", + "description": "Senior couple watching TV in living room. Both seated on couch. Normal evening activity.", + "reasoning": "Typical leisure activity. No concerns." + }, + { + "id": "smartbench_0077", + "category": "Senior Care", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=Ep3sf3jD8E8", + "description": "Elderly person wandering around house at 3 AM. Opens front door, steps outside briefly. Returns inside confused.", + "reasoning": "Nighttime wandering. Possible disorientation. Safety concern. Anomaly." + }, + { + "id": "smartbench_0078", + "category": "Senior Care", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=b92shV0BbhM", + "description": "Senior doing light exercises in living room. Stretching movements. Following routine calmly.", + "reasoning": "Normal exercise activity. Healthy behavior. No anomalies." + }, + { + "id": "smartbench_0079", + "category": "Senior Care", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=LINEK1p4-1U", + "description": "Elderly person hasn't moved from chair for extended period. No response to doorbell. Appears unresponsive.", + "reasoning": "Extended inactivity and unresponsiveness concerning. Medical emergency potential. Anomaly." + }, + { + "id": "smartbench_0080", + "category": "Senior Care", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=QVGxK7X2mUo", + "description": "Elderly woman in kitchen preparing food. Moves carefully around counter. Uses stove with attention.", + "reasoning": "Normal cooking activity. Senior managing daily task safely." + }, + { + "id": "smartbench_0081", + "category": "Senior Care", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=iF3N61nr6JA", + "description": "Senior leaves stove burner on, walks away to another room. 
Pot begins to smoke.", + "reasoning": "Forgotten stove with smoking pot. Fire hazard. Anomaly." + }, + { + "id": "smartbench_0082", + "category": "Senior Care", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=_zLhytipIQU", + "description": "Elderly man reading book in armchair. Occasionally sips from cup. Peaceful afternoon scene.", + "reasoning": "Normal leisure activity. Relaxed and safe environment." + }, + { + "id": "smartbench_0083", + "category": "Senior Care", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=bBgevdp5DKM", + "description": "Senior trips on rug edge in hallway. Catches self on wall but appears shaken. Limps slightly continuing.", + "reasoning": "Trip hazard incident. Potential injury. Mobility concern. Anomaly." + }, + { + "id": "smartbench_0084", + "category": "Senior Care", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=TNDtycEzUYo", + "description": "Caregiver visits elderly person at home. Helps with medications. Cheerful interaction.", + "reasoning": "Normal caregiving visit. Standard assisted living activity." + }, + { + "id": "smartbench_0085", + "category": "Senior Care", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=XXrIIU-Org0", + "description": "Senior drops medication bottles, unable to pick them up. Sits looking frustrated for extended period.", + "reasoning": "Mobility limitation preventing medication access. Care concern. Anomaly." + }, + { + "id": "smartbench_0086", + "category": "Package Delivery", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=Xlp3FZyYuK8", + "description": "Delivery driver in uniform approaches front door. Places package carefully on porch. Rings doorbell, takes photo. Walks back to van.", + "reasoning": "Standard package delivery. Professional conduct. Normal." 
+ }, + { + "id": "smartbench_0087", + "category": "Package Delivery", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=GqWCH3AcRII", + "description": "Person in casual clothes approaches porch. Looks around nervously. Picks up package from doorstep. Quickly walks away.", + "reasoning": "Package theft. Person took package that wasn't theirs. Anomaly." + }, + { + "id": "smartbench_0088", + "category": "Package Delivery", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=jX7wSFVuStI", + "description": "UPS driver delivers large box. Homeowner opens door, signs for package. Brief conversation. Driver leaves.", + "reasoning": "Normal signed delivery. Expected interaction. No anomalies." + }, + { + "id": "smartbench_0089", + "category": "Package Delivery", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=YhSSxNNkeL0", + "description": "Delivery driver throws package from several feet away at porch. Package bounces on ground. Driver walks away quickly.", + "reasoning": "Rough package handling. Potential damage to contents. Anomaly." + }, + { + "id": "smartbench_0090", + "category": "Package Delivery", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=Z__Wg1fMXMA", + "description": "Amazon driver walks to door. Carefully places package beside mat. Takes delivery photo. Professional service.", + "reasoning": "Standard careful delivery. Normal activity." + }, + { + "id": "smartbench_0091", + "category": "Package Delivery", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=jFNyxIdqD1o", + "description": "Two people drive up to house. Passenger gets out, takes all three packages from porch. Gets back in car. Drive away.", + "reasoning": "Multiple packages stolen by organized thieves. Porch piracy. Anomaly." 
+ }, + { + "id": "smartbench_0092", + "category": "Package Delivery", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=z-TMWr0axSo", + "description": "FedEx driver leaves package at door. Homeowner retrieves within minutes. Normal day.", + "reasoning": "Routine delivery and retrieval. No issues." + }, + { + "id": "smartbench_0093", + "category": "Package Delivery", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=_yp-l9ywvPU", + "description": "Package left on porch during rainstorm. Gets soaked. No protective covering provided.", + "reasoning": "Package exposed to weather damage. Delivery issue. Anomaly." + }, + { + "id": "smartbench_0094", + "category": "Package Delivery", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=3XBBQlR21rY", + "description": "Neighbor picks up misdelivered package from porch. Walks it over to correct address next door.", + "reasoning": "Helpful neighbor redirecting package. Normal community behavior." + }, + { + "id": "smartbench_0095", + "category": "Package Delivery", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=c4BiMNV5dME", + "description": "Delivery left at wrong address. Package sits on porch for days. Gets stepped on by visitors.", + "reasoning": "Misdelivered package accumulating damage. Delivery failure. Anomaly." + }, + { + "id": "smartbench_0096", + "category": "Package Delivery", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=dZU47xjMPfc", + "description": "Mail carrier delivers envelope and small package. Places in mailbox area. Routine postal delivery.", + "reasoning": "Standard mail delivery. Nothing unusual." + }, + { + "id": "smartbench_0097", + "category": "Package Delivery", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=1OsGAAGe6CA", + "description": "Person follows delivery truck. 
After driver leaves, approaches porch and takes the just-delivered package.", + "reasoning": "Trailing delivery vehicle to steal packages. Organized theft. Anomaly." + }, + { + "id": "smartbench_0098", + "category": "Package Delivery", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=L7s4QL9fozo", + "description": "Homeowner places outgoing package on porch for scheduled pickup. USPS driver collects it.", + "reasoning": "Scheduled package pickup. Normal postal service interaction." + }, + { + "id": "smartbench_0099", + "category": "Package Delivery", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=UuDW7Zvm5Gg", + "description": "Delivery driver leaves package hanging out of mailbox precariously. Visible to street. Easy target.", + "reasoning": "Insecure package placement visible from street. Theft risk. Anomaly." + }, + { + "id": "smartbench_0100", + "category": "Home Security", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=P7UGtwg7C4I", + "description": "Homeowner on porch checking mail. Waves at passing neighbor. Normal daytime activity.", + "reasoning": "Standard residential activity. No security concerns." + }, + { + "id": "smartbench_0101", + "category": "Home Security", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=Mh1EgnB1DmQ", + "description": "Night: person approaches back door. Tries handle. Uses tool to pry at lock. Camera floodlight activates. Person runs.", + "reasoning": "Break-in attempt at rear entrance. Criminal activity. Anomaly." + }, + { + "id": "smartbench_0102", + "category": "Home Security", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=IUW21JpYPPE", + "description": "Landscaper arrives with mower. Works on front lawn. Professional uniform. Expected service visit.", + "reasoning": "Scheduled lawn maintenance. Normal expected activity." 
+ }, + { + "id": "smartbench_0103", + "category": "Home Security", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=Q_0WEIZVc28", + "description": "Person walks up to multiple cars on street. Tries each door handle. One opens. Reaches inside then continues walking.", + "reasoning": "Vehicle break-ins on street. Checking multiple cars. Theft. Anomaly." + }, + { + "id": "smartbench_0104", + "category": "Home Security", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=7IwZMmTWoKM", + "description": "Pizza delivery person at front door. Homeowner answers, takes pizza, pays. Brief friendly exchange.", + "reasoning": "Expected food delivery. Normal transaction." + }, + { + "id": "smartbench_0010", + "category": "Baby Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=QNbsdpgb2YI", + "description": "Toddler playing with blocks in nursery. Parent visible in doorway watching. Safe supervised play.", + "reasoning": "Normal supervised indoor play. Safe environment. No concerns." + }, + { + "id": "smartbench_0011", + "category": "Baby Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=sZ9ij4A0REg", + "description": "Baby in crib pulling at mobile attachment. Mobile comes loose and falls near baby.", + "reasoning": "Nursery equipment failure near infant. Safety hazard. Anomaly." + }, + { + "id": "smartbench_0012", + "category": "Baby Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=gI4oK4azztc", + "description": "Baby sleeping peacefully in crib. Night vision camera. Occasional movement during sleep. Normal patterns.", + "reasoning": "Normal infant sleep patterns. No concerns." + }, + { + "id": "smartbench_0013", + "category": "Baby Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=deamYHz7Sno", + "description": "Toddler opens baby gate and walks toward stairs alone. 
Gate latch appears broken or improperly secured.", + "reasoning": "Failed safety gate allowing unsupervised stair access. Serious safety hazard. Anomaly." + }, + { + "id": "smartbench_0014", + "category": "Baby Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=kAK6qLCbZ4k", + "description": "Mother and child reading book on couch. Calm indoor scene. Child points at pictures.", + "reasoning": "Normal bonding activity. Safe supervised environment." + }, + { + "id": "smartbench_0015", + "category": "Baby Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=bUbXnWYDjmA", + "description": "Child opens front door and walks outside alone. No adult follows. Door left open.", + "reasoning": "Unsupervised child leaving house alone. Serious safety concern. Anomaly." + }, + { + "id": "smartbench_0105", + "category": "Senior Care", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=LLxrNx-Wkq4", + "description": "Elderly couple taking morning walk around neighborhood. Slow steady pace. Return home safely.", + "reasoning": "Normal exercise routine. Healthy activity for seniors." + }, + { + "id": "smartbench_0106", + "category": "Senior Care", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=ufjOxeZ-FNw", + "description": "Senior found on floor near bathroom. Appears to have slipped. Calling out for help.", + "reasoning": "Fall in bathroom. Medical emergency. Requires immediate attention. Anomaly." + }, + { + "id": "smartbench_0107", + "category": "Wildlife", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=sr1VLgBYMNs", + "description": "Birds feeding at bird feeder in backyard. Squirrel approaches but can't reach. Normal wildlife interaction.", + "reasoning": "Expected backyard wildlife at feeder. Normal and welcome activity." 
+ }, + { + "id": "smartbench_0108", + "category": "Wildlife", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=Ep3sf3jD8E8", + "description": "Rabbit on front lawn in early morning. Nibbles grass. Hops away when car passes. Daytime.", + "reasoning": "Common suburban wildlife. Harmless. Normal." + }, + { + "id": "smartbench_0109", + "category": "Wildlife", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=b92shV0BbhM", + "description": "Hummingbird visits porch feeder. Hovers, drinks nectar, flies away. Beautiful nature scene.", + "reasoning": "Typical backyard bird activity. No concerns." + }, + { + "id": "smartbench_0110", + "category": "Wildlife", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=LINEK1p4-1U", + "description": "Night: large cat (possibly bobcat) stalks across backyard. Moves stealthily. Disappears into bushes near pet door.", + "reasoning": "Predatory wild cat near pet door. Risk to household pets. Anomaly." + }, + { + "id": "smartbench_0111", + "category": "Pet Monitoring", + "anomaly_tag": 0, + "youtube_url": "https://www.youtube.com/watch?v=QVGxK7X2mUo", + "description": "Dog eating from food bowl in kitchen. Normal mealtime behavior. Wags tail.", + "reasoning": "Standard pet feeding time. Normal behavior." + }, + { + "id": "smartbench_0112", + "category": "Pet Monitoring", + "anomaly_tag": 1, + "youtube_url": "https://www.youtube.com/watch?v=iF3N61nr6JA", + "description": "Dog chewing on electrical cord plugged into wall. Cord partially damaged.", + "reasoning": "Pet chewing on electrical cord. Electrocution and fire hazard. Anomaly." 
+ } +] \ No newline at end of file diff --git a/skills/analysis/smarthome-bench/package-lock.json b/skills/analysis/smarthome-bench/package-lock.json new file mode 100644 index 0000000..b1d8328 --- /dev/null +++ b/skills/analysis/smarthome-bench/package-lock.json @@ -0,0 +1,37 @@ +{ + "name": "smarthome-bench", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "smarthome-bench", + "version": "1.0.0", + "license": "ISC", + "dependencies": { + "openai": "^6.27.0" + } + }, + "node_modules/openai": { + "version": "6.27.0", + "resolved": "https://registry.npmjs.org/openai/-/openai-6.27.0.tgz", + "integrity": "sha512-osTKySlrdYrLYTt0zjhY8yp0JUBmWDCN+Q+QxsV4xMQnnoVFpylgKGgxwN8sSdTNw0G4y+WUXs4eCMWpyDNWZQ==", + "license": "Apache-2.0", + "bin": { + "openai": "bin/cli" + }, + "peerDependencies": { + "ws": "^8.18.0", + "zod": "^3.25 || ^4.0" + }, + "peerDependenciesMeta": { + "ws": { + "optional": true + }, + "zod": { + "optional": true + } + } + } + } +} diff --git a/skills/analysis/smarthome-bench/package.json b/skills/analysis/smarthome-bench/package.json new file mode 100644 index 0000000..0f2cdeb --- /dev/null +++ b/skills/analysis/smarthome-bench/package.json @@ -0,0 +1,22 @@ +{ + "name": "smarthome-bench", + "version": "1.0.0", + "description": "SmartHome-Bench video anomaly detection benchmark for VLM evaluation", + "main": "scripts/run-benchmark.cjs", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "keywords": [ + "benchmark", + "vlm", + "video", + "anomaly-detection", + "smart-home" + ], + "author": "", + "license": "ISC", + "type": "commonjs", + "dependencies": { + "openai": "^6.27.0" + } +} \ No newline at end of file diff --git a/skills/analysis/smarthome-bench/scripts/generate-report.cjs b/skills/analysis/smarthome-bench/scripts/generate-report.cjs new file mode 100644 index 0000000..6eec6bd --- /dev/null +++ b/skills/analysis/smarthome-bench/scripts/generate-report.cjs @@ -0,0 
+1,307 @@ +#!/usr/bin/env node +/** + * HTML Report Generator for SmartHome-Bench Video Anomaly Detection Benchmark + * + * Reads JSON result files from the results directory and generates + * a self-contained HTML report with: + * - Per-category accuracy breakdown + * - Confusion matrix (TP/FP/TN/FN) + * - Overall metrics (accuracy, precision, recall, F1) + * - Historical model comparison table + * + * Usage: + * node generate-report.cjs [results-dir] + * Default: ~/.aegis-ai/smarthome-bench + */ + +const fs = require('fs'); +const path = require('path'); +const os = require('os'); + +const RESULTS_DIR = process.argv[2] || path.join(os.homedir(), '.aegis-ai', 'smarthome-bench'); + +function generateReport(resultsDir = RESULTS_DIR) { + // Find all result files + const files = fs.readdirSync(resultsDir) + .filter(f => f.endsWith('.json') && !f.startsWith('index')) + .sort() + .reverse(); // Most recent first + + if (files.length === 0) { + console.error('No result files found in', resultsDir); + return null; + } + + // Load latest result + const latestFile = path.join(resultsDir, files[0]); + const latest = JSON.parse(fs.readFileSync(latestFile, 'utf8')); + + // Load all results for comparison + const allResults = files.slice(0, 20).map(f => { + try { + return JSON.parse(fs.readFileSync(path.join(resultsDir, f), 'utf8')); + } catch { + return null; + } + }).filter(Boolean); + + // Generate HTML + const html = buildHTML(latest, allResults); + const reportPath = path.join(resultsDir, 'report.html'); + fs.writeFileSync(reportPath, html); + console.error(`Report generated: ${reportPath}`); + return reportPath; +} + +function buildHTML(latest, allResults) { + const model = latest.model?.vlm || 'Unknown'; + const timestamp = new Date(latest.timestamp).toLocaleString(); + const totalTests = latest.totals?.total || 0; + const passed = latest.totals?.passed || 0; + const failed = latest.totals?.failed || 0; + const skipped = latest.totals?.skipped || 0; + const timeMs = 
latest.totals?.timeMs || 0; + const metrics = latest.metrics || {}; + const overall = metrics.overall || {}; + const perCategory = metrics.perCategory || {}; + + // Build category rows + const categoryRows = Object.entries(perCategory).map(([cat, m]) => { + const accPct = (m.accuracy * 100).toFixed(1); + const precPct = (m.precision * 100).toFixed(1); + const recPct = (m.recall * 100).toFixed(1); + const f1Pct = (m.f1 * 100).toFixed(1); + const accClass = m.accuracy >= 0.8 ? 'high' : m.accuracy >= 0.5 ? 'mid' : 'low'; + return ` + ${escHtml(cat)} + ${accPct}% + ${precPct}% + ${recPct}% + ${f1Pct}% + ${m.tp} + ${m.fp} + ${m.tn} + ${m.fn} + ${m.total} + `; + }).join('\n'); + + // Build suite detail rows + const suiteDetailRows = (latest.suites || []).map(s => { + const testRows = s.tests.map(t => { + const statusIcon = t.status === 'pass' ? 'βœ…' : t.status === 'fail' ? '❌' : '⏭️'; + const statusClass = t.status; + return ` + ${statusIcon} + ${escHtml(t.name)} + ${t.status} + ${t.timeMs}ms + ${escHtml((t.detail || '').slice(0, 100))} + `; + }).join('\n'); + + return `
+

${escHtml(s.name)}

+
+ ✅ ${s.passed} passed · ❌ ${s.failed} failed · ⏭️ ${s.skipped} skipped · ⏱ ${(s.timeMs / 1000).toFixed(1)}s +
+ + + ${testRows} +
TestStatusTimeDetail
+
`; + }).join('\n'); + + // Build comparison table + const comparisonRows = allResults.map(r => { + const rModel = r.model?.vlm || 'Unknown'; + const rTime = new Date(r.timestamp).toLocaleDateString(); + const rMetrics = r.metrics?.overall || {}; + const rAcc = ((rMetrics.accuracy || 0) * 100).toFixed(1); + const rF1 = ((rMetrics.f1 || 0) * 100).toFixed(1); + const rPassed = r.totals?.passed || 0; + const rTotal = r.totals?.total || 0; + const rTimeMs = r.totals?.timeMs || 0; + return ` + ${escHtml(rModel)} + ${rTime} + ${rPassed}/${rTotal} + ${rAcc}% + ${rF1}% + ${(rTimeMs / 1000).toFixed(0)}s + `; + }).join('\n'); + + const overallAccPct = ((overall.accuracy || 0) * 100).toFixed(1); + const overallPrecPct = ((overall.precision || 0) * 100).toFixed(1); + const overallRecPct = ((overall.recall || 0) * 100).toFixed(1); + const overallF1Pct = ((overall.f1 || 0) * 100).toFixed(1); + + return ` + + + + +SmartHome-Bench Report β€” ${escHtml(model)} + + + +
+ +
+

🏠 SmartHome-Bench Report

+
+ Video Anomaly Detection Benchmark · ${escHtml(model)} · ${timestamp} +
+
+ +
+
${overallAccPct}%
Accuracy
+
${overallF1Pct}%
F1 Score
+
${overallPrecPct}%
Precision
+
${overallRecPct}%
Recall
+
${passed}/${totalTests}
Passed
+
${(timeMs / 1000).toFixed(0)}s
Total Time
+
+ +
+ 🖥 ${escHtml(latest.system?.cpus || 'Unknown')} + 💾 ${latest.system?.totalRAM_GB || '?'} GB RAM + 🔧 Node ${escHtml(latest.system?.node || '?')} +
+ +

πŸ“Š Overall Confusion Matrix

+
+
+
Predicted Normal
+
Predicted Abnormal
+
Actual Normal
+
TN: ${overall.tn || 0}
+
FP: ${overall.fp || 0}
+
Actual Abnormal
+
FN: ${overall.fn || 0}
+
TP: ${overall.tp || 0}
+
+ +

πŸ“‹ Per-Category Breakdown

+ + + + + + + + + + + + + + + + + ${categoryRows} + + + + + + + + + + + + + +
CategoryAccuracyPrecisionRecallF1TPFPTNFNTotal
Overall${overallAccPct}%${overallPrecPct}%${overallRecPct}%${overallF1Pct}%${overall.tp || 0}${overall.fp || 0}${overall.tn || 0}${overall.fn || 0}${totalTests}
+ +

πŸ§ͺ Test Details

+${suiteDetailRows} + +${allResults.length > 1 ? ` +

πŸ“ˆ Model Comparison

+ + + + + ${comparisonRows} +
ModelDatePassedAccuracyF1Time
+` : ''} + + + +
+ +`; +} + +function escHtml(str) { + return String(str || '').replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;'); +} + +// Run if called directly +if (require.main === module) { + generateReport(); +} + +module.exports = { generateReport }; diff --git a/skills/analysis/smarthome-bench/scripts/run-benchmark.cjs b/skills/analysis/smarthome-bench/scripts/run-benchmark.cjs new file mode 100644 index 0000000..8db00ea --- /dev/null +++ b/skills/analysis/smarthome-bench/scripts/run-benchmark.cjs @@ -0,0 +1,825 @@ +#!/usr/bin/env node +/** + * SmartHome-Bench — Video Anomaly Detection Benchmark + * + * Evaluates VLM models on video anomaly detection across 7 smart home categories: + * - Wildlife, Senior Care, Baby Monitoring, Pet Monitoring, + * Home Security, Package Delivery, General Activity + * + * Based on SmartHome-Bench (https://github.com/Xinyi-0724/SmartHome-Bench-LLM) + * + * ## Skill Protocol (when spawned by Aegis) + * + * Aegis → Skill (env vars): + * AEGIS_VLM_URL — VLM server URL (e.g. 
http://localhost:5405) + * AEGIS_SKILL_PARAMS β€” JSON params from skill config + * AEGIS_SKILL_ID β€” Skill ID + * + * Skill β†’ Aegis (stdout, JSON lines): + * {"event": "ready", "model": "SmolVLM2-2.2B"} + * {"event": "suite_start", "suite": "Wildlife"} + * {"event": "test_result", "suite": "...", "test": "...", "status": "pass", "timeMs": 1234} + * {"event": "suite_end", "suite": "...", "passed": 12, "failed": 3} + * {"event": "complete", "passed": 78, "total": 105, "timeMs": 480000} + * + * Standalone usage: + * node run-benchmark.cjs [options] + * --vlm URL VLM server (required) + * --max-videos N Max videos to evaluate (default: 50) + * --mode MODE subset or full (default: subset) + * --categories L Comma-separated category filter + * --skip-download Use cached videos only + * --out DIR Results directory + * --no-open Don't auto-open report + */ + +const fs = require('fs'); +const path = require('path'); +const os = require('os'); +const { execSync, spawnSync } = require('child_process'); + +// ─── Config: Aegis env vars β†’ CLI args β†’ defaults ──────────────────────────── + +const args = process.argv.slice(2); +function getArg(name, defaultVal) { + const idx = args.indexOf(`--${name}`); + if (idx === -1) return defaultVal; + return args[idx + 1] || defaultVal; +} + +// ─── Help ───────────────────────────────────────────────────────────────────── +if (args.includes('--help') || args.includes('-h')) { + console.log(` +SmartHome-Bench β€” Video Anomaly Detection Benchmark β€’ DeepCamera / SharpAI + +Usage: node scripts/run-benchmark.cjs [options] + +Options: + --vlm URL VLM server base URL (required) + --max-videos N Max videos to evaluate (default: 50) + --mode MODE subset or full (default: subset) + --categories L Comma-separated filter (default: all) + --skip-download Use cached videos only + --out DIR Results output directory (default: ~/.aegis-ai/smarthome-bench) + --no-open Don't auto-open report in browser + --report Force report generation + -h, 
--help Show this help message + +Environment Variables (set by Aegis): + AEGIS_VLM_URL VLM server base URL + AEGIS_SKILL_ID Skill identifier (enables skill mode) + AEGIS_SKILL_PARAMS JSON params from skill config + +Categories: Wildlife, Senior Care, Baby Monitoring, Pet Monitoring, + Home Security, Package Delivery, General Activity + `.trim()); + process.exit(0); +} + +// Parse skill parameters if running as Aegis skill +let skillParams = {}; +try { skillParams = JSON.parse(process.env.AEGIS_SKILL_PARAMS || '{}'); } catch { } + +const VLM_URL = process.env.AEGIS_VLM_URL || getArg('vlm', ''); +const VLM_MODEL = process.env.AEGIS_VLM_MODEL || ''; +const RESULTS_DIR = getArg('out', path.join(os.homedir(), '.aegis-ai', 'smarthome-bench')); +const VIDEO_CACHE_DIR = path.join(os.homedir(), '.aegis-ai', 'smarthome-bench', 'videos'); +const FRAMES_DIR = path.join(os.homedir(), '.aegis-ai', 'smarthome-bench', 'frames'); +const IS_SKILL_MODE = !!process.env.AEGIS_SKILL_ID; +const NO_OPEN = args.includes('--no-open') || skillParams.noOpen || false; +const SKIP_DOWNLOAD = args.includes('--skip-download'); +const FIXTURES_DIR = path.join(__dirname, '..', 'fixtures'); +const IDLE_TIMEOUT_MS = 60000; // VLM inference can be slow for multi-image + +// Mode & limits +const TEST_MODE = skillParams.mode || getArg('mode', 'subset'); +const MAX_VIDEOS = parseInt(skillParams.maxVideos || getArg('max-videos', '50'), 10) || 50; +const CATEGORIES_FILTER = (skillParams.categories || getArg('categories', 'all') || 'all').toLowerCase(); +const FRAMES_PER_VIDEO = 6; + +// ─── OpenAI SDK Client ────────────────────────────────────────────────────── +const OpenAI = require('openai'); + +const strip = (u) => u.replace(/\/v1\/?$/, ''); +const vlmClient = VLM_URL ? 
new OpenAI({ + apiKey: 'not-needed', + baseURL: `${strip(VLM_URL)}/v1`, +}) : null; + +// ─── Skill Protocol: JSON lines on stdout, human text on stderr ────────────── + +function emit(event) { + process.stdout.write(JSON.stringify(event) + '\n'); +} + +function log(msg) { + process.stderr.write(msg + '\n'); +} + +// ─── Test Framework ─────────────────────────────────────────────────────────── + +const suites = []; +let currentSuite = null; + +function suite(name, fn) { + suites.push({ name, fn, tests: [] }); +} + +const results = { + timestamp: new Date().toISOString(), + vlm: VLM_URL || null, + system: {}, + model: {}, + suites: [], + totals: { passed: 0, failed: 0, skipped: 0, total: 0, timeMs: 0 }, + tokenTotals: { prompt: 0, completion: 0, total: 0 }, + metrics: {}, +}; + +async function vlmCall(messages, opts = {}) { + if (!vlmClient) { + throw new Error('VLM client not configured β€” pass --vlm URL'); + } + + const model = opts.model || VLM_MODEL || undefined; + + const params = { + messages, + stream: true, + ...(model && { model }), + ...(opts.temperature !== undefined && { temperature: opts.temperature }), + max_completion_tokens: opts.maxTokens || 512, + }; + + const controller = new AbortController(); + const idleMs = opts.timeout || IDLE_TIMEOUT_MS; + let idleTimer = setTimeout(() => controller.abort(), idleMs); + const resetIdle = () => { clearTimeout(idleTimer); idleTimer = setTimeout(() => controller.abort(), idleMs); }; + + try { + const stream = await vlmClient.chat.completions.create(params, { + signal: controller.signal, + }); + + let content = ''; + let reasoningContent = ''; + let model = ''; + let usage = {}; + let tokenCount = 0; + + for await (const chunk of stream) { + resetIdle(); + if (chunk.model) model = chunk.model; + const delta = chunk.choices?.[0]?.delta; + if (delta?.content) content += delta.content; + if (delta?.reasoning_content) reasoningContent += delta.reasoning_content; + if (delta?.content || delta?.reasoning_content) { + 
tokenCount++; + if (tokenCount % 100 === 0) { + log(` … ${tokenCount} tokens received`); + } + } + if (chunk.usage) usage = chunk.usage; + } + + if (!content && reasoningContent) { + content = reasoningContent; + } + + results.tokenTotals.prompt += usage.prompt_tokens || 0; + results.tokenTotals.completion += usage.completion_tokens || 0; + results.tokenTotals.total += usage.total_tokens || 0; + + if (!results.model.vlm && model) results.model.vlm = model; + + return { content, usage, model }; + } finally { + clearTimeout(idleTimer); + } +} + +function stripThink(text) { + return text.replace(/<think>[\s\S]*?<\/think>\s*/gi, '').trim(); +} + +function parseJSON(text) { + const cleaned = stripThink(text); + let jsonStr = cleaned; + const codeBlock = cleaned.match(/```(?:json)?\s*([\s\S]*?)\s*```/); + if (codeBlock) jsonStr = codeBlock[1]; + else { + const idx = cleaned.search(/[{[]/); + if (idx > 0) jsonStr = cleaned.slice(idx); + } + return JSON.parse(jsonStr.trim()); +} + +function assert(condition, msg) { + if (!condition) throw new Error(msg || 'Assertion failed'); +} + +async function runSuites() { + for (const s of suites) { + currentSuite = { name: s.name, tests: [], passed: 0, failed: 0, skipped: 0, timeMs: 0 }; + log(`\n${'─'.repeat(60)}`); + log(` ${s.name}`); + log(`${'─'.repeat(60)}`); + emit({ event: 'suite_start', suite: s.name }); + + await s.fn(); + + results.suites.push(currentSuite); + results.totals.passed += currentSuite.passed; + results.totals.failed += currentSuite.failed; + results.totals.skipped += currentSuite.skipped; + results.totals.total += currentSuite.tests.length; + + emit({ event: 'suite_end', suite: s.name, passed: currentSuite.passed, failed: currentSuite.failed, skipped: currentSuite.skipped, timeMs: currentSuite.timeMs }); + } +} + +async function test(name, fn) { + const testResult = { name, status: 'pass', timeMs: 0, detail: '', tokens: {} }; + const start = Date.now(); + try { + const detail = await fn(); + testResult.timeMs = 
/**
 * Check that the partition holding `targetDir` has at least `requiredGB`
 * gigabytes available, creating the directory first if it does not exist.
 *
 * The check is best-effort: if `df` cannot be run or its output cannot be
 * parsed, a warning is logged (or nothing, for unparseable output) and the
 * caller proceeds. When the available space is known to be too small, an
 * `error` event is emitted and the process exits with code 1.
 *
 * @param {string} targetDir - Directory whose partition is inspected.
 * @param {number} requiredGB - Minimum free space, in gigabytes.
 * @returns {number} Available gigabytes, or -1 when it could not be determined.
 */
function checkDiskSpace(targetDir, requiredGB) {
  try {
    fs.mkdirSync(targetDir, { recursive: true });
    // Run df with an argv array (no shell), so quotes, `$` or backticks in
    // the path are never interpreted. -P forces the POSIX one-line-per-
    // filesystem layout so long device names cannot wrap and shift columns;
    // -k reports 1024-byte blocks.
    const df = spawnSync('df', ['-Pk', targetDir], { encoding: 'utf8' });
    if (df.error) throw df.error;
    if (df.status !== 0) {
      throw new Error((df.stderr || '').trim() || `df exited with status ${df.status}`);
    }

    const lines = (df.stdout || '').trim().split('\n');
    if (lines.length >= 2) {
      // POSIX columns: Filesystem, 1024-blocks, Used, Available, Capacity, Mounted on
      const parts = lines[1].split(/\s+/);
      const availableKB = parseInt(parts[3], 10);
      if (!isNaN(availableKB)) {
        const availableGB = availableKB / (1024 * 1024);
        if (availableGB < requiredGB) {
          log(` ❌ Insufficient disk space`);
          log(` Required: ${requiredGB.toFixed(1)} GB`);
          log(` Available: ${availableGB.toFixed(1)} GB`);
          log(` Location: ${targetDir}`);
          emit({ event: 'error', message: `Insufficient disk space: need ${requiredGB}GB, have ${availableGB.toFixed(1)}GB` });
          process.exit(1);
        }
        log(` πŸ’Ύ Disk: ${availableGB.toFixed(1)} GB available (need ${requiredGB} GB) βœ“`);
        return availableGB;
      }
    }
  } catch (err) {
    log(` ⚠️ Could not check disk space: ${err.message} β€” proceeding anyway`);
  }
  return -1; // unknown
}
/**
 * Read each frame file from disk and return its contents base64-encoded.
 *
 * @param {string[]} framePaths - Paths of the frame files to read.
 * @returns {string[]} Base64 strings, in the same order as `framePaths`.
 */
function framesToBase64(framePaths) {
  // Passing the encoding lets readFileSync return the base64 string directly.
  return framePaths.map((framePath) => fs.readFileSync(framePath, 'base64'));
}
+// VLM EVALUATION +// ═══════════════════════════════════════════════════════════════════════════════ + +const ANOMALY_DETECTION_PROMPT = `You are a smart home security camera AI analyst. You are evaluating video footage from a home camera. + +You will be shown multiple frames sampled from a video clip captured by a smart home security camera. + +## Task +Analyze the frames and determine whether the footage shows a NORMAL event or an ABNORMAL (anomalous) event. + +## Anomaly Categories +- **Wildlife**: Unusual wild animal presence (bears, coyotes, snakes β€” NOT birds at feeder) +- **Senior Care**: Falls, wandering, inactivity, forgotten appliances +- **Baby Monitoring**: Unsupervised children near hazards, equipment failures +- **Pet Monitoring**: Escaped pets, property damage by pets, pet distress +- **Home Security**: Break-ins, theft attempts, trespassing, suspicious behavior +- **Package Delivery**: Package theft, misdelivery, rough handling +- **General Activity**: Unusual time activity, equipment malfunction, property damage + +## Response Format +Respond with ONLY valid JSON (no markdown, no explanation): +{"result": 0, "reasoning": "brief 1-sentence explanation"} + +Where: +- result: 0 = NORMAL (no anomaly), 1 = ABNORMAL (anomaly detected) +- reasoning: Brief explanation of your determination`; + +async function evaluateClip(annotation, framePaths) { + const base64Frames = framesToBase64(framePaths); + + // Build multi-image message content + const imageContent = base64Frames.map((b64, i) => ({ + type: 'image_url', + image_url: { + url: `data:image/jpeg;base64,${b64}`, + detail: 'low', + }, + })); + + const messages = [ + { role: 'system', content: ANOMALY_DETECTION_PROMPT }, + { + role: 'user', + content: [ + { type: 'text', text: `Analyze these ${base64Frames.length} frames from a smart home camera video. 
/**
 * Register one suite per annotation category; each suite runs one test per
 * annotated clip.
 *
 * For every clip the video is resolved first (cached path when
 * SKIP_DOWNLOAD is set, otherwise downloadVideo()). A clip whose video is
 * not available is recorded as skipped only: no test is opened for it, so
 * it is not additionally counted as a failure. Because the download now
 * happens before the test starts, per-test timings cover frame extraction
 * and the VLM call but not the download.
 *
 * A test passes when the VLM's `result` (0 or 1) equals the annotation's
 * `anomaly_tag`; it fails on extraction errors, unparseable responses,
 * invalid `result` values, or a mismatch.
 *
 * @param {Object<string, Array<Object>>} annotationsByCategory - Annotations
 *   keyed by category name; each annotation provides `id` and `anomaly_tag`.
 */
function buildSuites(annotationsByCategory) {
  for (const [category, annotations] of Object.entries(annotationsByCategory)) {
    const emoji = CATEGORY_EMOJIS[category] || 'πŸ“‹';
    suite(`${emoji} ${category}`, async () => {
      for (const annotation of annotations) {
        const expectedTag = annotation.anomaly_tag;
        const expectedLabel = expectedTag === 0 ? 'Normal' : 'Abnormal';

        // Step 1: Resolve the video before opening a test, so an
        // unavailable clip is a skip rather than a skip plus a failure.
        const videoFile = SKIP_DOWNLOAD
          ? path.join(VIDEO_CACHE_DIR, `${annotation.id}.mp4`)
          : downloadVideo(annotation);

        if (!videoFile || !fs.existsSync(videoFile)) {
          skip(annotation.id, 'Video not available');
          continue;
        }

        await test(`${annotation.id} β†’ ${expectedLabel}`, async () => {
          // Step 2: Extract frames
          const framePaths = extractFrames(videoFile, annotation.id);
          if (framePaths.length === 0) {
            throw new Error('No frames extracted from video');
          }

          // Step 3: VLM evaluation
          const response = await evaluateClip(annotation, framePaths);
          const parsed = parseJSON(response.content);

          // Step 4: Compare prediction vs ground truth
          const predicted = parsed.result;
          assert(predicted === 0 || predicted === 1, `Invalid result: ${predicted}`);
          assert(predicted === expectedTag,
            `Expected ${expectedLabel} (${expectedTag}), got ${predicted === 0 ? 'Normal' : 'Abnormal'} (${predicted}). VLM: "${(parsed.reasoning || '').slice(0, 80)}"`);

          return `${predicted === 0 ? 'Normal' : 'Abnormal'} βœ“ β€” "${(parsed.reasoning || '').slice(0, 60)}"`;
        });
      }
    });
  }
}
2 * (overall.precision * overall.recall) / (overall.precision + overall.recall) : 0; + + results.metrics = { perCategory, overall }; +} + +// ═══════════════════════════════════════════════════════════════════════════════ +// MAIN +// ═══════════════════════════════════════════════════════════════════════════════ + +async function main() { + log(''); + log(' ╔══════════════════════════════════════════════════════════════╗'); + log(' β•‘ SmartHome-Bench β€” Video Anomaly Detection Benchmark β•‘'); + log(' β•‘ Based on SmartHome-Bench-LLM (1,203 videos, 7 cats) β•‘'); + log(' β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•'); + log(''); + + // Check VLM + if (!VLM_URL) { + log(' ❌ VLM server URL required. Pass --vlm http://localhost:5405'); + log(' This is a VLM-only benchmark (multi-frame video analysis).'); + process.exit(1); + } + + // Check system deps + const deps = checkSystemDeps(); + if (!SKIP_DOWNLOAD) { + if (!deps.ytdlp) { + log(' ❌ yt-dlp not found. Install: pip install yt-dlp'); + process.exit(1); + } + } + if (!deps.ffmpeg) { + log(' ❌ ffmpeg not found. Install: brew install ffmpeg'); + process.exit(1); + } + + // System info + results.system = { + platform: `${os.platform()} ${os.arch()}`, + cpus: os.cpus()[0]?.model || 'unknown', + totalRAM_GB: (os.totalmem() / 1073741824).toFixed(1), + node: process.version, + deps, + }; + + log(` VLM: ${VLM_URL}`); + log(` Mode: ${TEST_MODE} (max ${MAX_VIDEOS} videos)`); + log(` Filter: ${CATEGORIES_FILTER}`); + log(` Cache: ${VIDEO_CACHE_DIR}`); + log(` System: ${results.system.cpus} (${results.system.totalRAM_GB} GB RAM)`); + + // Emit ready + emit({ + event: 'ready', + model: VLM_MODEL || 'unknown', + system: results.system.cpus, + totalVideos: MAX_VIDEOS, + }); + + // Disk space check β€” full needs ~15GB, subset ~2GB + const requiredGB = TEST_MODE === 'full' ? 
15 : 2; + checkDiskSpace(VIDEO_CACHE_DIR, requiredGB); + + // Ensure cache dirs + fs.mkdirSync(VIDEO_CACHE_DIR, { recursive: true }); + fs.mkdirSync(FRAMES_DIR, { recursive: true }); + + // Load and build suites + const annotationsByCategory = loadAnnotations(); + const totalClips = Object.values(annotationsByCategory).reduce((n, arr) => n + arr.length, 0); + log(`\n πŸ“Š Loaded ${totalClips} clips across ${Object.keys(annotationsByCategory).length} categories\n`); + + buildSuites(annotationsByCategory); + + // Run + const suiteStart = Date.now(); + await runSuites(); + results.totals.timeMs = Date.now() - suiteStart; + + // Compute metrics + computeMetrics(); + + // Summary + const { passed, failed, skipped, total, timeMs } = results.totals; + const tokPerSec = timeMs > 0 ? ((results.tokenTotals.total / (timeMs / 1000)).toFixed(1)) : '?'; + const overallAcc = (results.metrics.overall?.accuracy * 100 || 0).toFixed(1); + const overallF1 = (results.metrics.overall?.f1 * 100 || 0).toFixed(1); + + log(`\n${'═'.repeat(66)}`); + log(` RESULTS: ${passed}/${total} passed, ${failed} failed, ${skipped} skipped (${(timeMs / 1000).toFixed(1)}s)`); + log(` ACCURACY: ${overallAcc}% | F1: ${overallF1}%`); + log(` TOKENS: ${results.tokenTotals.total} total (${tokPerSec} tok/s)`); + log(` MODEL: ${results.model.vlm || 'unknown'}`); + log(`${'═'.repeat(66)}`); + + // Per-category breakdown + if (results.metrics.perCategory) { + log('\n Per-Category Breakdown:'); + log(` ${'Category'.padEnd(22)} ${'Acc'.padStart(6)} ${'Prec'.padStart(6)} ${'Rec'.padStart(6)} ${'F1'.padStart(6)} ${'TP'.padStart(4)} ${'FP'.padStart(4)} ${'TN'.padStart(4)} ${'FN'.padStart(4)}`); + log(` ${'─'.repeat(72)}`); + for (const [cat, m] of Object.entries(results.metrics.perCategory)) { + log(` ${cat.padEnd(22)} ${(m.accuracy * 100).toFixed(1).padStart(5)}% ${(m.precision * 100).toFixed(1).padStart(5)}% ${(m.recall * 100).toFixed(1).padStart(5)}% ${(m.f1 * 100).toFixed(1).padStart(5)}% 
${String(m.tp).padStart(4)} ${String(m.fp).padStart(4)} ${String(m.tn).padStart(4)} ${String(m.fn).padStart(4)}`); + } + } + + if (failed > 0) { + log('\n Failures:'); + for (const s of results.suites) { + for (const t of s.tests) { + if (t.status === 'fail') log(` ❌ ${s.name} > ${t.name}: ${t.detail}`); + } + } + } + + // Save results + fs.mkdirSync(RESULTS_DIR, { recursive: true }); + const modelSlug = (results.model.vlm || 'unknown').replace(/[^a-zA-Z0-9_.-]/g, '_'); + const ts = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19); + const resultFile = path.join(RESULTS_DIR, `${modelSlug}_${ts}.json`); + fs.writeFileSync(resultFile, JSON.stringify(results, null, 2)); + log(`\n Results saved: ${resultFile}`); + + // Update index + const indexFile = path.join(RESULTS_DIR, 'index.json'); + let index = []; + try { index = JSON.parse(fs.readFileSync(indexFile, 'utf8')); } catch { } + index.push({ + file: path.basename(resultFile), + model: results.model.vlm || 'unknown', + timestamp: results.timestamp, + passed, failed, total, + accuracy: results.metrics.overall?.accuracy || 0, + f1: results.metrics.overall?.f1 || 0, + timeMs, + tokens: results.tokenTotals.total, + }); + fs.writeFileSync(indexFile, JSON.stringify(index, null, 2)); + + // Generate report + let reportPath = null; + log('\n Generating HTML report...'); + try { + const reportScript = path.join(__dirname, 'generate-report.cjs'); + reportPath = require(reportScript).generateReport(RESULTS_DIR); + log(` βœ… Report: ${reportPath}`); + + if (!NO_OPEN && !IS_SKILL_MODE && reportPath) { + try { + const openCmd = process.platform === 'darwin' ? 
'open' : 'xdg-open'; + execSync(`${openCmd} "${reportPath}"`, { stdio: 'ignore' }); + log(` πŸ“‚ Opened in browser`); + } catch { + log(` ℹ️ Open manually: ${reportPath}`); + } + } + } catch (err) { + log(` ⚠️ Report generation failed: ${err.message}`); + } + + // Emit completion + emit({ + event: 'complete', + model: results.model.vlm, + passed, failed, skipped, total, + timeMs, + accuracy: results.metrics.overall?.accuracy || 0, + f1: results.metrics.overall?.f1 || 0, + tokens: results.tokenTotals.total, + tokPerSec: parseFloat(tokPerSec) || 0, + resultFile, + reportPath, + }); + + log(''); + process.exit(failed > 0 ? 1 : 0); +} + +// Run when executed directly +const isDirectRun = require.main === module || + (process.argv[1] && require('path').resolve(process.argv[1]) === __filename); + +if (isDirectRun) { + main().catch(err => { + log(`Fatal: ${err.message}`); + emit({ event: 'error', message: err.message }); + process.exit(1); + }); +} + +module.exports = { main }; diff --git a/skills/detection/yolo-detection-2026/requirements.txt b/skills/detection/yolo-detection-2026/requirements.txt index 641ea23..a8fa34a 100644 --- a/skills/detection/yolo-detection-2026/requirements.txt +++ b/skills/detection/yolo-detection-2026/requirements.txt @@ -2,6 +2,6 @@ # Install: pip install -r requirements.txt ultralytics>=8.3.0 # YOLOv11/v10/v8 inference -numpy>=1.24.0 +numpy>=1.24.0,<2.0.0 opencv-python-headless>=4.8.0 Pillow>=10.0.0 diff --git a/skills/detection/yolo-detection-2026/requirements_mps.txt b/skills/detection/yolo-detection-2026/requirements_mps.txt index eb018ea..a9e282f 100644 --- a/skills/detection/yolo-detection-2026/requirements_mps.txt +++ b/skills/detection/yolo-detection-2026/requirements_mps.txt @@ -4,7 +4,7 @@ torch>=2.4.0 torchvision>=0.19.0 ultralytics>=8.3.0 coremltools>=8.0 -numpy>=1.24.0 +numpy>=1.24.0,<2.0.0 opencv-python-headless>=4.8.0 Pillow>=10.0.0 diff --git a/skills/detection/yolo-detection-2026/scripts/env_config.py 
b/skills/detection/yolo-detection-2026/scripts/env_config.py index 1676e21..4386c53 100644 --- a/skills/detection/yolo-detection-2026/scripts/env_config.py +++ b/skills/detection/yolo-detection-2026/scripts/env_config.py @@ -338,6 +338,20 @@ def export_model(self, model, model_name: str) -> Optional[Path]: _log(f"Cached model found: {optimized_path}") return optimized_path + # Guard: numpy 2.x breaks coremltools PyTorchβ†’MIL converter + # (TypeError: only 0-dimensional arrays can be converted to Python scalars) + if spec.export_format == "coreml": + try: + import numpy as np + np_major = int(np.__version__.split('.')[0]) + if np_major >= 2: + _log(f"numpy {np.__version__} detected β€” CoreML export " + f"requires numpy<2.0.0 (coremltools incompatibility)") + _log("Fix: pip install 'numpy>=1.24,<2.0'") + return None + except Exception: + pass # If numpy check fails, try export anyway + try: _log(f"Exporting {model_name}.pt β†’ {spec.export_format} " f"(one-time, may take 30-120s)...") diff --git a/skills/lib/env_config.py b/skills/lib/env_config.py index 1676e21..4386c53 100644 --- a/skills/lib/env_config.py +++ b/skills/lib/env_config.py @@ -338,6 +338,20 @@ def export_model(self, model, model_name: str) -> Optional[Path]: _log(f"Cached model found: {optimized_path}") return optimized_path + # Guard: numpy 2.x breaks coremltools PyTorchβ†’MIL converter + # (TypeError: only 0-dimensional arrays can be converted to Python scalars) + if spec.export_format == "coreml": + try: + import numpy as np + np_major = int(np.__version__.split('.')[0]) + if np_major >= 2: + _log(f"numpy {np.__version__} detected β€” CoreML export " + f"requires numpy<2.0.0 (coremltools incompatibility)") + _log("Fix: pip install 'numpy>=1.24,<2.0'") + return None + except Exception: + pass # If numpy check fails, try export anyway + try: _log(f"Exporting {model_name}.pt β†’ {spec.export_format} " f"(one-time, may take 30-120s)...")