diff --git a/.gitignore b/.gitignore index c017617b..aa6644e6 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,5 @@ docs/.vitepress/cache # Database files *.db +autoresearch/results/ +extension/dist/ diff --git a/OPERATE.md b/OPERATE.md new file mode 100644 index 00000000..3ff7dffd --- /dev/null +++ b/OPERATE.md @@ -0,0 +1,140 @@ +# opencli operate — AI Browser Automation + +`opencli operate` lets an AI agent autonomously control your browser to complete tasks described in natural language. It reuses your existing Chrome login sessions, so no passwords are needed. + +## Quick Start + +```bash +# 1. Configure LLM provider +export OPENCLI_PROVIDER=anthropic # or openai +export OPENCLI_MODEL=sonnet # alias or full model ID +export OPENCLI_API_KEY=sk-ant-... # your API key +export OPENCLI_BASE_URL=https://... # optional: API proxy (add /v1 for OpenAI proxies) + +# 2. Run +opencli operate "go to Hacker News and extract the top 5 stories" +opencli operate --url https://github.com/trending "extract the top 3 trending repos" +opencli operate -v "search for flights from NYC to LA on Google Flights" +``` + +## How It Works + +``` +You describe a task in natural language + → Agent observes the page (DOM snapshot) + → LLM decides what to do (click, type, scroll, extract...) + → Actions execute in your browser + → Agent observes the result + → Repeat until done +``` + +The agent uses your existing Chrome browser session through the OpenCLI extension, so it has access to all your logged-in accounts (Twitter, GitHub, Gmail, etc.) without needing passwords. 
+ +## Options + +| Option | Default | Description | +|--------|---------|-------------| +| `--url <url>` | — | Starting URL (agent navigates if omitted) | +| `--max-steps <n>` | 50 | Maximum agent steps before timeout | +| `--screenshot` | false | Include screenshots in LLM context (more accurate but more expensive) | +| `--record` | false | Record action trace for debugging | +| `--save-as <site/name>` | — | Save successful operation as reusable CLI skill | +| `-v, --verbose` | false | Show step-by-step reasoning | + +## Configuration + +### Environment Variables + +```bash +# Required +export OPENCLI_PROVIDER=anthropic # Provider: anthropic or openai +export OPENCLI_API_KEY=sk-ant-... # API key for your provider + +# Optional +export OPENCLI_MODEL=sonnet # Model alias or full ID (default: sonnet for anthropic, gpt-4o for openai) +export OPENCLI_BASE_URL=https://... # API proxy URL (must include /v1 for OpenAI proxies) +``` + +### Model Aliases + +| Provider | Aliases | Default | +|----------|---------|---------| +| anthropic | `sonnet`, `opus`, `haiku` | sonnet | +| openai | `gpt-5.4`, `gpt-4.1`, `gpt-4o`, `o3`, `o4-mini` | gpt-4o | + +You can also use full model IDs (e.g., `claude-sonnet-4-20250514`, `gpt-5.4`). + +### Verify Configuration + +```bash +opencli doctor # Shows LLM provider, model, and connectivity status +``` + +### Chrome Extension + +The OpenCLI browser extension must be installed and connected. Run `opencli doctor` to check. + +## Save as Skill + +After a successful operation, save it as a reusable CLI command that runs **without AI**: + +```bash +# First run: AI agent completes the task +opencli operate --save-as hn/top "get the top 5 Hacker News stories" --url https://news.ycombinator.com + +# Future runs: deterministic, no LLM needed +opencli hn top +``` + +The `--save-as` flag analyzes the agent's actions and captured network requests, then uses the LLM to generate an optimized TypeScript adapter. 
If the agent discovered an API during execution, the generated skill will call the API directly instead of replaying UI actions. + +## Cost Estimate + +Each `operate` run costs approximately **$0.01–$0.50** depending on task complexity: + +| Task Type | Typical Steps | Estimated Cost | +|-----------|--------------|----------------| +| Simple extract (page title) | 1–2 | $0.01 | +| Search + extract | 3–6 | $0.05–0.15 | +| Form filling | 3–8 | $0.05–0.20 | +| Multi-step navigation | 5–10 | $0.10–0.50 | + +Using `--save-as` adds one additional LLM call ($0.05–0.20) for skill generation. + +## Troubleshooting + +### "OPENCLI_API_KEY is not set" +Configure your LLM provider: +```bash +export OPENCLI_PROVIDER=anthropic +export OPENCLI_API_KEY=sk-ant-... +``` + +### "LLM returned HTML instead of JSON" +Your `OPENCLI_BASE_URL` is pointing to the proxy's dashboard, not its API endpoint. Add `/v1`: +```bash +export OPENCLI_BASE_URL='https://your-proxy.com/v1' +``` + +### "Extension not connected" +Run `opencli doctor` to diagnose. Make sure the OpenCLI extension is installed and enabled in Chrome. + +### "attach failed: Cannot access a chrome-extension:// URL" +Another Chrome extension (usually 1Password or a debugger extension) is interfering. The agent retries automatically (up to 5 times for operate commands), but if it persists, temporarily disable the conflicting extension. + +### "LLM returned empty response" +Your API proxy may be truncating responses, or the model name may not be supported by your proxy. Check `OPENCLI_MODEL` and `OPENCLI_BASE_URL`. + +### Agent fills wrong fields or misses content below the fold +The agent scrolls elements into view before interacting, but complex pages with many dynamic elements can sometimes cause issues. Try running with `-v` to see what the agent sees and does. 
+ +## AutoResearch (Experimental) + +OpenCLI includes an AutoResearch framework that automatically optimizes the agent's performance: + +```bash +# Run automated optimization (requires Claude Code) +./autoresearch/run.sh +``` + +This uses Claude Code to iteratively modify the agent's code, evaluate against a test suite of 59 tasks, and commit only improvements. See `docs/superpowers/specs/2026-03-31-autoresearch-operate-design.md` for details. diff --git a/README.md b/README.md index 27a91a78..8f4703e4 100644 --- a/README.md +++ b/README.md @@ -83,6 +83,31 @@ opencli hackernews top --limit 5 # Public API, no browser needed opencli bilibili hot --limit 5 # Browser command (requires Extension) ``` +### 4. AI Agent (New!) + +Let an AI agent operate your browser with natural language. Supports Anthropic and OpenAI: + +```bash +# Configure (one-time) +export OPENCLI_PROVIDER=anthropic # or openai +export OPENCLI_MODEL=sonnet # model alias +export OPENCLI_API_KEY=sk-ant-... # your API key +export OPENCLI_BASE_URL=https://... # optional: API proxy + +# Run +opencli operate "go to Hacker News and extract the top 5 stories" +opencli operate --url https://github.com/trending "extract top 3 trending repos" +``` + +Save successful operations as reusable commands (no AI needed for replay): + +```bash +opencli operate --save-as hn/top "get top 5 HN stories" --url https://news.ycombinator.com +opencli hn top # Runs without AI from now on +``` + +See [OPERATE.md](./OPERATE.md) for full documentation, configuration, and troubleshooting. + ### Update ```bash diff --git a/SKILL.md b/SKILL.md index 4b38d7a2..cbbb4e99 100644 --- a/SKILL.md +++ b/SKILL.md @@ -3,877 +3,59 @@ name: opencli description: "OpenCLI — Make any website or Electron App your CLI. Zero risk, AI-powered, reuse Chrome login." 
version: 1.5.6 author: jackwener -tags: [cli, browser, web, chrome-extension, cdp, bilibili, zhihu, twitter, github, v2ex, hackernews, reddit, xiaohongshu, xueqiu, youtube, boss, coupang, yollomi, AI, agent] +tags: [cli, browser, web, chrome-extension, cdp, AI, agent, operate] --- # OpenCLI -> Make any website or Electron App your CLI. Reuse Chrome login, zero risk, AI-powered discovery. +> Make any website or Electron App your CLI. Reuse Chrome login, zero risk, AI-powered. -> [!CAUTION] -> **AI Agent 必读:创建或修改任何适配器之前,你必须先阅读 [CLI-EXPLORER.md](./CLI-EXPLORER.md)!** -> 该文档包含完整的 API 发现工作流(必须使用浏览器探索)、5 级认证策略决策树、平台 SDK 速查表、`tap` 步骤调试流程、分页 API 模板、级联请求模式、以及常见陷阱。 -> **本文件(SKILL.md)仅提供命令参考和简化模板,不足以正确开发适配器。** +## Skills -> [!IMPORTANT] -> 创建或修改 adapter 时,再额外遵守 3 条收口规则: -> 1. 主参数优先用 positional arg,不要把 `query` / `id` / `url` 默认做成 `--query` / `--id` / `--url` -> 2. 预期中的 adapter 失败优先抛 `CliError` 子类,不要直接 throw 原始 `Error` -> 3. 新增 adapter 或新增用户可发现命令时,同步更新 adapter docs、`docs/adapters/index.md`、sidebar,以及 README/README.zh-CN 中受影响的入口 - -## Install & Run +OpenCLI has three specialized skills. Use the one that matches your task: +### 1. CLI Commands (`skills/cli/SKILL.md`) +Use existing CLI commands to fetch data, interact with websites and desktop apps. ```bash -# npm global install (recommended) -npm install -g @jackwener/opencli -opencli - -# Or from source -cd ~/code/opencli && npm install -npx tsx src/main.ts - -# Update to latest -npm update -g @jackwener/opencli -``` - -## Prerequisites - -Browser commands require: -1. Chrome browser running **(logged into target sites)** -2. **opencli Browser Bridge** Chrome extension installed (load `extension/` as unpacked in `chrome://extensions`) -3. No further setup needed — the daemon auto-starts on first browser command - -> **Note**: You must be logged into the target website in Chrome before running commands. Tabs opened during command execution are auto-closed afterwards. - -Public API commands (`hackernews`, `v2ex`) need no browser. 
- -## Commands Reference - -### Data Commands - -```bash -# Bilibili (browser) -opencli bilibili hot --limit 10 # B站热门视频 -opencli bilibili search "rust" # 搜索视频 (query positional) -opencli bilibili me # 我的信息 -opencli bilibili favorite # 我的收藏 -opencli bilibili history --limit 20 # 观看历史 -opencli bilibili feed --limit 10 # 动态时间线 -opencli bilibili user-videos --uid 12345 # 用户投稿 -opencli bilibili subtitle --bvid BV1xxx # 获取视频字幕 (支持 --lang zh-CN) -opencli bilibili dynamic --limit 10 # 动态 -opencli bilibili ranking --limit 10 # 排行榜 -opencli bilibili following --limit 20 # 我的关注列表 (支持 --uid 查看他人) - -# 知乎 (browser) -opencli zhihu hot --limit 10 # 知乎热榜 -opencli zhihu search "AI" # 搜索 (query positional) -opencli zhihu question 34816524 # 问题详情和回答 (id positional) - -# 小红书 (browser) -opencli xiaohongshu search "美食" # 搜索笔记 (query positional) -opencli xiaohongshu notifications # 通知(mentions/likes/connections) -opencli xiaohongshu feed --limit 10 # 推荐 Feed -opencli xiaohongshu user xxx # 用户主页 (id positional) -opencli xiaohongshu creator-notes --limit 10 # 创作者笔记列表 -opencli xiaohongshu creator-note-detail --note-id xxx # 笔记详情 -opencli xiaohongshu creator-notes-summary # 笔记数据概览 -opencli xiaohongshu creator-profile # 创作者资料 -opencli xiaohongshu creator-stats # 创作者数据统计 - -# 雪球 Xueqiu (browser) -opencli xueqiu hot-stock --limit 10 # 雪球热门股票榜 -opencli xueqiu stock --symbol SH600519 # 查看股票实时行情 -opencli xueqiu watchlist # 获取自选股/持仓列表 -opencli xueqiu feed # 我的关注 timeline -opencli xueqiu hot --limit 10 # 雪球热榜 -opencli xueqiu search "特斯拉" # 搜索 (query positional) -opencli xueqiu earnings-date SH600519 # 股票财报发布日期 (symbol positional) -opencli xueqiu fund-holdings # 蛋卷基金持仓明细 (支持 --account 过滤) -opencli xueqiu fund-snapshot # 蛋卷基金快照(总资产、子账户、持仓) - -# GitHub (via gh External CLI) -opencli gh repo list # 列出仓库 (passthrough to gh) -opencli gh pr list --limit 5 # PR 列表 -opencli gh issue list # Issue 列表 - -# Twitter/X (browser) -opencli twitter trending --limit 10 # 热门话题 -opencli twitter bookmarks --limit 20 # 
获取收藏的书签推文 -opencli twitter search "AI" # 搜索推文 (query positional) -opencli twitter profile elonmusk # 用户资料 -opencli twitter timeline --limit 20 # 时间线 -opencli twitter thread 1234567890 # 推文 thread(原文 + 回复) -opencli twitter article 1891511252174299446 # 推文长文内容 -opencli twitter follow elonmusk # 关注用户 -opencli twitter unfollow elonmusk # 取消关注 -opencli twitter bookmark https://x.com/... # 收藏推文 -opencli twitter unbookmark https://x.com/... # 取消收藏 -opencli twitter post "Hello world" # 发布推文 (text positional) -opencli twitter like https://x.com/... # 点赞推文 (url positional) -opencli twitter reply https://x.com/... "Nice!" # 回复推文 (url + text positional) -opencli twitter delete https://x.com/... # 删除推文 (url positional) -opencli twitter block elonmusk # 屏蔽用户 (username positional) -opencli twitter unblock elonmusk # 取消屏蔽 (username positional) -opencli twitter followers elonmusk # 用户的粉丝列表 (user positional) -opencli twitter following elonmusk # 用户的关注列表 (user positional) -opencli twitter notifications --limit 20 # 通知列表 -opencli twitter hide-reply https://x.com/... # 隐藏回复 (url positional) -opencli twitter download elonmusk # 下载用户媒体 (username positional, 支持 --tweet-url) -opencli twitter accept "群,微信" # 自动接受含关键词的 DM 请求 (query positional) -opencli twitter reply-dm "消息内容" # 批量回复 DM (text positional) - -# Reddit (browser) -opencli reddit hot --limit 10 # 热门帖子 -opencli reddit hot --subreddit programming # 指定子版块 -opencli reddit frontpage --limit 10 # 首页 /r/all -opencli reddit popular --limit 10 # /r/popular 热门 -opencli reddit search "AI" --sort top --time week # 搜索(支持排序+时间过滤) -opencli reddit subreddit rust --sort top --time month # 子版块浏览(支持时间过滤) -opencli reddit read --post-id 1abc123 # 阅读帖子 + 评论 -opencli reddit user spez # 用户资料(karma、注册时间) -opencli reddit user-posts spez # 用户发帖历史 -opencli reddit user-comments spez # 用户评论历史 -opencli reddit upvote --post-id xxx --direction up # 投票(up/down/none) -opencli reddit save --post-id xxx # 收藏帖子 -opencli reddit comment --post-id xxx "Great!" 
# 发表评论 (text positional) -opencli reddit subscribe --subreddit python # 订阅子版块 -opencli reddit saved --limit 10 # 我的收藏 -opencli reddit upvoted --limit 10 # 我的赞 - -# V2EX (public + browser) -opencli v2ex hot --limit 10 # 热门话题 -opencli v2ex latest --limit 10 # 最新话题 -opencli v2ex topic 1024 # 主题详情 (id positional) -opencli v2ex daily # 每日签到 (browser) -opencli v2ex me # 我的信息 (browser) -opencli v2ex notifications --limit 10 # 通知 (browser) -opencli v2ex node python # 节点话题列表 (name positional) -opencli v2ex nodes --limit 30 # 所有节点列表 -opencli v2ex member username # 用户资料 (username positional) -opencli v2ex user username # 用户发帖列表 (username positional) -opencli v2ex replies 1024 # 主题回复列表 (id positional) - -# Hacker News (public) -opencli hackernews top --limit 10 # Top stories -opencli hackernews new --limit 10 # Newest stories -opencli hackernews best --limit 10 # Best stories -opencli hackernews ask --limit 10 # Ask HN posts -opencli hackernews show --limit 10 # Show HN posts -opencli hackernews jobs --limit 10 # Job postings -opencli hackernews search "rust" # 搜索 (query positional) -opencli hackernews user dang # 用户资料 (username positional) - -# BBC (public) -opencli bbc news --limit 10 # BBC News RSS headlines - -# 微博 (browser) -opencli weibo hot --limit 10 # 微博热搜 - -# BOSS直聘 (browser) -opencli boss search "AI agent" # 搜索职位 (query positional) -opencli boss detail --security-id xxx # 职位详情 -opencli boss recommend --limit 10 # 推荐职位 -opencli boss joblist --limit 10 # 职位列表 -opencli boss greet --security-id xxx # 打招呼 -opencli boss batchgreet --job-id xxx # 批量打招呼 -opencli boss send --uid xxx "消息内容" # 发消息 (text positional) -opencli boss chatlist --limit 10 # 聊天列表 -opencli boss chatmsg --security-id xxx # 聊天记录 -opencli boss invite --security-id xxx # 邀请沟通 -opencli boss mark --security-id xxx # 标记管理 -opencli boss exchange --security-id xxx # 交换联系方式 -opencli boss resume # 简历管理 -opencli boss stats # 数据统计 - -# YouTube (browser) -opencli youtube search "rust" # 搜索视频 (query positional) 
-opencli youtube video "https://www.youtube.com/watch?v=xxx" # 视频元数据 -opencli youtube transcript "https://www.youtube.com/watch?v=xxx" # 获取视频字幕/转录 -opencli youtube transcript "xxx" --lang zh-Hans --mode raw # 指定语言 + 原始时间戳模式 - -# Yahoo Finance (browser) -opencli yahoo-finance quote --symbol AAPL # 股票行情 - -# Sina Finance -opencli sinafinance news --limit 10 --type 1 # 7x24实时快讯 (0=全部 1=A股 2=宏观 3=公司 4=数据 5=市场 6=国际 7=观点 8=央行 9=其它) - -# Reuters (browser) -opencli reuters search "AI" # 路透社搜索 (query positional) - -# 什么值得买 (browser) -opencli smzdm search "耳机" # 搜索好价 (query positional) - -# 携程 (browser) -opencli ctrip search "三亚" # 搜索目的地 (query positional) - -# Antigravity (Electron/CDP) -opencli antigravity status # 检查 CDP 连接 -opencli antigravity send "hello" # 发送文本到当前 agent 聊天框 -opencli antigravity read # 读取整个聊天记录面板 -opencli antigravity new # 清空聊天、开启新对话 -opencli antigravity dump # 导出 DOM 和快照调试信息 -opencli antigravity extract-code # 自动抽取 AI 回复中的代码块 -opencli antigravity model claude # 切换底层模型 -opencli antigravity watch # 流式监听增量消息 - -# Barchart (browser) -opencli barchart quote --symbol AAPL # 股票行情 -opencli barchart options --symbol AAPL # 期权链 -opencli barchart greeks --symbol AAPL # 期权 Greeks -opencli barchart flow --limit 20 # 异常期权活动 - -# Jike 即刻 (browser) -opencli jike feed --limit 10 # 动态流 -opencli jike search "AI" # 搜索 (query positional) -opencli jike create "内容" # 发布动态 (text positional) -opencli jike like xxx # 点赞 (id positional) -opencli jike comment xxx "评论" # 评论 (id + text positional) -opencli jike repost xxx # 转发 (id positional) -opencli jike notifications # 通知 - -# Linux.do (public + browser) -opencli linux-do hot --limit 10 # 热门话题 -opencli linux-do latest --limit 10 # 最新话题 -opencli linux-do search "rust" # 搜索 (query positional) -opencli linux-do topic 1024 # 主题详情 (id positional) -opencli linux-do categories --limit 20 # 分类列表 (browser) -opencli linux-do category dev 7 # 分类内话题 (slug + id positional, browser) - -# StackOverflow (public) -opencli stackoverflow hot 
--limit 10 # 热门问题 -opencli stackoverflow search "typescript" # 搜索 (query positional) -opencli stackoverflow bounties --limit 10 # 悬赏问题 - -# WeRead 微信读书 (browser) -opencli weread shelf --limit 10 # 书架 -opencli weread search "AI" # 搜索图书 (query positional) -opencli weread book xxx # 图书详情 (book-id positional) -opencli weread highlights xxx # 划线笔记 (book-id positional) -opencli weread notes xxx # 想法笔记 (book-id positional) -opencli weread ranking --limit 10 # 排行榜 - -# Jimeng 即梦 AI (browser) -opencli jimeng generate --prompt "描述" # AI 生图 -opencli jimeng history --limit 10 # 生成历史 - -# Yollomi yollomi.com (browser — 需在 Chrome 登录 yollomi.com,复用站点 session) -opencli yollomi models --type image # 列出图像模型与积分 -opencli yollomi generate "提示词" --model z-image-turbo # 文生图 -opencli yollomi video "提示词" --model kling-2-1 # 视频 -opencli yollomi upload ./photo.jpg # 上传得 URL,供 img2img / 工具链使用 -opencli yollomi remove-bg # 去背景(免费) -opencli yollomi edit "改成油画风格" # Qwen 图像编辑 -opencli yollomi background # AI 背景生成 (5 credits) -opencli yollomi face-swap --source --target # 换脸 (3 credits) -opencli yollomi object-remover # AI 去除物体 (3 credits) -opencli yollomi restore # AI 修复老照片 (4 credits) -opencli yollomi try-on --person --cloth # 虚拟试衣 (3 credits) -opencli yollomi upscale # AI 超分辨率 (1 credit, 支持 --scale 2/4) - -# Grok (default + explicit web) -opencli grok ask --prompt "问题" # 提问 Grok(兼容默认路径) -opencli grok ask --prompt "问题" --web # 显式 grok.com consumer web UI 路径 - -# HuggingFace (public) -opencli hf top --limit 10 # 热门模型 - -# 超星学习通 (browser) -opencli chaoxing assignments # 作业列表 -opencli chaoxing exams # 考试列表 - -# Douban 豆瓣 (browser) -opencli douban search "三体" # 搜索 (query positional) -opencli douban top250 # 豆瓣 Top 250 -opencli douban subject 1234567 # 条目详情 (id positional) -opencli douban photos 30382501 # 图片列表 / 直链(默认海报) -opencli douban download 30382501 # 下载海报 / 剧照 -opencli douban marks --limit 10 # 我的标记 -opencli douban reviews --limit 10 # 短评 - -# Facebook (browser) -opencli facebook feed --limit 
10 # 动态流 -opencli facebook profile username # 用户资料 (id positional) -opencli facebook search "AI" # 搜索 (query positional) -opencli facebook friends # 好友列表 -opencli facebook groups # 群组 -opencli facebook events # 活动 -opencli facebook notifications # 通知 -opencli facebook memories # 回忆 -opencli facebook add-friend username # 添加好友 (id positional) -opencli facebook join-group groupid # 加入群组 (id positional) - -# Instagram (browser) -opencli instagram explore # 探索 -opencli instagram profile username # 用户资料 (id positional) -opencli instagram search "AI" # 搜索 (query positional) -opencli instagram user username # 用户详情 (id positional) -opencli instagram followers username # 粉丝 (id positional) -opencli instagram following username # 关注 (id positional) -opencli instagram follow username # 关注用户 (id positional) -opencli instagram unfollow username # 取消关注 (id positional) -opencli instagram like postid # 点赞 (id positional) -opencli instagram unlike postid # 取消点赞 (id positional) -opencli instagram comment postid "评论" # 评论 (id + text positional) -opencli instagram save postid # 收藏 (id positional) -opencli instagram unsave postid # 取消收藏 (id positional) -opencli instagram saved # 已收藏列表 - -# TikTok (browser) -opencli tiktok explore # 探索 -opencli tiktok search "AI" # 搜索 (query positional) -opencli tiktok profile username # 用户资料 (id positional) -opencli tiktok user username # 用户详情 (id positional) -opencli tiktok following username # 关注列表 (id positional) -opencli tiktok follow username # 关注 (id positional) -opencli tiktok unfollow username # 取消关注 (id positional) -opencli tiktok like videoid # 点赞 (id positional) -opencli tiktok unlike videoid # 取消点赞 (id positional) -opencli tiktok comment videoid "评论" # 评论 (id + text positional) -opencli tiktok save videoid # 收藏 (id positional) -opencli tiktok unsave videoid # 取消收藏 (id positional) -opencli tiktok live # 直播 -opencli tiktok notifications # 通知 -opencli tiktok friends # 朋友 - -# Medium (browser) -opencli medium feed --limit 10 # 动态流 -opencli 
medium search "AI" # 搜索 (query positional) -opencli medium user username # 用户主页 (id positional) - -# Substack (browser) -opencli substack feed --limit 10 # 订阅动态 -opencli substack search "AI" # 搜索 (query positional) -opencli substack publication name # 出版物详情 (id positional) - -# Sinablog 新浪博客 (browser) -opencli sinablog hot --limit 10 # 热门 -opencli sinablog search "AI" # 搜索 (query positional) -opencli sinablog article url # 文章详情 -opencli sinablog user username # 用户主页 (id positional) - -# Lobsters (public) -opencli lobsters hot --limit 10 # 热门 -opencli lobsters newest --limit 10 # 最新 -opencli lobsters active --limit 10 # 活跃 -opencli lobsters tag rust # 按标签筛选 (tag positional) - -# Google (public) -opencli google news --limit 10 # 新闻 -opencli google search "AI" # 搜索 (query positional) -opencli google suggest "AI" # 搜索建议 (query positional) -opencli google trends # 趋势 - -# DEV.to (public) -opencli devto top --limit 10 # 热门文章 -opencli devto tag javascript --limit 10 # 按标签 (tag positional) -opencli devto user username # 用户文章 (username positional) - -# Steam (public) -opencli steam top-sellers --limit 10 # 热销游戏 - -# Apple Podcasts (public) -opencli apple-podcasts top --limit 10 # 热门播客排行榜 (支持 --country us/cn/gb/jp) -opencli apple-podcasts search "科技" # 搜索播客 (query positional) -opencli apple-podcasts episodes 12345 # 播客剧集列表 (id positional, 用 search 获取 ID) - -# arXiv (public) -opencli arxiv search "attention" # 搜索论文 (query positional) -opencli arxiv paper 1706.03762 # 论文详情 (id positional) - -# Bloomberg (public RSS + browser) -opencli bloomberg main --limit 10 # Bloomberg 首页头条 (RSS) -opencli bloomberg markets --limit 10 # 市场新闻 (RSS) -opencli bloomberg tech --limit 10 # 科技新闻 (RSS) -opencli bloomberg politics --limit 10 # 政治新闻 (RSS) -opencli bloomberg economics --limit 10 # 经济新闻 (RSS) -opencli bloomberg opinions --limit 10 # 观点 (RSS) -opencli bloomberg industries --limit 10 # 行业新闻 (RSS) -opencli bloomberg businessweek --limit 10 # Businessweek (RSS) -opencli bloomberg feeds # 
列出所有 RSS feed 别名 -opencli bloomberg news "https://..." # 阅读 Bloomberg 文章全文 (link positional, browser) - -# Coupang 쿠팡 (browser) -opencli coupang search "耳机" # 搜索商品 (query positional, 支持 --filter rocket) -opencli coupang add-to-cart 12345 # 加入购物车 (product-id positional, 或 --url) - -# Dictionary (public) -opencli dictionary search "serendipity" # 单词释义 (word positional) -opencli dictionary synonyms "happy" # 近义词 (word positional) -opencli dictionary examples "ubiquitous" # 例句 (word positional) - -# 豆包 Doubao Web (browser) -opencli doubao status # 检查豆包页面状态 -opencli doubao new # 新建对话 -opencli doubao send "你好" # 发送消息 (text positional) -opencli doubao read # 读取对话记录 -opencli doubao ask "问题" # 一键提问并等回复 (text positional) - -# 京东 JD (browser) -opencli jd item 100291143898 # 商品详情 (sku positional, 含价格/主图/规格) - -# LinkedIn (browser) -opencli linkedin search "AI engineer" # 搜索职位 (query positional, 支持 --location/--company/--remote) -opencli linkedin timeline --limit 20 # 首页动态流 - -# Pixiv (browser) -opencli pixiv ranking --limit 20 # 插画排行榜 (支持 --mode daily/weekly/monthly) -opencli pixiv search "風景" # 搜索插画 (query positional) -opencli pixiv user 12345 # 画师资料 (uid positional) -opencli pixiv illusts 12345 # 画师作品列表 (user-id positional) -opencli pixiv detail 12345 # 插画详情 (id positional) -opencli pixiv download 12345 # 下载插画 (illust-id positional) - -# Web (browser) -opencli web read --url "https://..." 
# 抓取任意网页并导出为 Markdown - -# 微信公众号 Weixin (browser) -opencli weixin download --url "https://mp.weixin.qq.com/s/xxx" # 下载公众号文章为 Markdown - -# 小宇宙 Xiaoyuzhou (public) -opencli xiaoyuzhou podcast 12345 # 播客资料 (id positional) -opencli xiaoyuzhou podcast-episodes 12345 # 播客剧集列表 (id positional) -opencli xiaoyuzhou episode 12345 # 单集详情 (id positional) - -# Wikipedia (public) -opencli wikipedia search "AI" # 搜索 (query positional) -opencli wikipedia summary "Python" # 摘要 (title positional) +opencli twitter trending --limit 10 +opencli hackernews top --limit 5 +opencli bilibili hot ``` -### Desktop Adapter Commands - +### 2. Browser Automation (`skills/operate/SKILL.md`) +AI agent or manual browser control. Navigate, click, type, extract — with existing Chrome login sessions. ```bash -# Cursor (desktop — CDP via Electron) -opencli cursor status # 检查连接 -opencli cursor send "message" # 发送消息 -opencli cursor read # 读取回复 -opencli cursor new # 新建对话 -opencli cursor dump # 导出 DOM 调试信息 -opencli cursor composer # Composer 模式 -opencli cursor model claude # 切换模型 -opencli cursor extract-code # 提取代码块 -opencli cursor ask "question" # 一键提问并等回复 -opencli cursor screenshot # 截图 -opencli cursor history # 对话历史 -opencli cursor export # 导出对话 - -# Codex (desktop — headless CLI agent) -opencli codex status # 检查连接 -opencli codex send "message" # 发送消息 -opencli codex read # 读取回复 -opencli codex new # 新建对话 -opencli codex dump # 导出调试信息 -opencli codex extract-diff # 提取 diff -opencli codex model gpt-4 # 切换模型 -opencli codex ask "question" # 一键提问并等回复 -opencli codex screenshot # 截图 -opencli codex history # 对话历史 -opencli codex export # 导出对话 - -# ChatGPT (desktop — macOS AppleScript/CDP) -opencli chatgpt status # 检查应用状态 -opencli chatgpt new # 新建对话 -opencli chatgpt send "message" # 发送消息 -opencli chatgpt read # 读取回复 -opencli chatgpt ask "question" # 一键提问并等回复 - -# ChatWise (desktop — multi-LLM client) -opencli chatwise status # 检查连接 -opencli chatwise new # 新建对话 -opencli chatwise send "message" # 发送消息 -opencli 
chatwise read # 读取回复 -opencli chatwise ask "question" # 一键提问并等回复 -opencli chatwise model claude # 切换模型 -opencli chatwise history # 对话历史 -opencli chatwise export # 导出对话 -opencli chatwise screenshot # 截图 - -# Notion (desktop — CDP via Electron) -opencli notion status # 检查连接 -opencli notion search "keyword" # 搜索页面 -opencli notion read # 读取当前页面 -opencli notion new # 新建页面 -opencli notion write "content" # 写入内容 -opencli notion sidebar # 侧边栏导航 -opencli notion favorites # 收藏列表 -opencli notion export # 导出 +# AI agent mode (requires OPENCLI_API_KEY) +opencli operate "go to HN and extract top 5 stories" +opencli operate --save-as hn/top "get top HN stories" # Save as reusable CLI -# Discord App (desktop — CDP via Electron) -opencli discord-app status # 检查连接 -opencli discord-app send "message" # 发送消息 -opencli discord-app read # 读取消息 -opencli discord-app channels # 频道列表 -opencli discord-app servers # 服务器列表 -opencli discord-app search "keyword" # 搜索 -opencli discord-app members # 成员列表 - -# Doubao App 豆包桌面版 (desktop — CDP via Electron) -opencli doubao-app status # 检查连接 -opencli doubao-app new # 新建对话 -opencli doubao-app send "message" # 发送消息 -opencli doubao-app read # 读取回复 -opencli doubao-app ask "question" # 一键提问并等回复 -opencli doubao-app screenshot # 截图 -opencli doubao-app dump # 导出 DOM 调试信息 +# Manual mode (Claude Code controls the loop) +opencli browse open https://example.com +opencli browse state +opencli browse click 3 ``` -### Management Commands - +### 3. Adapter Development (`skills/adapter-dev/SKILL.md`) +Create new CLI commands from websites. Explore APIs, record traffic, write TypeScript adapters. 
```bash -opencli list # List all commands (including External CLIs) -opencli list --json # JSON output -opencli list -f yaml # YAML output -opencli install # Auto-install an external CLI (e.g., gh, obsidian) -opencli register # Register a local custom CLI for unified discovery -opencli validate # Validate all CLI definitions -opencli validate bilibili # Validate specific site -opencli doctor # Diagnose browser bridge (auto-starts daemon, includes live test) +opencli explore https://example.com +opencli record https://example.com +opencli generate https://example.com --goal "hot" ``` -### AI Agent Workflow +## Quick Setup ```bash -# Deep Explore: network intercept → response analysis → capability inference -opencli explore --site - -# Synthesize: generate evaluate-based YAML pipelines from explore artifacts -opencli synthesize - -# Generate: one-shot explore → synthesize → register -opencli generate --goal "hot" - -# Record: YOU operate the page, opencli captures every API call → YAML candidates -# Opens the URL in automation window, injects fetch/XHR interceptor into ALL tabs, -# polls every 2s, auto-stops after 60s (or press Enter to stop early). -opencli record # 录制,site name 从域名推断 -opencli record --site mysite # 指定 site name -opencli record --timeout 120000 # 自定义超时(毫秒,默认 60000) -opencli record --poll 1000 # 缩短轮询间隔(毫秒,默认 2000) -opencli record --out .opencli/record/x # 自定义输出目录 -# Output: -# .opencli/record//captured.json ← 原始捕获数据(带 url/method/body) -# .opencli/record//candidates/*.yaml ← 高置信度候选适配器(score ≥ 8,有 array 结果) - -# Strategy Cascade: auto-probe PUBLIC → COOKIE → HEADER -opencli cascade - -# Explore with interactive fuzzing (click buttons to trigger lazy APIs) -opencli explore --auto --click "字幕,CC,评论" - -# Validate: validate adapter definitions -opencli validate -``` - -## Output Formats - -All built-in commands support `--format` / `-f` with `table`, `json`, `yaml`, `md`, and `csv`. 
-The `list` command supports the same formats and also keeps `--json` as a compatibility alias. - -```bash -opencli list -f yaml # YAML command registry -opencli bilibili hot -f table # Default: rich table -opencli bilibili hot -f json # JSON (pipe to jq, feed to AI agent) -opencli bilibili hot -f yaml # YAML (readable structured output) -opencli bilibili hot -f md # Markdown -opencli bilibili hot -f csv # CSV -``` - -## Verbose Mode - -```bash -opencli bilibili hot -v # Show each pipeline step and data flow -``` - -## Record Workflow - -`record` 是为「无法用 `explore` 自动发现」的页面(需要登录操作、复杂交互、SPA 内路由)准备的手动录制方案。 - -### 工作原理 - -``` -opencli record - → 打开 automation window 并导航到目标 URL - → 向所有 tab 注入 fetch/XHR 拦截器(幂等,可重复注入) - → 每 2s 轮询一次:发现新 tab 自动注入,drain 所有 tab 的捕获缓冲区 - → 超时(默认 60s)或按 Enter 停止 - → 分析捕获到的 JSON 请求:去重 → 评分 → 生成候选 YAML +npm install -g @jackwener/opencli +opencli doctor # Verify Chrome extension + daemon ``` -**拦截器特性**: -- 同时 patch `window.fetch` 和 `XMLHttpRequest` -- 只捕获 `Content-Type: application/json` 的响应 -- 过滤纯对象少于 2 个 key 的响应(避免 tracking/ping) -- 跨 tab 隔离:每个 tab 独立缓冲区,轮询时分别 drain -- 幂等注入:同一 tab 二次注入时先 restore 原始函数再重新 patch,不丢失已捕获数据 - -### 使用步骤 +## Configuration ```bash -# 1. 启动录制(建议 --timeout 给足操作时间) -opencli record "https://example.com/page" --timeout 120000 - -# 2. 在弹出的 automation window 里正常操作页面: -# - 打开列表、搜索、点击条目、切换 Tab -# - 凡是触发网络请求的操作都会被捕获 - -# 3. 完成操作后按 Enter 停止(或等超时自动停止) - -# 4. 
查看结果 -cat .opencli/record//captured.json # 原始捕获 -ls .opencli/record//candidates/ # 候选 YAML -``` - -### 页面类型与捕获预期 - -| 页面类型 | 预期捕获量 | 说明 | -|---------|-----------|------| -| 列表/搜索页 | 多(5~20+) | 每次搜索/翻页都会触发新请求 | -| 详情页(只读) | 少(1~5) | 首屏数据一次性返回,后续操作走 form/redirect | -| SPA 内路由跳转 | 中等 | 路由切换会触发新接口,但首屏请求在注入前已发出 | -| 需要登录的页面 | 视操作而定 | 确保 Chrome 已登录目标网站 | - -> **注意**:如果页面在导航完成前就发出了大部分请求(服务端渲染 / SSR 注水),拦截器会错过这些请求。 -> 解决方案:在页面加载完成后,手动触发能产生新请求的操作(搜索、翻页、切 Tab、展开折叠项等)。 - -### 候选 YAML → TS CLI 转换 - -生成的候选 YAML 是起点,通常需要转换为 TypeScript(尤其是 tae 等内部系统): - -**候选 YAML 结构**(自动生成): -```yaml -site: tae -name: getList # 从 URL path 推断的名称 -strategy: cookie -browser: true -pipeline: - - navigate: https://... - - evaluate: | - (async () => { - const res = await fetch('/approval/getList.json?procInsId=...', { credentials: 'include' }); - const data = await res.json(); - return (data?.content?.operatorRecords || []).map(item => ({ ... })); - })() +# For AI agent (opencli operate) +export OPENCLI_PROVIDER=anthropic # or openai +export OPENCLI_MODEL=sonnet # model alias +export OPENCLI_API_KEY=sk-ant-... # API key +export OPENCLI_BASE_URL=https://... 
# optional proxy ``` - -**转换为 TS CLI**(参考 `src/clis/tae/add-expense.ts` 风格): -```typescript -import { cli, Strategy } from '../../registry.js'; - -cli({ - site: 'tae', - name: 'get-approval', - description: '查看报销单审批流程和操作记录', - domain: 'tae.alibaba-inc.com', - strategy: Strategy.COOKIE, - browser: true, - args: [ - { name: 'proc_ins_id', type: 'string', required: true, positional: true, help: '流程实例 ID(procInsId)' }, - ], - columns: ['step', 'operator', 'action', 'time'], - func: async (page, kwargs) => { - await page.goto('https://tae.alibaba-inc.com/expense/pc.html?_authType=SAML'); - await page.wait(2); - const result = await page.evaluate(`(async () => { - const res = await fetch('/approval/getList.json?taskId=&procInsId=${kwargs.proc_ins_id}', { - credentials: 'include' - }); - const data = await res.json(); - return data?.content?.operatorRecords || []; - })()`); - return (result as any[]).map((r, i) => ({ - step: i + 1, - operator: r.operatorName || r.userId, - action: r.operationType, - time: r.operateTime, - })); - }, -}); -``` - -**转换要点**: -1. URL 中的动态 ID(`procInsId`、`taskId` 等)提取为 `args` -2. `captured.json` 里的真实 body 结构用于确定正确的数据路径(如 `content.operatorRecords`) -3. tae 系统统一用 `{ success, content, errorCode, errorMsg }` 外层包裹,取数据要走 `content.*` -4. 认证方式:cookie(`credentials: 'include'`),不需要额外 header -5. 
文件放入 `src/clis//`,无需手动注册,`npm run build` 后自动发现 - -### 故障排查 - -| 现象 | 原因 | 解法 | -|------|------|------| -| 捕获 0 条请求 | 拦截器注入失败,或页面无 JSON API | 检查 daemon 是否运行:`curl localhost:19825/status` | -| 捕获量少(1~3 条) | 页面是只读详情页,首屏数据已在注入前发出 | 手动操作触发更多请求(搜索/翻页),或换用列表页 | -| 候选 YAML 为 0 | 捕获到的 JSON 都没有 array 结构 | 直接看 `captured.json` 手写 TS CLI | -| 新开的 tab 没有被拦截 | 轮询间隔内 tab 已关闭 | 缩短 `--poll 500` | -| 二次运行 record 时数据不连续 | 正常,每次 `record` 启动都是新的 automation window | 无需处理 | - -## Creating Adapters - -> [!TIP] -> **快速模式**:如果你只想为一个具体页面生成一个命令,直接看 [CLI-ONESHOT.md](./CLI-ONESHOT.md)。 -> 只需要一个 URL + 一句话描述,4 步搞定。 - -> [!IMPORTANT] -> **完整模式 — 在写任何代码之前,先阅读 [CLI-EXPLORER.md](./CLI-EXPLORER.md)。** -> 它包含:① AI Agent 浏览器探索工作流 ② 认证策略决策树 ③ 平台 SDK(如 Bilibili 的 `apiGet`/`fetchJson`)④ YAML vs TS 选择指南 ⑤ `tap` 步骤调试方法 ⑥ 级联请求模板 ⑦ 常见陷阱表。 -> **下方仅为简化模板参考,直接使用极易踩坑。** - -### YAML Pipeline (declarative, recommended) - -Create `src/clis//.yaml`: - -```yaml -site: mysite -name: hot -description: Hot topics -domain: www.mysite.com -strategy: cookie # public | cookie | header | intercept | ui -browser: true - -args: - limit: - type: int - default: 20 - description: Number of items - -pipeline: - - navigate: https://www.mysite.com - - - evaluate: | - (async () => { - const res = await fetch('/api/hot', { credentials: 'include' }); - const d = await res.json(); - return d.data.items.map(item => ({ - title: item.title, - score: item.score, - })); - })() - - - map: - rank: ${{ index + 1 }} - title: ${{ item.title }} - score: ${{ item.score }} - - - limit: ${{ args.limit }} - -columns: [rank, title, score] -``` - -For public APIs (no browser): - -```yaml -strategy: public -browser: false - -pipeline: - - fetch: - url: https://api.example.com/hot.json - - select: data.items - - map: - title: ${{ item.title }} - - limit: ${{ args.limit }} -``` - -### TypeScript Adapter (programmatic) - -Create `src/clis//.ts`. 
It will be automatically dynamically loaded (DO NOT manually import it in `index.ts`): - -```typescript -import { cli, Strategy } from '../../registry.js'; - -cli({ - site: 'mysite', - name: 'search', - strategy: Strategy.INTERCEPT, // Or COOKIE - args: [{ name: 'query', required: true, positional: true }], - columns: ['rank', 'title', 'url'], - func: async (page, kwargs) => { - await page.goto('https://www.mysite.com/search'); - - // Inject native XHR/Fetch interceptor hook - await page.installInterceptor('/api/search'); - - // Auto scroll down to trigger lazy loading - await page.autoScroll({ times: 3, delayMs: 2000 }); - - // Retrieve intercepted JSON payloads - const requests = await page.getInterceptedRequests(); - - let results = []; - for (const req of requests) { - results.push(...req.data.items); - } - return results.map((item, i) => ({ - rank: i + 1, title: item.title, url: item.url, - })); - }, -}); -``` - -**When to use TS**: XHR interception (`page.installInterceptor`), infinite scrolling (`page.autoScroll`), cookie extraction, complex data transforms (like GraphQL unwrapping). - -## Pipeline Steps - -| Step | Description | Example | -|------|-------------|---------| -| `navigate` | Go to URL | `navigate: https://example.com` | -| `fetch` | HTTP request (browser cookies) | `fetch: { url: "...", params: { q: "..." } }` | -| `evaluate` | Run JavaScript in page | `evaluate: \| (async () => { ... 
})()` | -| `select` | Extract JSON path | `select: data.items` | -| `map` | Map fields | `map: { title: "${{ item.title }}" }` | -| `filter` | Filter items | `filter: item.score > 100` | -| `sort` | Sort items | `sort: { by: score, order: desc }` | -| `limit` | Cap result count | `limit: ${{ args.limit }}` | -| `intercept` | Declarative XHR capture | `intercept: { trigger: "navigate:...", capture: "api/hot" }` | -| `tap` | Store action + XHR capture | `tap: { store: "feed", action: "fetchFeeds", capture: "homefeed" }` | -| `snapshot` | Page accessibility tree | `snapshot: { interactive: true }` | -| `click` | Click element | `click: ${{ ref }}` | -| `type` | Type text | `type: { ref: "@1", text: "hello" }` | -| `wait` | Wait for time/text | `wait: 2` or `wait: { text: "loaded" }` | -| `press` | Press key | `press: Enter` | - -## Template Syntax - -```yaml -# Arguments with defaults -${{ args.query }} -${{ args.limit | default(20) }} - -# Current item (in map/filter) -${{ item.title }} -${{ item.data.nested.field }} - -# Index (0-based) -${{ index }} -${{ index + 1 }} -``` - -## 5-Tier Authentication Strategy - -| Tier | Name | Method | Example | -|------|------|--------|---------| -| 1 | `public` | No auth, Node.js fetch | Hacker News, V2EX | -| 2 | `cookie` | Browser fetch with `credentials: include` | Bilibili, Zhihu | -| 3 | `header` | Custom headers (ct0, Bearer) | Twitter GraphQL | -| 4 | `intercept` | XHR interception + store mutation | 小红书 Pinia | -| 5 | `ui` | Full UI automation (click/type/scroll) | Last resort | - -## Environment Variables - -| Variable | Default | Description | -|----------|---------|-------------| -| `OPENCLI_DAEMON_PORT` | 19825 | Daemon listen port | -| `OPENCLI_BROWSER_CONNECT_TIMEOUT` | 30 | Browser connection timeout (sec) | -| `OPENCLI_BROWSER_COMMAND_TIMEOUT` | 45 | Command execution timeout (sec) | -| `OPENCLI_BROWSER_EXPLORE_TIMEOUT` | 120 | Explore timeout (sec) | -| `OPENCLI_VERBOSE` | — | Show daemon/extension logs | - -## 
Troubleshooting - -| Issue | Solution | -|-------|----------| -| `npx not found` | Install Node.js: `brew install node` | -| `Extension not connected` | 1) Chrome must be open 2) Install opencli Browser Bridge extension | -| `Target page context` error | Add `navigate:` step before `evaluate:` in YAML | -| Empty table data | Check if evaluate returns correct data path | -| Daemon issues | `curl localhost:19825/status` to check, `curl localhost:19825/logs` for extension logs | diff --git a/autoresearch/baseline.txt b/autoresearch/baseline.txt new file mode 100644 index 00000000..5a532fa0 --- /dev/null +++ b/autoresearch/baseline.txt @@ -0,0 +1 @@ +53/59 diff --git a/autoresearch/eval.ts b/autoresearch/eval.ts new file mode 100644 index 00000000..09cfc02f --- /dev/null +++ b/autoresearch/eval.ts @@ -0,0 +1,321 @@ +#!/usr/bin/env npx tsx +/** + * AutoResearch Evaluation Runner + * + * Runs all tasks in tasks.json against the current `opencli operate` build, + * judges each result, and outputs a score report. 
+ * + * Usage: + * npx tsx autoresearch/eval.ts # Run all tasks + * npx tsx autoresearch/eval.ts --train-only # Run only train set (15 tasks) + * npx tsx autoresearch/eval.ts --test-only # Run only test set (5 tasks) + * npx tsx autoresearch/eval.ts --task example-title # Run a single task + */ + +import { execSync } from 'node:child_process'; +import { readFileSync, writeFileSync, mkdirSync, readdirSync } from 'node:fs'; +import { join, dirname } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const TASKS_FILE = join(__dirname, 'tasks.json'); +const RESULTS_DIR = join(__dirname, 'results'); +const BASELINE_FILE = join(__dirname, 'baseline.txt'); + +// ── Types ────────────────────────────────────────────────────────── + +interface TaskDef { + name: string; + command: string; + url?: string; + maxSteps?: number; + judge: JudgeCriteria; + set?: 'test'; // omitted = train +} + +type JudgeCriteria = + | { type: 'contains'; field: string; value: string } + | { type: 'arrayMinLength'; field: string; minLength: number } + | { type: 'arrayFieldsPresent'; field: string; minLength: number; requiredFields: string[] } + | { type: 'nonEmpty'; field: string } + | { type: 'matchesPattern'; field: string; pattern: string } + | { type: 'successTrue' }; + +interface TaskResult { + name: string; + passed: boolean; + steps: number; + cost: number; + error?: string; + duration: number; + set: 'train' | 'test'; +} + +interface EvalResult { + timestamp: string; + score: string; + trainScore: string; + testScore: string; + tasks: TaskResult[]; + totalCost: number; + duration: string; +} + +// ── Judge Functions ──────────────────────────────────────────────── + +function judge(criteria: JudgeCriteria, result: any): boolean { + try { + if (criteria.type === 'successTrue') { + return result.success === true; + } + + const data = getField(result, criteria.field); + + switch (criteria.type) { + case 'contains': { + 
const str = typeof data === 'string' ? data : JSON.stringify(data); + return str.toLowerCase().includes(criteria.value.toLowerCase()); + } + case 'arrayMinLength': { + if (Array.isArray(data)) return data.length >= criteria.minLength; + // extractedData might be a stringified array or object with array field + const parsed = tryParseArray(data); + return parsed !== null && parsed.length >= criteria.minLength; + } + case 'arrayFieldsPresent': { + let arr = Array.isArray(data) ? data : tryParseArray(data); + if (!arr || arr.length < criteria.minLength) return false; + return arr.slice(0, criteria.minLength).every((item: any) => + criteria.requiredFields.every(f => item[f] !== undefined && item[f] !== null && item[f] !== '') + ); + } + case 'nonEmpty': { + if (data === null || data === undefined) return false; + if (typeof data === 'string') return data.trim().length > 0; + if (Array.isArray(data)) return data.length > 0; + if (typeof data === 'object') return Object.keys(data).length > 0; + return true; + } + case 'matchesPattern': { + const str = typeof data === 'string' ? 
data : JSON.stringify(data); + return new RegExp(criteria.pattern).test(str); + } + default: + return false; + } + } catch { + return false; + } +} + +function getField(obj: any, field: string): any { + if (!obj) return undefined; + return obj[field]; +} + +function tryParseArray(data: any): any[] | null { + if (Array.isArray(data)) return data; + if (typeof data === 'string') { + try { + const parsed = JSON.parse(data); + if (Array.isArray(parsed)) return parsed; + // Look for array inside parsed object + for (const val of Object.values(parsed)) { + if (Array.isArray(val)) return val as any[]; + } + } catch { /* not JSON */ } + } + if (typeof data === 'object' && data !== null) { + for (const val of Object.values(data)) { + if (Array.isArray(val)) return val as any[]; + } + } + return null; +} + +// ── Run a single task ────────────────────────────────────────────── + +function runTask(task: TaskDef): TaskResult { + const maxSteps = task.maxSteps ?? 10; + const start = Date.now(); + + const args = [ + 'node', 'dist/main.js', 'operate', + ...(task.url ? ['--url', task.url] : []), + '--max-steps', String(maxSteps), + JSON.stringify(task.command), + ]; + + let output: string; + try { + output = execSync(args.join(' '), { + cwd: join(__dirname, '..'), + timeout: maxSteps * 30_000, // 30s per step max + encoding: 'utf-8', + env: { ...process.env }, + stdio: ['pipe', 'pipe', 'pipe'], + }); + } catch (err: any) { + // Command failed but may still have output + output = err.stdout ?? ''; + } + + const duration = Date.now() - start; + + // Parse the result from CLI output + const result = parseOperateOutput(output); + + const passed = judge(task.judge, result); + + return { + name: task.name, + passed, + steps: result?.stepsCompleted ?? 0, + cost: result?.tokenUsage?.estimatedCost ?? 0, + error: passed ? undefined : (result?.result ?? 'unknown failure').slice(0, 200), + duration, + set: task.set === 'test' ? 
'test' : 'train', + }; +} + +function parseOperateOutput(output: string): any { + // The CLI outputs structured info. Try to extract key fields. + const result: any = { success: false }; + + if (output.includes('✓ Task completed successfully')) { + result.success = true; + } + + // Extract "Steps: N" from the stats line + const stepsMatch = output.match(/Steps:\s*(\d+)/); + if (stepsMatch) result.stepsCompleted = parseInt(stepsMatch[1], 10); + + // Extract cost + const costMatch = output.match(/Cost:\s*~\$([0-9.]+)/); + if (costMatch) result.tokenUsage = { estimatedCost: parseFloat(costMatch[1]) }; + + // Extract "Extracted data:" section — try multiple patterns + const dataMatch = output.match(/Extracted data:\s*\n([\s\S]*?)(?:\n\nSteps:|\nSteps:)/); + if (dataMatch) { + const dataStr = dataMatch[1].trim(); + try { + result.extractedData = JSON.parse(dataStr); + } catch { + result.extractedData = dataStr; + } + } + + // If no "Extracted data:" section, try to get data from the result text + if (!result.extractedData) { + // The result text after ✓ might contain the extracted info + const allText = output.split('Steps:')[0]; + const successText = allText.split('✓ Task completed successfully\n')[1]; + if (successText) { + const cleaned = successText.trim(); + if (cleaned) { + try { + result.extractedData = JSON.parse(cleaned); + } catch { + result.extractedData = cleaned; + } + } + } + } + + // Extract result text (line after ✓ or ✗) + const resultMatch = output.match(/[✓✗] .+\n\n([\s\S]*?)(?:\n\nExtracted data:|\n\nSteps:)/); + if (resultMatch) result.result = resultMatch[1].trim(); + + return result; +} + +// ── Main ─────────────────────────────────────────────────────────── + +function main() { + const args = process.argv.slice(2); + const trainOnly = args.includes('--train-only'); + const testOnly = args.includes('--test-only'); + const singleTask = args.includes('--task') ? 
args[args.indexOf('--task') + 1] : null; + + const allTasks: TaskDef[] = JSON.parse(readFileSync(TASKS_FILE, 'utf-8')); + + let tasks: TaskDef[]; + if (singleTask) { + tasks = allTasks.filter(t => t.name === singleTask); + if (tasks.length === 0) { + console.error(`Task "${singleTask}" not found. Available: ${allTasks.map(t => t.name).join(', ')}`); + process.exit(1); + } + } else if (trainOnly) { + tasks = allTasks.filter(t => t.set !== 'test'); + } else if (testOnly) { + tasks = allTasks.filter(t => t.set === 'test'); + } else { + tasks = allTasks; + } + + console.log(`\n🔬 AutoResearch Eval — ${tasks.length} tasks\n`); + + const results: TaskResult[] = []; + const evalStart = Date.now(); + + for (let i = 0; i < tasks.length; i++) { + const task = tasks[i]; + process.stdout.write(` [${i + 1}/${tasks.length}] ${task.name}...`); + + const result = runTask(task); + results.push(result); + + const icon = result.passed ? '✓' : '✗'; + const costStr = result.cost > 0 ? ` ($${result.cost.toFixed(3)})` : ''; + console.log(` ${icon} ${result.steps} steps, ${Math.round(result.duration / 1000)}s${costStr}`); + } + + const evalDuration = Date.now() - evalStart; + + // Calculate scores + const trainResults = results.filter(r => r.set === 'train'); + const testResults = results.filter(r => r.set === 'test'); + const totalPassed = results.filter(r => r.passed).length; + const trainPassed = trainResults.filter(r => r.passed).length; + const testPassed = testResults.filter(r => r.passed).length; + const totalCost = results.reduce((sum, r) => sum + r.cost, 0); + + const evalResult: EvalResult = { + timestamp: new Date().toISOString(), + score: `${totalPassed}/${results.length}`, + trainScore: `${trainPassed}/${trainResults.length}`, + testScore: `${testPassed}/${testResults.length}`, + tasks: results, + totalCost, + duration: `${Math.round(evalDuration / 60000)}min`, + }; + + // Print summary + console.log(`\n${'─'.repeat(50)}`); + console.log(` Score: ${evalResult.score} (train: 
${evalResult.trainScore}, test: ${evalResult.testScore})`); + console.log(` Cost: $${totalCost.toFixed(3)}`); + console.log(` Time: ${evalResult.duration}`); + + const failures = results.filter(r => !r.passed); + if (failures.length > 0) { + console.log(`\n Failures:`); + for (const f of failures) { + console.log(` ✗ ${f.name}: ${f.error ?? 'unknown'}`); + } + } + console.log(''); + + // Save result + mkdirSync(RESULTS_DIR, { recursive: true }); + const existingRounds = readdirSync(RESULTS_DIR).filter(f => f.startsWith('round-')).length; + const roundNum = String(existingRounds + 1).padStart(3, '0'); + const resultPath = join(RESULTS_DIR, `round-${roundNum}.json`); + writeFileSync(resultPath, JSON.stringify(evalResult, null, 2), 'utf-8'); + console.log(` Results saved to: ${resultPath}`); + + // Output score for scripting + console.log(`\nSCORE=${totalPassed}/${results.length}`); +} + +main(); diff --git a/autoresearch/program.md b/autoresearch/program.md new file mode 100644 index 00000000..64278281 --- /dev/null +++ b/autoresearch/program.md @@ -0,0 +1,98 @@ +# AutoResearch: OpenCLI Operate Optimization + +## Your Mission + +You are an AI researcher optimizing `opencli operate` — a browser automation +agent. Your goal: maximize the task success rate on a fixed evaluation set. + +## Current State + +- Baseline score: see `autoresearch/baseline.txt` +- Latest results: see `autoresearch/results/` (most recent round file) +- Agent code: `src/agent/` (all files are modifiable) + +## The Loop + +For each round: + +1. **Analyze** — Read the latest eval results. Which tasks failed? Why? +2. **Hypothesize** — Form a theory about what to change +3. **Modify** — Edit files in `src/agent/` +4. **Build** — Run `npm run build`. Must compile cleanly. +5. **Evaluate** — Run `npx tsx autoresearch/eval.ts --train-only` for quick feedback +6. 
**Decide** — If train score improved: + - Run `npx tsx autoresearch/eval.ts` (full eval including test set) + - If total score >= baseline: `git commit` and update `autoresearch/baseline.txt` + - If total score < baseline: discard the uncommitted changes (`git checkout -- src/agent/`) +7. **Log** — Record what you tried and why it worked or didn't + +## Rules + +### MUST +- Only modify files in `src/agent/` +- Run `npm run build` after every change (must compile) +- Run eval to measure impact before committing +- Commit with message: `autoresearch: {score} — {what changed}` +- Make BOLD changes. Small parameter tweaks get lost in variance. + +### MUST NOT +- Do NOT modify `autoresearch/eval.ts` or `autoresearch/tasks.json` +- Do NOT hardcode logic for specific task names or URLs +- Do NOT modify files outside `src/agent/` +- Do NOT skip the eval step + +## Strategy Guide + +### What tends to work (from Browser Use's experience) +- **Prompt rewrites** often beat code changes +- **DOM format changes** (e.g., more concise serialization) save tokens and improve accuracy +- **Action strategy changes** (when to scroll, how to verify input) fix entire categories of failures +- **Better error messages** to the LLM help it self-correct + +### Common failure root causes +- Element not in viewport → agent types into wrong element +- LLM uses wrong element index (index from previous step, element moved) +- LLM calls `done` prematurely without completing all requirements +- LLM hallucinates data instead of extracting from page +- Autocomplete fields not handled (need to wait for suggestions) +- Page loads slowly, DOM snapshot captures loading/skeleton state + +### What to look at when analyzing failures +- How many steps did the failing task use? (max_steps = ran out of time) +- Did the LLM ever see the correct data in the DOM snapshot? +- Did actions report success but not actually work? +- Was the evaluation judge too strict or too lenient?
+ +## Files You Can Modify + +| File | Purpose | Impact | +|------|---------|--------| +| `src/agent/prompts.ts` | System prompt, step messages | HIGH — directly controls LLM behavior | +| `src/agent/agent-loop.ts` | Core loop, planning, loop detection | HIGH — controls flow and recovery | +| `src/agent/action-executor.ts` | How actions are executed | HIGH — click/type/scroll reliability | +| `src/agent/dom-context.ts` | DOM snapshot + element info | HIGH — what the LLM "sees" | +| `src/agent/types.ts` | Action schemas, response format | MEDIUM — changes what LLM can do | +| `src/agent/llm-client.ts` | LLM API wrapper | LOW — mostly infrastructure | +| `src/agent/trace-recorder.ts` | Network capture | LOW — only affects skill generation | +| `src/agent/api-discovery.ts` | API scoring | LOW — only affects skill generation | +| `src/agent/skill-saver.ts` | TS code generation | LOW — only affects --save-as | +| `src/agent/cli-handler.ts` | CLI bridge | LOW — mostly boilerplate | + +Focus on the HIGH impact files first. + +## Example Round + +``` +Round 5: + Previous: 14/20 + Analysis: 3 tasks fail because LLM calls done after filling only visible + form fields, missing fields below the fold. 1 task fails because extract + returns empty (page still loading). + Change: Added rule to prompts.ts: "Before calling done on form tasks, + scroll to bottom to verify all fields are filled." Also added 1s wait + after navigate in action-executor.ts. 
+ Train eval: 13/15 → improvement + Full eval: 17/20 → improvement over 14/20 + Action: git commit "autoresearch: 17/20 — scroll-before-done rule + post-navigate wait" + Updated baseline.txt to 17/20 +``` diff --git a/autoresearch/run.sh b/autoresearch/run.sh new file mode 100755 index 00000000..fab41965 --- /dev/null +++ b/autoresearch/run.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# AutoResearch launcher for OpenCLI Operate +# +# Usage: +# ./autoresearch/run.sh # Run with defaults +# ./autoresearch/run.sh --rounds 5 # Suggest round count in prompt + +set -e +cd "$(dirname "$0")/.." + +# Ensure build is current +echo "Building OpenCLI..." +npm run build > /dev/null 2>&1 +echo "Build OK" + +# Read current baseline +BASELINE="0/20" +if [ -f autoresearch/baseline.txt ]; then + BASELINE=$(cat autoresearch/baseline.txt) +fi +echo "Current baseline: $BASELINE" +echo "" + +# Count existing rounds +ROUNDS=$(ls autoresearch/results/round-*.json 2>/dev/null | wc -l | tr -d ' ') +echo "Completed rounds: $ROUNDS" +echo "" + +# Launch Claude Code +echo "Starting AutoResearch session..." +echo "─────────────────────────────────" + +claude -p \ + --dangerously-skip-permissions \ + --model sonnet \ + --system-prompt "$(cat autoresearch/program.md)" \ + "You are starting an AutoResearch session for opencli operate. + +Current baseline: $BASELINE +Completed rounds: $ROUNDS + +Read autoresearch/tasks.json to understand the evaluation tasks. +$([ "$ROUNDS" -gt 0 ] && echo "Read the latest result file in autoresearch/results/ to understand what was tried before.") + +Your goal: improve the success rate by modifying src/agent/ files. +Run the eval loop: analyze → modify → build → eval → commit or revert. +Aim for 10-20 rounds of iteration.
+ +Start by running the eval to establish/verify the current baseline: + npx tsx autoresearch/eval.ts" diff --git a/autoresearch/tasks.json b/autoresearch/tasks.json new file mode 100644 index 00000000..8cdd2155 --- /dev/null +++ b/autoresearch/tasks.json @@ -0,0 +1,634 @@ +[ + { + "name": "extract-title-example", + "command": "Extract the main heading text", + "url": "https://example.com", + "maxSteps": 5, + "judge": { + "type": "contains", + "field": "extractedData", + "value": "Example Domain" + } + }, + { + "name": "extract-title-iana", + "command": "Extract the page heading", + "url": "https://www.iana.org", + "maxSteps": 5, + "judge": { + "type": "nonEmpty", + "field": "extractedData" + } + }, + { + "name": "extract-paragraph-wiki-js", + "command": "Extract the first paragraph of the article", + "url": "https://en.wikipedia.org/wiki/JavaScript", + "maxSteps": 5, + "judge": { + "type": "contains", + "field": "extractedData", + "value": "programming language" + } + }, + { + "name": "extract-paragraph-wiki-python", + "command": "Extract the first paragraph of the article", + "url": "https://en.wikipedia.org/wiki/Python_(programming_language)", + "maxSteps": 5, + "judge": { + "type": "contains", + "field": "extractedData", + "value": "programming language" + } + }, + { + "name": "extract-github-stars", + "command": "Find the number of stars on this repository", + "url": "https://github.com/browser-use/browser-use", + "maxSteps": 5, + "judge": { + "type": "matchesPattern", + "field": "extractedData", + "pattern": "\\d" + } + }, + { + "name": "extract-github-description", + "command": "Extract the repository description", + "url": "https://github.com/anthropics/claude-code", + "maxSteps": 5, + "judge": { + "type": "nonEmpty", + "field": "extractedData" + } + }, + { + "name": "extract-github-readme-heading", + "command": "Extract the first heading from the README", + "url": "https://github.com/vercel/next.js", + "maxSteps": 5, + "judge": { + "type": "nonEmpty", + 
"field": "extractedData" + } + }, + { + "name": "extract-npm-downloads", + "command": "Find the weekly download count for this package", + "url": "https://www.npmjs.com/package/zod", + "maxSteps": 8, + "judge": { + "type": "matchesPattern", + "field": "extractedData", + "pattern": "\\d" + } + }, + { + "name": "extract-npm-description", + "command": "Extract the package description", + "url": "https://www.npmjs.com/package/express", + "maxSteps": 5, + "judge": { + "type": "nonEmpty", + "field": "extractedData" + } + }, + { + "name": "list-hn-top5", + "command": "Extract the top 5 stories with their titles and scores", + "url": "https://news.ycombinator.com", + "maxSteps": 8, + "judge": { + "type": "arrayMinLength", + "field": "extractedData", + "minLength": 5 + } + }, + { + "name": "list-hn-top10", + "command": "Extract the top 10 stories with title, score, and author", + "url": "https://news.ycombinator.com", + "maxSteps": 8, + "judge": { + "type": "arrayMinLength", + "field": "extractedData", + "minLength": 10 + } + }, + { + "name": "list-books-5", + "command": "Extract the first 5 books with their title and price", + "url": "https://books.toscrape.com", + "maxSteps": 8, + "judge": { + "type": "arrayFieldsPresent", + "field": "extractedData", + "minLength": 5, + "requiredFields": [ + "title", + "price" + ] + } + }, + { + "name": "list-books-10", + "command": "Extract the first 10 books with their title, price, and rating", + "url": "https://books.toscrape.com", + "maxSteps": 10, + "judge": { + "type": "arrayMinLength", + "field": "extractedData", + "minLength": 10 + } + }, + { + "name": "list-quotes-3", + "command": "Extract the first 3 quotes with their text and author", + "url": "https://quotes.toscrape.com", + "maxSteps": 8, + "judge": { + "type": "arrayMinLength", + "field": "extractedData", + "minLength": 3 + } + }, + { + "name": "list-quotes-tags", + "command": "Extract the first 5 quotes with their text, author, and tags", + "url": 
"https://quotes.toscrape.com", + "maxSteps": 8, + "judge": { + "type": "arrayMinLength", + "field": "extractedData", + "minLength": 5 + } + }, + { + "name": "list-github-trending", + "command": "Extract the top 3 trending repositories today with their name and description", + "url": "https://github.com/trending", + "maxSteps": 8, + "judge": { + "type": "arrayMinLength", + "field": "extractedData", + "minLength": 3 + } + }, + { + "name": "list-github-trending-lang", + "command": "Extract the top 5 trending Python repositories today", + "url": "https://github.com/trending/python", + "maxSteps": 8, + "judge": { + "type": "arrayMinLength", + "field": "extractedData", + "minLength": 5 + } + }, + { + "name": "list-jsonplaceholder-posts", + "command": "Extract the first 5 posts (title and body) from the API", + "url": "https://jsonplaceholder.typicode.com/posts", + "maxSteps": 5, + "judge": { + "type": "arrayMinLength", + "field": "extractedData", + "minLength": 5 + } + }, + { + "name": "list-jsonplaceholder-users", + "command": "Extract the names and emails of all users", + "url": "https://jsonplaceholder.typicode.com/users", + "maxSteps": 5, + "judge": { + "type": "arrayMinLength", + "field": "extractedData", + "minLength": 5 + } + }, + { + "name": "search-google", + "command": "Search for 'opencli github' and extract the titles of the top 3 results", + "url": "https://www.google.com", + "maxSteps": 10, + "judge": { + "type": "arrayMinLength", + "field": "extractedData", + "minLength": 3 + } + }, + { + "name": "search-ddg", + "command": "Search for 'weather beijing' and extract the search results", + "url": "https://duckduckgo.com", + "maxSteps": 10, + "judge": { + "type": "nonEmpty", + "field": "extractedData" + } + }, + { + "name": "search-ddg-tech", + "command": "Search for 'TypeScript tutorial' and extract the first 3 result titles and URLs", + "url": "https://duckduckgo.com", + "maxSteps": 10, + "judge": { + "type": "arrayMinLength", + "field": "extractedData", + 
"minLength": 3 + } + }, + { + "name": "search-wiki", + "command": "Search for 'Rust programming language' in the search box, click the result, and extract the first sentence", + "url": "https://en.wikipedia.org", + "maxSteps": 10, + "judge": { + "type": "contains", + "field": "extractedData", + "value": "programming language" + } + }, + { + "name": "search-npm", + "command": "Search for 'react' and extract the name and description of the top 3 packages", + "url": "https://www.npmjs.com", + "maxSteps": 10, + "judge": { + "type": "arrayMinLength", + "field": "extractedData", + "minLength": 3 + } + }, + { + "name": "search-github", + "command": "Search for 'browser automation' and extract the top 3 repository names", + "url": "https://github.com/search", + "maxSteps": 10, + "judge": { + "type": "arrayMinLength", + "field": "extractedData", + "minLength": 3 + } + }, + { + "name": "nav-click-link-example", + "command": "Click the 'More information...' link and extract the heading of the new page", + "url": "https://example.com", + "maxSteps": 8, + "judge": { + "type": "contains", + "field": "extractedData", + "value": "IANA" + } + }, + { + "name": "nav-click-hn-first", + "command": "Click on the first story link and extract the title of the page you land on", + "url": "https://news.ycombinator.com", + "maxSteps": 8, + "judge": { + "type": "nonEmpty", + "field": "extractedData" + } + }, + { + "name": "nav-click-hn-comments", + "command": "Click on the comments link of the first story and extract the story title from the comments page", + "url": "https://news.ycombinator.com", + "maxSteps": 8, + "judge": { + "type": "nonEmpty", + "field": "extractedData" + } + }, + { + "name": "nav-click-wiki-link", + "command": "Click on the 'History' section link in the table of contents and extract the first sentence of that section", + "url": "https://en.wikipedia.org/wiki/JavaScript", + "maxSteps": 10, + "judge": { + "type": "nonEmpty", + "field": "extractedData" + } + }, + { + 
"name": "nav-click-github-tab", + "command": "Click on the 'Issues' tab and extract the title of the first open issue", + "url": "https://github.com/vercel/next.js", + "maxSteps": 10, + "judge": { + "type": "nonEmpty", + "field": "extractedData" + } + }, + { + "name": "nav-go-back", + "command": "Click the 'More information...' link, then go back, and extract the heading of the original page", + "url": "https://example.com", + "maxSteps": 10, + "judge": { + "type": "contains", + "field": "extractedData", + "value": "Example Domain" + } + }, + { + "name": "nav-multi-step", + "command": "Click the Next page link at the bottom, then extract the first quote from page 2", + "url": "https://quotes.toscrape.com", + "maxSteps": 10, + "judge": { + "type": "nonEmpty", + "field": "extractedData" + } + }, + { + "name": "scroll-footer-quotes", + "command": "Scroll to the bottom of the page and extract the text in the footer", + "url": "https://quotes.toscrape.com", + "maxSteps": 8, + "judge": { + "type": "nonEmpty", + "field": "extractedData" + } + }, + { + "name": "scroll-footer-books", + "command": "Scroll to the bottom and extract the pagination info (e.g. 
Page 1 of 50)", + "url": "https://books.toscrape.com", + "maxSteps": 8, + "judge": { + "type": "matchesPattern", + "field": "extractedData", + "pattern": "\\d" + } + }, + { + "name": "scroll-long-page", + "command": "Scroll down and count how many posts are on this page", + "url": "https://jsonplaceholder.typicode.com/posts", + "maxSteps": 8, + "judge": { + "type": "matchesPattern", + "field": "extractedData", + "pattern": "\\d" + } + }, + { + "name": "scroll-find-element", + "command": "Scroll down to find the 'Next' pagination link and extract its URL", + "url": "https://quotes.toscrape.com", + "maxSteps": 8, + "judge": { + "type": "nonEmpty", + "field": "extractedData" + } + }, + { + "name": "scroll-lazy-load", + "command": "Scroll down to load more content and extract the total number of items visible on the page", + "url": "https://books.toscrape.com", + "maxSteps": 8, + "judge": { + "type": "matchesPattern", + "field": "extractedData", + "pattern": "\\d" + } + }, + { + "name": "form-simple-name", + "command": "Fill in the Customer Name field with 'OpenCLI Test' and the Telephone field with '1234567890'. Do not submit.", + "url": "https://httpbin.org/forms/post", + "maxSteps": 8, + "judge": { + "type": "successTrue" + } + }, + { + "name": "form-text-inputs", + "command": "Fill the Customer Name with 'Alice' and Telephone with '555-1234'. Do not submit.", + "url": "https://httpbin.org/forms/post", + "maxSteps": 8, + "judge": { + "type": "successTrue" + } + }, + { + "name": "form-radio-select", + "command": "Select the 'Medium' pizza size option. Do not submit.", + "url": "https://httpbin.org/forms/post", + "maxSteps": 8, + "judge": { + "type": "successTrue" + } + }, + { + "name": "form-checkbox", + "command": "Check the 'Cheese' topping checkbox. 
Do not submit.", + "url": "https://httpbin.org/forms/post", + "maxSteps": 8, + "judge": { + "type": "successTrue" + } + }, + { + "name": "form-textarea", + "command": "Type 'This is an automated test by OpenCLI' into the Delivery Instructions textarea. Do not submit.", + "url": "https://httpbin.org/forms/post", + "maxSteps": 8, + "judge": { + "type": "successTrue" + } + }, + { + "name": "form-login-fake", + "command": "Fill the username field with 'testuser' and the password field with 'testpass'. Do not submit.", + "url": "https://the-internet.herokuapp.com/login", + "maxSteps": 8, + "judge": { + "type": "successTrue" + } + }, + { + "name": "complex-wiki-toc", + "command": "Extract the table of contents headings from the JavaScript article", + "url": "https://en.wikipedia.org/wiki/JavaScript", + "maxSteps": 8, + "judge": { + "type": "arrayMinLength", + "field": "extractedData", + "minLength": 5 + } + }, + { + "name": "complex-books-detail", + "command": "Click on the first book, then extract its title, price, and description from the detail page", + "url": "https://books.toscrape.com", + "maxSteps": 10, + "judge": { + "type": "nonEmpty", + "field": "extractedData" + } + }, + { + "name": "complex-quotes-page2", + "command": "Navigate to page 2 of quotes and extract the first 3 quotes with author", + "url": "https://quotes.toscrape.com", + "maxSteps": 10, + "judge": { + "type": "arrayMinLength", + "field": "extractedData", + "minLength": 3 + } + }, + { + "name": "complex-github-repo-info", + "command": "Extract the repository language, license, and last commit date", + "url": "https://github.com/expressjs/express", + "maxSteps": 8, + "judge": { + "type": "nonEmpty", + "field": "extractedData" + } + }, + { + "name": "complex-hn-story-comments", + "command": "Click the first story, go back, then click the comments link and extract the number of comments", + "url": "https://news.ycombinator.com", + "maxSteps": 12, + "judge": { + "type": "nonEmpty", + "field": 
"extractedData" + } + }, + { + "name": "complex-multi-extract", + "command": "Extract both the page title AND the first paragraph text", + "url": "https://en.wikipedia.org/wiki/TypeScript", + "maxSteps": 5, + "judge": { + "type": "contains", + "field": "extractedData", + "value": "TypeScript" + } + }, + { + "name": "bench-reddit-top5", + "command": "Extract the titles of the top 5 posts on the front page", + "url": "https://old.reddit.com", + "maxSteps": 8, + "judge": { + "type": "arrayMinLength", + "field": "extractedData", + "minLength": 5 + }, + "set": "test" + }, + { + "name": "bench-imdb-matrix", + "command": "Search for 'The Matrix' and extract the year and rating of the first result", + "url": "https://www.imdb.com", + "maxSteps": 10, + "judge": { + "type": "contains", + "field": "extractedData", + "value": "1999" + }, + "set": "test" + }, + { + "name": "bench-npm-zod", + "command": "Search for the 'zod' package and extract its weekly download count and description", + "url": "https://www.npmjs.com", + "maxSteps": 10, + "judge": { + "type": "nonEmpty", + "field": "extractedData" + }, + "set": "test" + }, + { + "name": "bench-wiki-search", + "command": "Search for 'machine learning', click the result, and extract the first sentence", + "url": "https://en.wikipedia.org", + "maxSteps": 10, + "judge": { + "type": "contains", + "field": "extractedData", + "value": "learning" + }, + "set": "test" + }, + { + "name": "bench-github-profile", + "command": "Extract the bio and number of public repositories", + "url": "https://github.com/torvalds", + "maxSteps": 8, + "judge": { + "type": "nonEmpty", + "field": "extractedData" + }, + "set": "test" + }, + { + "name": "bench-books-category", + "command": "Click on the 'Science' category, then extract the first 3 book titles", + "url": "https://books.toscrape.com", + "maxSteps": 10, + "judge": { + "type": "arrayMinLength", + "field": "extractedData", + "minLength": 3 + }, + "set": "test" + }, + { + "name": 
"bench-quotes-author", + "command": "Click on the first author link and extract the author's bio", + "url": "https://quotes.toscrape.com", + "maxSteps": 10, + "judge": { + "type": "nonEmpty", + "field": "extractedData" + }, + "set": "test" + }, + { + "name": "bench-ddg-images", + "command": "Search for 'sunset' and extract the text of the first 3 search results", + "url": "https://duckduckgo.com", + "maxSteps": 10, + "judge": { + "type": "arrayMinLength", + "field": "extractedData", + "minLength": 3 + }, + "set": "test" + }, + { + "name": "bench-httpbin-headers", + "command": "Extract the User-Agent and Host headers shown on this page", + "url": "https://httpbin.org/headers", + "maxSteps": 5, + "judge": { + "type": "nonEmpty", + "field": "extractedData" + }, + "set": "test" + }, + { + "name": "bench-jsonapi-todo", + "command": "Extract the first 5 todo items with their title and completion status", + "url": "https://jsonplaceholder.typicode.com/todos", + "maxSteps": 5, + "judge": { + "type": "arrayMinLength", + "field": "extractedData", + "minLength": 5 + }, + "set": "test" + } +] \ No newline at end of file diff --git a/docs/superpowers/specs/2026-03-30-open-operator-design.md b/docs/superpowers/specs/2026-03-30-open-operator-design.md new file mode 100644 index 00000000..911dd7ac --- /dev/null +++ b/docs/superpowers/specs/2026-03-30-open-operator-design.md @@ -0,0 +1,288 @@ +# OpenCLI "open-operator" Implementation Plan + +## Context + +OpenCLI 是一个将网站转化为 CLI 命令的工具(TypeScript/Node.js),使用 Chrome Extension + daemon 架构进行浏览器自动化。当前所有浏览器交互都是**确定性的** — 通过 JS 注入执行固定脚本。 + +本计划为 OpenCLI 新增 **AI Agent 浏览器自动化能力**(代号 "open-operator"),实现 Browser Use 风格的 LLM 驱动控制循环:观察页面 → LLM 推理 → 执行动作 → 重复,直到任务完成。成功的操作可沉淀为可复用的 CLI skill。 + +**关键决策**: +- 在 OpenCLI 现有 Extension + daemon 架构上实现(`chrome.debugger` 已验证支持所有所需 CDP domain) +- TypeScript 实现(不引入 Python 子进程) +- 保留 OpenCLI 的核心优势:复用用户浏览器登录态 + +--- + +## Architecture + +``` +opencli operate "在 Google Flights 搜索航班" + │ + ├── CLI (operate 
command) ──▶ AgentLoop + │ │ + │ Phase 1: Build Context │ buildDomContext(page) + │ - DOM snapshot (text) │ → 现有 dom-snapshot.ts + │ - Element coord map │ → 新增坐标提取 + │ - Screenshot (optional) │ → page.screenshot() + │ - Action history │ → 上一步结果 + │ │ + │ Phase 2: Call LLM │ Anthropic Claude API + │ - System prompt │ → 行为指令 + action schema + │ - Structured JSON output │ → { thinking, memory, nextGoal, actions[] } + │ │ + │ Phase 3: Execute Actions │ ActionExecutor + │ - Native CDP Input.* │ → dispatchMouseEvent/KeyEvent + │ - 或 fallback JS 注入 │ → 现有 page.click/typeText + │ │ + │ Phase 4: Observe & Repeat │ Loop detection, error recovery + │ │ + ├── --save-as site/name ──▶ Trace → YAML skill (复用 pipeline 系统) + └── 输出: 结果 + token 用量 + 成本 +``` + +--- + +## Phase 1: CDP Infrastructure Enhancement (~200 LOC modifications) + +**Goal**: 添加 CDP passthrough 能力,支持原生 Input 事件 + +### 修改文件 + +| File | Change | +|------|--------| +| `extension/src/protocol.ts` | 添加 `'cdp'` action type, `cdpMethod/cdpParams` 字段 | +| `extension/src/background.ts` | 添加 `handleCdp()` — 转发 `chrome.debugger.sendCommand(method, params)` | +| `src/browser/daemon-client.ts` | 添加 `'cdp'` 到 action union, 新字段 | +| `src/types.ts` | IPage 新增可选方法: `cdp?()`, `nativeClick?()`, `nativeType?()`, `nativeKeyPress?()`, `getElementBounds?()` | +| `src/browser/page.ts` | 实现新 IPage 方法(通过 daemon 调用 CDP passthrough) | + +### 核心设计:CDP Passthrough + +不为每个 CDP 方法单独加 handler,而是添加一个通用 `cdp` action,直接转发 `chrome.debugger.sendCommand(method, params)`。Agent 可访问任意 CDP domain,无需修改协议。 + +所有新方法标记为 `?`(可选),**不影响现有 300+ CLI 命令**。 + +--- + +## Phase 2: LLM-Ready DOM Context (~250 LOC new) + +**Goal**: 在现有 `dom-snapshot.ts` 基础上,补充元素坐标映射 + +### 新文件 + +**`src/agent/dom-context.ts`** + +- 复用 `page.snapshot()` 获取 LLM 友好的文本(`[42]` 格式) +- 额外运行一段 JS 收集所有 `[data-opencli-ref]` 元素的 `getBoundingClientRect()` +- 输出 `DomContext`: `{ snapshotText, elementMap: Map, url, title, viewport }` + +**关键洞察**:OpenCLI 的 `dom-snapshot.ts` 已经实现了 Browser Use 的 DOM 
序列化的 13/15 功能(交互元素索引、可见性过滤、遮挡检测、Shadow DOM、iframe 等),只差坐标映射。 + +--- + +## Phase 3: Agent Loop Core (~1100 LOC new) + +**Goal**: LLM 驱动的浏览器控制循环 + +### 新依赖 + +```json +"zod": "^3.23.0", +"@anthropic-ai/sdk": "^0.39.0" +``` + +### 新文件 + +| File | ~LOC | Purpose | +|------|------|---------| +| `src/agent/types.ts` | 150 | Zod schemas: actions (click/type/navigate/scroll/wait/extract/done/go_back/press_key), AgentResponse, AgentConfig | +| `src/agent/prompts.ts` | 200 | System prompt template, per-step message builder, error recovery message | +| `src/agent/llm-client.ts` | 150 | Anthropic SDK wrapper, token tracking, JSON 解析 + Zod 验证 | +| `src/agent/action-executor.ts` | 250 | Action dispatch: LLM action → IPage 方法调用(优先 native CDP,fallback JS 注入) | +| `src/agent/agent-loop.ts` | 350 | 核心循环: context → LLM → execute → observe → repeat;含 loop detection、message compaction、budget warning | +| `src/agent/index.ts` | 10 | Barrel exports | + +### Agent Loop 细节 + +``` +while (step < maxSteps && !done) { + 1. domContext = buildDomContext(page) + 2. screenshot = opts.screenshot ? page.screenshot() : null + 3. message = buildStepMessage(domContext, previousResults, screenshot) + 4. response = llm.chat(systemPrompt, messageHistory) // → AgentResponse + 5. for (action of response.actions) { + if (action.type === 'done') → return success + result = executor.execute(action, domContext.elementMap) + if (result.error) consecutiveErrors++ + else consecutiveErrors = 0 + } + 6. Loop detection: 最近 3 步动作序列相同 → 注入 "try different approach" 警告 + 7. Message compaction: 历史超过 20 轮 → 压缩旧步骤 + 8. 
Verbose output: step#, thinking, actions, results +} +``` + +--- + +## Phase 4: CLI Integration (~300 LOC) + +**Goal**: `opencli operate <task>` 命令 + +### 修改文件 + +| File | Change | +|------|--------| +| `src/cli.ts` | 添加 `operate` 命令(alias `op`),跟 explore/record 同样的模式 | +| `src/errors.ts` | 添加 `AgentError`, `AgentBudgetError` | + +### 新文件 + +**`src/agent/cli-handler.ts`** (~150 LOC) + +CLI-to-agent bridge:验证 API key → 创建 browser session → 运行 AgentLoop → 渲染结果 + +### 命令用法 + +```bash +# 基础用法 +opencli operate "在 GitHub 上 star browser-use 项目" + +# 指定起始 URL +opencli operate --url https://flights.google.com "搜索 3月15日 北京到东京的航班" + +# 录制并保存为 skill +opencli operate --save-as flights/search "搜索航班" --url https://flights.google.com + +# 详细输出(显示每步推理) +opencli operate -v "在 Hacker News 上找到今天最热门的 AI 文章" + +# 使用 screenshot 模式(更贵但更准确) +opencli operate --screenshot "填写这个表单" +``` + +--- + +## Phase 5: Skill Sedimentation (~350 LOC new) + +**Goal**: 成功操作 → 可复用的 YAML CLI 命令 + +### 新文件 + +| File | ~LOC | Purpose | +|------|------|---------| +| `src/agent/trace-recorder.ts` | 150 | 录制每步动作 + 解析 durable CSS selector(优先 data-testid > id > aria-label > 结构路径) | +| `src/agent/skill-saver.ts` | 200 | Trace → YAML pipeline 转换,写入 `~/.opencli/clis/<site>/<name>.yaml` | + +### 沉淀流程 + +``` +Agent 执行: click[42] → type[73, "北京"] → click[88] → extract + ↓ TraceRecorder +Trace: [{ action: click, selector: "[data-testid='search-btn']" }, + { action: type, selector: "#origin", text: "{{args.from}}" }, ...] + ↓ SkillSaver +YAML: steps: + - navigate: https://flights.google.com + - evaluate: "document.querySelector('[data-testid=search-btn]').click()" + - evaluate: "..." (focus + type) + - ... 
+ ↓ 写入 ~/.opencli/clis/flights/search.yaml + ↓ 下次直接 `opencli flights search --from 北京 --to 东京` +``` + +生成的 YAML 兼容现有 `executePipeline()` 系统,**无需 LLM 即可重放**。 + +--- + +## File Summary + +### New Files (10 files, ~1900 LOC) + +| File | Phase | ~LOC | +|------|-------|------| +| `src/agent/types.ts` | 3 | 150 | +| `src/agent/dom-context.ts` | 2 | 250 | +| `src/agent/prompts.ts` | 3 | 200 | +| `src/agent/llm-client.ts` | 3 | 150 | +| `src/agent/action-executor.ts` | 3 | 250 | +| `src/agent/agent-loop.ts` | 3 | 350 | +| `src/agent/cli-handler.ts` | 4 | 150 | +| `src/agent/trace-recorder.ts` | 5 | 150 | +| `src/agent/skill-saver.ts` | 5 | 200 | +| `src/agent/index.ts` | 3 | 10 | + +### Modified Files (7 files, ~200 LOC additions) + +| File | Phase | +|------|-------| +| `extension/src/protocol.ts` | 1 | +| `extension/src/background.ts` | 1 | +| `src/browser/daemon-client.ts` | 1 | +| `src/types.ts` | 1 | +| `src/browser/page.ts` | 1 | +| `src/errors.ts` | 4 | +| `src/cli.ts` | 4 | + +--- + +## Dependency Graph + +``` +Phase 1 (CDP) ←──────── foundational + │ + ├── Phase 2 (DOM Context) + │ │ + │ └──▶ Phase 3 (Agent Loop) ←── depends on 1 + 2 + │ │ + │ ├──▶ Phase 4 (CLI) ←── thin wrapper + │ │ + │ └──▶ Phase 5 (Skill Save) ←── post-processing +``` + +Phase 1 和 2 可以并行开发。Phase 3 依赖两者。Phase 4 是薄壳。Phase 5 在 Phase 3 稳定后实现。 + +--- + +## Verification Plan + +### Phase 1 验证 +```bash +# 在 worktree 中构建 extension +cd ~/code/opencli/.claude/worktrees/open-operator/extension && npm run build + +# 测试 CDP passthrough +node -e " + // 通过 daemon 发送 CDP 命令 + // 验证 Accessibility.getFullAXTree() 返回 AX tree + // 验证 Input.dispatchMouseEvent() 产生 isTrusted:true 事件 +" +``` + +### Phase 2-3 验证 +```bash +# 在 worktree 中编译 +cd ~/code/opencli/.claude/worktrees/open-operator && npm run build + +# 基础 agent 测试 +OPENCLI_API_KEY=... node dist/main.js operate "go to example.com and tell me the page title" -v +``` + +### Phase 4-5 验证 +```bash +# 完整流程测试:operate → save → replay +OPENCLI_API_KEY=... 
node dist/main.js operate \ + --save-as test/example \ + --url https://example.com \ + "find the main heading text" -v + +# 验证 skill 已保存 +cat ~/.opencli/clis/test/example.yaml + +# 重放(无需 LLM) +node dist/main.js test example +``` + +### 运行现有测试 +```bash +cd ~/code/opencli/.claude/worktrees/open-operator && npm test +``` diff --git a/docs/superpowers/specs/2026-03-31-autoresearch-operate-design.md b/docs/superpowers/specs/2026-03-31-autoresearch-operate-design.md new file mode 100644 index 00000000..d6c59374 --- /dev/null +++ b/docs/superpowers/specs/2026-03-31-autoresearch-operate-design.md @@ -0,0 +1,169 @@ +# AutoResearch for OpenCLI Operate + +Use the AutoResearch method (Karpathy, 2025) to automatically optimize +`opencli operate`'s task success rate through iterative, AI-driven +experimentation. + +## Goal + +Improve `opencli operate` success rate on a fixed set of 20 browser +automation tasks. The AI agent modifies any file in `src/agent/`, rebuilds, +evaluates, and commits only if the score does not drop below the baseline. + +## Approach: Classic AutoResearch + +``` +loop: + 1. Claude Code reads program.md + last round's results + 2. Analyzes failed tasks, decides optimization direction + 3. Modifies src/agent/ files + 4. npm run build (must compile) + 5. Runs eval.ts (20 tasks, serial, real browser + real websites) + 6. Score >= baseline → git commit → update baseline + 7. Score < baseline → git revert → log failed attempt + 8. Repeat (10-20 rounds per session) +``` + +## File Structure + +``` +autoresearch/ +├── program.md # Research instructions for Claude Code +├── tasks.json # 20 task definitions + success criteria +├── eval.ts # Evaluation runner +├── run.sh # Launch script +├── baseline.txt # Current best score (e.g. 
"14/20") +└── results/ + └── round-NNN.json # Per-round results +``` + +## Task Set (20 tasks) + +### Self-built tasks (15) — train set + +| # | Task | Type | Success Criteria | +|---|------|------|------------------| +| 1 | Open example.com, extract page title | Extract | extractedData contains "Example Domain" | +| 2 | Search "opencli github" on Google, extract top 3 results | Search+Extract | extractedData is array of 3+ items | +| 3 | Open HN, extract top 5 stories | List extract | 5 items, each has title | +| 4 | Open Wikipedia "JavaScript", extract first paragraph | Long text | contains "programming language" | +| 5 | Open GitHub opencli repo, extract star count | Single value | extractedData contains a number | +| 6 | Search "weather beijing" on DuckDuckGo | Search engine | extractedData non-empty | +| 7 | Open a form page, fill name+email fields | Form fill | input values non-empty | +| 8 | Open httpbin.org/forms/post, fill all fields | Complex form | all fields have values | +| 9 | Open books.toscrape.com, extract 5 books (title+price) | Structured | 5 items with title+price | +| 10 | Open quotes.toscrape.com, extract 3 quotes+authors | Structured | 3 items with quote+author | +| 11 | Open page, scroll to bottom, extract footer text | Scroll+Extract | extractedData has footer text | +| 12 | Open GitHub trending, extract top 3 repos | Dynamic page | 3 items with repo name | +| 13 | Open HN → click first story → extract article title | Multi-step | extractedData has title | +| 14 | Open example.com → click "More information" → extract new page title | Link follow | contains "IANA" | +| 15 | Open jsonplaceholder.typicode.com, extract endpoint list | API docs | non-empty array | + +### Public benchmark subset (5) — test set + +Selected from WebArena or similar benchmarks. Claude Code sees the +score but not the failure details, preventing overfitting. + +Tasks TBD during implementation (must be publicly accessible websites). 
+ +## Evaluation Script (eval.ts) + +```typescript +interface Task { + name: string; + command: string; // natural language task + url?: string; // --url parameter + maxSteps?: number; // default 10 + judge: (result: AgentResult) => boolean; +} + +async function evaluate(tasks: Task[]): Promise { + const results = []; + for (const task of tasks) { + const result = await runOperate(task.command, task.url, task.maxSteps); + const passed = task.judge(result); + results.push({ name: task.name, passed, steps, cost }); + } + return { score: `${passed}/${total}`, tasks: results, totalCost, duration }; +} +``` + +Judge functions per task: +- String inclusion: `result.extractedData includes "X"` +- Array length: `Array.isArray(data) && data.length >= N` +- Field presence: `data?.[0]?.title && data?.[0]?.price` + +## program.md (Research Instructions) + +Core rules for Claude Code: +1. Only modify `src/agent/` files +2. Must `npm run build` and pass compilation after changes +3. Must run `eval.ts` for full evaluation +4. Commit only if score >= baseline, revert otherwise +5. Prefer bold architectural changes over parameter tweaks +6. Do NOT modify eval.ts, tasks.json, or program.md +7. Do NOT hardcode task-specific logic + +Strategy guidance: +- Analyze verbose logs of failed tasks to find root causes +- Common failures: element not in viewport, wrong DOM index, LLM hallucination, premature done +- Prompt optimization often beats code changes +- Try different DOM representation formats +- Try different action combination strategies + +## Launch Script (run.sh) + +```bash +#!/bin/bash +cd "$(dirname "$0")/.." +claude -p \ + --dangerously-skip-permissions \ + --model sonnet \ + --system-prompt "$(cat autoresearch/program.md)" \ + "Read autoresearch/tasks.json and the latest results in autoresearch/results/. \ + Your goal: improve opencli operate success rate. \ + Current baseline: $(cat autoresearch/baseline.txt). \ + Run eval, analyze failures, make changes, repeat." 
+``` + +## Result Format + +Each round produces `autoresearch/results/round-NNN.json`: + +```json +{ + "round": 3, + "timestamp": "2026-03-31T15:30:00Z", + "score": "16/20", + "baseline": "14/20", + "committed": true, + "changes": "Simplified system prompt, added scroll-before-extract", + "tasks": [ + { "name": "example-title", "passed": true, "steps": 1, "cost": 0.004 }, + { "name": "google-search", "passed": false, "steps": 10, "error": "max_steps" } + ], + "totalCost": 1.85, + "duration": "22min" +} +``` + +## Overfitting Prevention + +1. **Train/test split**: 15 self-built tasks are train (Claude sees failure logs), 5 benchmark tasks are test (only sees score) +2. **No task-specific changes**: program.md explicitly forbids hardcoding for individual tasks +3. **Human merge review**: After session ends, human reviews the git diff and rejects overfitting changes + +## Constraints + +- Modifiable scope: `src/agent/` only (all files) +- Execution: Real browser, real websites, real LLM API calls +- Cost estimate: ~$1-3 per round (20 tasks × ~$0.05-0.15 each) +- Time estimate: 15-30 minutes per round +- Session target: 10-20 rounds (~3-8 hours total) + +## Success Criteria + +- Establish a reproducible baseline score +- Achieve measurable improvement (e.g., 14/20 → 17/20) +- Changes are generalizable (test set score also improves) +- All changes pass human review (no overfitting) diff --git a/extension/dist/background.js b/extension/dist/background.js deleted file mode 100644 index dc9ab08c..00000000 --- a/extension/dist/background.js +++ /dev/null @@ -1,861 +0,0 @@ -//#region src/protocol.ts -/** Default daemon port */ -var DAEMON_PORT = 19825; -var DAEMON_HOST = "localhost"; -var DAEMON_WS_URL = `ws://${DAEMON_HOST}:${DAEMON_PORT}/ext`; -/** Lightweight health-check endpoint — probed before each WebSocket attempt. 
*/ -var DAEMON_PING_URL = `http://${DAEMON_HOST}:${DAEMON_PORT}/ping`; -/** Base reconnect delay for extension WebSocket (ms) */ -var WS_RECONNECT_BASE_DELAY = 2e3; -/** Max reconnect delay (ms) */ -var WS_RECONNECT_MAX_DELAY = 6e4; -//#endregion -//#region src/cdp.ts -/** -* CDP execution via chrome.debugger API. -* -* chrome.debugger only needs the "debugger" permission — no host_permissions. -* It can attach to any http/https tab. Avoid chrome:// and chrome-extension:// -* tabs (resolveTabId in background.ts filters them). -*/ -var attached = /* @__PURE__ */ new Set(); -/** Internal blank page used when no user URL is provided. */ -var BLANK_PAGE$1 = "data:text/html,"; -/** Check if a URL can be attached via CDP — only allow http(s) and our internal blank page. */ -function isDebuggableUrl$1(url) { - if (!url) return true; - return url.startsWith("http://") || url.startsWith("https://") || url === BLANK_PAGE$1; -} -async function ensureAttached(tabId) { - try { - const tab = await chrome.tabs.get(tabId); - if (!isDebuggableUrl$1(tab.url)) { - attached.delete(tabId); - throw new Error(`Cannot debug tab ${tabId}: URL is ${tab.url ?? "unknown"}`); - } - } catch (e) { - if (e instanceof Error && e.message.startsWith("Cannot debug tab")) throw e; - attached.delete(tabId); - throw new Error(`Tab ${tabId} no longer exists`); - } - if (attached.has(tabId)) try { - await chrome.debugger.sendCommand({ tabId }, "Runtime.evaluate", { - expression: "1", - returnByValue: true - }); - return; - } catch { - attached.delete(tabId); - } - try { - await chrome.debugger.attach({ tabId }, "1.3"); - } catch (e) { - const msg = e instanceof Error ? e.message : String(e); - const hint = msg.includes("chrome-extension://") ? ". 
Tip: another Chrome extension may be interfering — try disabling other extensions" : ""; - if (msg.includes("Another debugger is already attached")) { - try { - await chrome.debugger.detach({ tabId }); - } catch {} - try { - await chrome.debugger.attach({ tabId }, "1.3"); - } catch { - throw new Error(`attach failed: ${msg}${hint}`); - } - } else throw new Error(`attach failed: ${msg}${hint}`); - } - attached.add(tabId); - try { - await chrome.debugger.sendCommand({ tabId }, "Runtime.enable"); - } catch {} -} -async function evaluate(tabId, expression) { - await ensureAttached(tabId); - const result = await chrome.debugger.sendCommand({ tabId }, "Runtime.evaluate", { - expression, - returnByValue: true, - awaitPromise: true - }); - if (result.exceptionDetails) { - const errMsg = result.exceptionDetails.exception?.description || result.exceptionDetails.text || "Eval error"; - throw new Error(errMsg); - } - return result.result?.value; -} -var evaluateAsync = evaluate; -/** -* Capture a screenshot via CDP Page.captureScreenshot. -* Returns base64-encoded image data. -*/ -async function screenshot(tabId, options = {}) { - await ensureAttached(tabId); - const format = options.format ?? 
"png"; - if (options.fullPage) { - const metrics = await chrome.debugger.sendCommand({ tabId }, "Page.getLayoutMetrics"); - const size = metrics.cssContentSize || metrics.contentSize; - if (size) await chrome.debugger.sendCommand({ tabId }, "Emulation.setDeviceMetricsOverride", { - mobile: false, - width: Math.ceil(size.width), - height: Math.ceil(size.height), - deviceScaleFactor: 1 - }); - } - try { - const params = { format }; - if (format === "jpeg" && options.quality !== void 0) params.quality = Math.max(0, Math.min(100, options.quality)); - return (await chrome.debugger.sendCommand({ tabId }, "Page.captureScreenshot", params)).data; - } finally { - if (options.fullPage) await chrome.debugger.sendCommand({ tabId }, "Emulation.clearDeviceMetricsOverride").catch(() => {}); - } -} -/** -* Set local file paths on a file input element via CDP DOM.setFileInputFiles. -* This bypasses the need to send large base64 payloads through the message channel — -* Chrome reads the files directly from the local filesystem. 
-* -* @param tabId - Target tab ID -* @param files - Array of absolute local file paths -* @param selector - CSS selector to find the file input (optional, defaults to first file input) -*/ -async function setFileInputFiles(tabId, files, selector) { - await ensureAttached(tabId); - await chrome.debugger.sendCommand({ tabId }, "DOM.enable"); - const doc = await chrome.debugger.sendCommand({ tabId }, "DOM.getDocument"); - const query = selector || "input[type=\"file\"]"; - const result = await chrome.debugger.sendCommand({ tabId }, "DOM.querySelector", { - nodeId: doc.root.nodeId, - selector: query - }); - if (!result.nodeId) throw new Error(`No element found matching selector: ${query}`); - await chrome.debugger.sendCommand({ tabId }, "DOM.setFileInputFiles", { - files, - nodeId: result.nodeId - }); -} -async function detach(tabId) { - if (!attached.has(tabId)) return; - attached.delete(tabId); - try { - await chrome.debugger.detach({ tabId }); - } catch {} -} -function registerListeners() { - chrome.tabs.onRemoved.addListener((tabId) => { - attached.delete(tabId); - }); - chrome.debugger.onDetach.addListener((source) => { - if (source.tabId) attached.delete(source.tabId); - }); - chrome.tabs.onUpdated.addListener(async (tabId, info) => { - if (info.url && !isDebuggableUrl$1(info.url)) await detach(tabId); - }); -} -//#endregion -//#region src/background.ts -var ws = null; -var reconnectTimer = null; -var reconnectAttempts = 0; -var _origLog = console.log.bind(console); -var _origWarn = console.warn.bind(console); -var _origError = console.error.bind(console); -function forwardLog(level, args) { - if (!ws || ws.readyState !== WebSocket.OPEN) return; - try { - const msg = args.map((a) => typeof a === "string" ? 
a : JSON.stringify(a)).join(" "); - ws.send(JSON.stringify({ - type: "log", - level, - msg, - ts: Date.now() - })); - } catch {} -} -console.log = (...args) => { - _origLog(...args); - forwardLog("info", args); -}; -console.warn = (...args) => { - _origWarn(...args); - forwardLog("warn", args); -}; -console.error = (...args) => { - _origError(...args); - forwardLog("error", args); -}; -/** -* Probe the daemon via its /ping HTTP endpoint before attempting a WebSocket -* connection. fetch() failures are silently catchable; new WebSocket() is not -* — Chrome logs ERR_CONNECTION_REFUSED to the extension error page before any -* JS handler can intercept it. By keeping the probe inside connect() every -* call site remains unchanged and the guard can never be accidentally skipped. -*/ -async function connect() { - if (ws?.readyState === WebSocket.OPEN || ws?.readyState === WebSocket.CONNECTING) return; - try { - if (!(await fetch(DAEMON_PING_URL, { signal: AbortSignal.timeout(1e3) })).ok) return; - } catch { - return; - } - try { - ws = new WebSocket(DAEMON_WS_URL); - } catch { - scheduleReconnect(); - return; - } - ws.onopen = () => { - console.log("[opencli] Connected to daemon"); - reconnectAttempts = 0; - if (reconnectTimer) { - clearTimeout(reconnectTimer); - reconnectTimer = null; - } - ws?.send(JSON.stringify({ - type: "hello", - version: chrome.runtime.getManifest().version - })); - }; - ws.onmessage = async (event) => { - try { - const result = await handleCommand(JSON.parse(event.data)); - ws?.send(JSON.stringify(result)); - } catch (err) { - console.error("[opencli] Message handling error:", err); - } - }; - ws.onclose = () => { - console.log("[opencli] Disconnected from daemon"); - ws = null; - scheduleReconnect(); - }; - ws.onerror = () => { - ws?.close(); - }; -} -/** -* After MAX_EAGER_ATTEMPTS (reaching 60s backoff), stop scheduling reconnects. 
-* The keepalive alarm (~24s) will still call connect() periodically, but at a -* much lower frequency — reducing console noise when the daemon is not running. -*/ -var MAX_EAGER_ATTEMPTS = 6; -function scheduleReconnect() { - if (reconnectTimer) return; - reconnectAttempts++; - if (reconnectAttempts > MAX_EAGER_ATTEMPTS) return; - const delay = Math.min(WS_RECONNECT_BASE_DELAY * Math.pow(2, reconnectAttempts - 1), WS_RECONNECT_MAX_DELAY); - reconnectTimer = setTimeout(() => { - reconnectTimer = null; - connect(); - }, delay); -} -var automationSessions = /* @__PURE__ */ new Map(); -var WINDOW_IDLE_TIMEOUT = 3e4; -function getWorkspaceKey(workspace) { - return workspace?.trim() || "default"; -} -function resetWindowIdleTimer(workspace) { - const session = automationSessions.get(workspace); - if (!session) return; - if (session.idleTimer) clearTimeout(session.idleTimer); - session.idleDeadlineAt = Date.now() + WINDOW_IDLE_TIMEOUT; - session.idleTimer = setTimeout(async () => { - const current = automationSessions.get(workspace); - if (!current) return; - if (!current.owned) { - console.log(`[opencli] Borrowed workspace ${workspace} detached from window ${current.windowId} (idle timeout)`); - automationSessions.delete(workspace); - return; - } - try { - await chrome.windows.remove(current.windowId); - console.log(`[opencli] Automation window ${current.windowId} (${workspace}) closed (idle timeout)`); - } catch {} - automationSessions.delete(workspace); - }, WINDOW_IDLE_TIMEOUT); -} -/** Get or create the dedicated automation window. 
*/ -async function getAutomationWindow(workspace) { - const existing = automationSessions.get(workspace); - if (existing) try { - await chrome.windows.get(existing.windowId); - return existing.windowId; - } catch { - automationSessions.delete(workspace); - } - const session = { - windowId: (await chrome.windows.create({ - url: BLANK_PAGE, - focused: false, - width: 1280, - height: 900, - type: "normal" - })).id, - idleTimer: null, - idleDeadlineAt: Date.now() + WINDOW_IDLE_TIMEOUT, - owned: true, - preferredTabId: null - }; - automationSessions.set(workspace, session); - console.log(`[opencli] Created automation window ${session.windowId} (${workspace})`); - resetWindowIdleTimer(workspace); - await new Promise((resolve) => setTimeout(resolve, 200)); - return session.windowId; -} -chrome.windows.onRemoved.addListener((windowId) => { - for (const [workspace, session] of automationSessions.entries()) if (session.windowId === windowId) { - console.log(`[opencli] Automation window closed (${workspace})`); - if (session.idleTimer) clearTimeout(session.idleTimer); - automationSessions.delete(workspace); - } -}); -var initialized = false; -function initialize() { - if (initialized) return; - initialized = true; - chrome.alarms.create("keepalive", { periodInMinutes: .4 }); - registerListeners(); - connect(); - console.log("[opencli] OpenCLI extension initialized"); -} -chrome.runtime.onInstalled.addListener(() => { - initialize(); -}); -chrome.runtime.onStartup.addListener(() => { - initialize(); -}); -chrome.alarms.onAlarm.addListener((alarm) => { - if (alarm.name === "keepalive") connect(); -}); -chrome.runtime.onMessage.addListener((msg, _sender, sendResponse) => { - if (msg?.type === "getStatus") sendResponse({ - connected: ws?.readyState === WebSocket.OPEN, - reconnecting: reconnectTimer !== null - }); - return false; -}); -async function handleCommand(cmd) { - const workspace = getWorkspaceKey(cmd.workspace); - resetWindowIdleTimer(workspace); - try { - switch 
(cmd.action) { - case "exec": return await handleExec(cmd, workspace); - case "navigate": return await handleNavigate(cmd, workspace); - case "tabs": return await handleTabs(cmd, workspace); - case "cookies": return await handleCookies(cmd); - case "screenshot": return await handleScreenshot(cmd, workspace); - case "close-window": return await handleCloseWindow(cmd, workspace); - case "sessions": return await handleSessions(cmd); - case "set-file-input": return await handleSetFileInput(cmd, workspace); - case "bind-current": return await handleBindCurrent(cmd, workspace); - default: return { - id: cmd.id, - ok: false, - error: `Unknown action: ${cmd.action}` - }; - } - } catch (err) { - return { - id: cmd.id, - ok: false, - error: err instanceof Error ? err.message : String(err) - }; - } -} -/** Internal blank page used when no user URL is provided. */ -var BLANK_PAGE = "data:text/html,"; -/** Check if a URL can be attached via CDP — only allow http(s) and our internal blank page. */ -function isDebuggableUrl(url) { - if (!url) return true; - return url.startsWith("http://") || url.startsWith("https://") || url === BLANK_PAGE; -} -/** Check if a URL is safe for user-facing navigation (http/https only). */ -function isSafeNavigationUrl(url) { - return url.startsWith("http://") || url.startsWith("https://"); -} -/** Minimal URL normalization for same-page comparison: root slash + default port only. */ -function normalizeUrlForComparison(url) { - if (!url) return ""; - try { - const parsed = new URL(url); - if (parsed.protocol === "https:" && parsed.port === "443" || parsed.protocol === "http:" && parsed.port === "80") parsed.port = ""; - const pathname = parsed.pathname === "/" ? 
"" : parsed.pathname; - return `${parsed.protocol}//${parsed.host}${pathname}${parsed.search}${parsed.hash}`; - } catch { - return url; - } -} -function isTargetUrl(currentUrl, targetUrl) { - return normalizeUrlForComparison(currentUrl) === normalizeUrlForComparison(targetUrl); -} -function matchesDomain(url, domain) { - if (!url) return false; - try { - const parsed = new URL(url); - return parsed.hostname === domain || parsed.hostname.endsWith(`.${domain}`); - } catch { - return false; - } -} -function matchesBindCriteria(tab, cmd) { - if (!tab.id || !isDebuggableUrl(tab.url)) return false; - if (cmd.matchDomain && !matchesDomain(tab.url, cmd.matchDomain)) return false; - if (cmd.matchPathPrefix) try { - if (!new URL(tab.url).pathname.startsWith(cmd.matchPathPrefix)) return false; - } catch { - return false; - } - return true; -} -function isNotebooklmWorkspace(workspace) { - return workspace === "site:notebooklm"; -} -function classifyNotebooklmUrl(url) { - if (!url) return "other"; - try { - const parsed = new URL(url); - if (parsed.hostname !== "notebooklm.google.com") return "other"; - return parsed.pathname.startsWith("/notebook/") ? "notebook" : "home"; - } catch { - return "other"; - } -} -function scoreWorkspaceTab(workspace, tab) { - if (!tab.id || !isDebuggableUrl(tab.url)) return -1; - if (isNotebooklmWorkspace(workspace)) { - const kind = classifyNotebooklmUrl(tab.url); - if (kind === "other") return -1; - if (kind === "notebook") return tab.active ? 400 : 300; - return tab.active ? 
200 : 100; - } - return -1; -} -function setWorkspaceSession(workspace, session) { - const existing = automationSessions.get(workspace); - if (existing?.idleTimer) clearTimeout(existing.idleTimer); - automationSessions.set(workspace, { - ...session, - idleTimer: null, - idleDeadlineAt: Date.now() + WINDOW_IDLE_TIMEOUT - }); -} -async function maybeBindWorkspaceToExistingTab(workspace) { - if (!isNotebooklmWorkspace(workspace)) return null; - const tabs = await chrome.tabs.query({}); - let bestTab = null; - let bestScore = -1; - for (const tab of tabs) { - const score = scoreWorkspaceTab(workspace, tab); - if (score > bestScore) { - bestScore = score; - bestTab = tab; - } - } - if (!bestTab?.id || bestScore < 0) return null; - setWorkspaceSession(workspace, { - windowId: bestTab.windowId, - owned: false, - preferredTabId: bestTab.id - }); - console.log(`[opencli] Workspace ${workspace} bound to existing tab ${bestTab.id} in window ${bestTab.windowId}`); - resetWindowIdleTimer(workspace); - return bestTab.id; -} -/** -* Resolve target tab in the automation window. -* If explicit tabId is given, use that directly. -* Otherwise, find or create a tab in the dedicated automation window. -*/ -async function resolveTabId(tabId, workspace) { - if (tabId !== void 0) try { - const tab = await chrome.tabs.get(tabId); - const session = automationSessions.get(workspace); - const matchesSession = session ? session.preferredTabId !== null ? 
session.preferredTabId === tabId : tab.windowId === session.windowId : false; - if (isDebuggableUrl(tab.url) && matchesSession) return tabId; - if (session && !matchesSession) console.warn(`[opencli] Tab ${tabId} is not bound to workspace ${workspace}, re-resolving`); - else if (!isDebuggableUrl(tab.url)) console.warn(`[opencli] Tab ${tabId} URL is not debuggable (${tab.url}), re-resolving`); - } catch { - console.warn(`[opencli] Tab ${tabId} no longer exists, re-resolving`); - } - const adoptedTabId = await maybeBindWorkspaceToExistingTab(workspace); - if (adoptedTabId !== null) return adoptedTabId; - const existingSession = automationSessions.get(workspace); - if (existingSession?.preferredTabId !== null) try { - const preferredTab = await chrome.tabs.get(existingSession.preferredTabId); - if (isDebuggableUrl(preferredTab.url)) return preferredTab.id; - } catch { - automationSessions.delete(workspace); - } - const windowId = await getAutomationWindow(workspace); - const tabs = await chrome.tabs.query({ windowId }); - const debuggableTab = tabs.find((t) => t.id && isDebuggableUrl(t.url)); - if (debuggableTab?.id) return debuggableTab.id; - const reuseTab = tabs.find((t) => t.id); - if (reuseTab?.id) { - await chrome.tabs.update(reuseTab.id, { url: BLANK_PAGE }); - await new Promise((resolve) => setTimeout(resolve, 300)); - try { - const updated = await chrome.tabs.get(reuseTab.id); - if (isDebuggableUrl(updated.url)) return reuseTab.id; - console.warn(`[opencli] data: URI was intercepted (${updated.url}), creating fresh tab`); - } catch {} - } - const newTab = await chrome.tabs.create({ - windowId, - url: BLANK_PAGE, - active: true - }); - if (!newTab.id) throw new Error("Failed to create tab in automation window"); - return newTab.id; -} -async function listAutomationTabs(workspace) { - const session = automationSessions.get(workspace); - if (!session) return []; - if (session.preferredTabId !== null) try { - return [await 
chrome.tabs.get(session.preferredTabId)]; - } catch { - automationSessions.delete(workspace); - return []; - } - try { - return await chrome.tabs.query({ windowId: session.windowId }); - } catch { - automationSessions.delete(workspace); - return []; - } -} -async function listAutomationWebTabs(workspace) { - return (await listAutomationTabs(workspace)).filter((tab) => isDebuggableUrl(tab.url)); -} -async function handleExec(cmd, workspace) { - if (!cmd.code) return { - id: cmd.id, - ok: false, - error: "Missing code" - }; - const tabId = await resolveTabId(cmd.tabId, workspace); - try { - const data = await evaluateAsync(tabId, cmd.code); - return { - id: cmd.id, - ok: true, - data - }; - } catch (err) { - return { - id: cmd.id, - ok: false, - error: err instanceof Error ? err.message : String(err) - }; - } -} -async function handleNavigate(cmd, workspace) { - if (!cmd.url) return { - id: cmd.id, - ok: false, - error: "Missing url" - }; - if (!isSafeNavigationUrl(cmd.url)) return { - id: cmd.id, - ok: false, - error: "Blocked URL scheme -- only http:// and https:// are allowed" - }; - const tabId = await resolveTabId(cmd.tabId, workspace); - const beforeTab = await chrome.tabs.get(tabId); - const beforeNormalized = normalizeUrlForComparison(beforeTab.url); - const targetUrl = cmd.url; - if (beforeTab.status === "complete" && isTargetUrl(beforeTab.url, targetUrl)) return { - id: cmd.id, - ok: true, - data: { - title: beforeTab.title, - url: beforeTab.url, - tabId, - timedOut: false - } - }; - await detach(tabId); - await chrome.tabs.update(tabId, { url: targetUrl }); - let timedOut = false; - await new Promise((resolve) => { - let settled = false; - let checkTimer = null; - let timeoutTimer = null; - const finish = () => { - if (settled) return; - settled = true; - chrome.tabs.onUpdated.removeListener(listener); - if (checkTimer) clearTimeout(checkTimer); - if (timeoutTimer) clearTimeout(timeoutTimer); - resolve(); - }; - const isNavigationDone = (url) => { - return 
isTargetUrl(url, targetUrl) || normalizeUrlForComparison(url) !== beforeNormalized; - }; - const listener = (id, info, tab) => { - if (id !== tabId) return; - if (info.status === "complete" && isNavigationDone(tab.url ?? info.url)) finish(); - }; - chrome.tabs.onUpdated.addListener(listener); - checkTimer = setTimeout(async () => { - try { - const currentTab = await chrome.tabs.get(tabId); - if (currentTab.status === "complete" && isNavigationDone(currentTab.url)) finish(); - } catch {} - }, 100); - timeoutTimer = setTimeout(() => { - timedOut = true; - console.warn(`[opencli] Navigate to ${targetUrl} timed out after 15s`); - finish(); - }, 15e3); - }); - const tab = await chrome.tabs.get(tabId); - return { - id: cmd.id, - ok: true, - data: { - title: tab.title, - url: tab.url, - tabId, - timedOut - } - }; -} -async function handleTabs(cmd, workspace) { - switch (cmd.op) { - case "list": { - const data = (await listAutomationWebTabs(workspace)).map((t, i) => ({ - index: i, - tabId: t.id, - url: t.url, - title: t.title, - active: t.active - })); - return { - id: cmd.id, - ok: true, - data - }; - } - case "new": { - if (cmd.url && !isSafeNavigationUrl(cmd.url)) return { - id: cmd.id, - ok: false, - error: "Blocked URL scheme -- only http:// and https:// are allowed" - }; - const windowId = await getAutomationWindow(workspace); - const tab = await chrome.tabs.create({ - windowId, - url: cmd.url ?? 
BLANK_PAGE, - active: true - }); - return { - id: cmd.id, - ok: true, - data: { - tabId: tab.id, - url: tab.url - } - }; - } - case "close": { - if (cmd.index !== void 0) { - const target = (await listAutomationWebTabs(workspace))[cmd.index]; - if (!target?.id) return { - id: cmd.id, - ok: false, - error: `Tab index ${cmd.index} not found` - }; - await chrome.tabs.remove(target.id); - await detach(target.id); - return { - id: cmd.id, - ok: true, - data: { closed: target.id } - }; - } - const tabId = await resolveTabId(cmd.tabId, workspace); - await chrome.tabs.remove(tabId); - await detach(tabId); - return { - id: cmd.id, - ok: true, - data: { closed: tabId } - }; - } - case "select": { - if (cmd.index === void 0 && cmd.tabId === void 0) return { - id: cmd.id, - ok: false, - error: "Missing index or tabId" - }; - if (cmd.tabId !== void 0) { - const session = automationSessions.get(workspace); - let tab; - try { - tab = await chrome.tabs.get(cmd.tabId); - } catch { - return { - id: cmd.id, - ok: false, - error: `Tab ${cmd.tabId} no longer exists` - }; - } - if (!session || tab.windowId !== session.windowId) return { - id: cmd.id, - ok: false, - error: `Tab ${cmd.tabId} is not in the automation window` - }; - await chrome.tabs.update(cmd.tabId, { active: true }); - return { - id: cmd.id, - ok: true, - data: { selected: cmd.tabId } - }; - } - const target = (await listAutomationWebTabs(workspace))[cmd.index]; - if (!target?.id) return { - id: cmd.id, - ok: false, - error: `Tab index ${cmd.index} not found` - }; - await chrome.tabs.update(target.id, { active: true }); - return { - id: cmd.id, - ok: true, - data: { selected: target.id } - }; - } - default: return { - id: cmd.id, - ok: false, - error: `Unknown tabs op: ${cmd.op}` - }; - } -} -async function handleCookies(cmd) { - if (!cmd.domain && !cmd.url) return { - id: cmd.id, - ok: false, - error: "Cookie scope required: provide domain or url to avoid dumping all cookies" - }; - const details = {}; - if (cmd.domain) 
details.domain = cmd.domain; - if (cmd.url) details.url = cmd.url; - const data = (await chrome.cookies.getAll(details)).map((c) => ({ - name: c.name, - value: c.value, - domain: c.domain, - path: c.path, - secure: c.secure, - httpOnly: c.httpOnly, - expirationDate: c.expirationDate - })); - return { - id: cmd.id, - ok: true, - data - }; -} -async function handleScreenshot(cmd, workspace) { - const tabId = await resolveTabId(cmd.tabId, workspace); - try { - const data = await screenshot(tabId, { - format: cmd.format, - quality: cmd.quality, - fullPage: cmd.fullPage - }); - return { - id: cmd.id, - ok: true, - data - }; - } catch (err) { - return { - id: cmd.id, - ok: false, - error: err instanceof Error ? err.message : String(err) - }; - } -} -async function handleCloseWindow(cmd, workspace) { - const session = automationSessions.get(workspace); - if (session) { - if (session.owned) try { - await chrome.windows.remove(session.windowId); - } catch {} - if (session.idleTimer) clearTimeout(session.idleTimer); - automationSessions.delete(workspace); - } - return { - id: cmd.id, - ok: true, - data: { closed: true } - }; -} -async function handleSetFileInput(cmd, workspace) { - if (!cmd.files || !Array.isArray(cmd.files) || cmd.files.length === 0) return { - id: cmd.id, - ok: false, - error: "Missing or empty files array" - }; - const tabId = await resolveTabId(cmd.tabId, workspace); - try { - await setFileInputFiles(tabId, cmd.files, cmd.selector); - return { - id: cmd.id, - ok: true, - data: { count: cmd.files.length } - }; - } catch (err) { - return { - id: cmd.id, - ok: false, - error: err instanceof Error ? 
err.message : String(err) - }; - } -} -async function handleSessions(cmd) { - const now = Date.now(); - const data = await Promise.all([...automationSessions.entries()].map(async ([workspace, session]) => ({ - workspace, - windowId: session.windowId, - tabCount: (await chrome.tabs.query({ windowId: session.windowId })).filter((tab) => isDebuggableUrl(tab.url)).length, - idleMsRemaining: Math.max(0, session.idleDeadlineAt - now) - }))); - return { - id: cmd.id, - ok: true, - data - }; -} -async function handleBindCurrent(cmd, workspace) { - const activeTabs = await chrome.tabs.query({ - active: true, - lastFocusedWindow: true - }); - const fallbackTabs = await chrome.tabs.query({ lastFocusedWindow: true }); - const allTabs = await chrome.tabs.query({}); - const boundTab = activeTabs.find((tab) => matchesBindCriteria(tab, cmd)) ?? fallbackTabs.find((tab) => matchesBindCriteria(tab, cmd)) ?? allTabs.find((tab) => matchesBindCriteria(tab, cmd)); - if (!boundTab?.id) return { - id: cmd.id, - ok: false, - error: cmd.matchDomain || cmd.matchPathPrefix ? `No visible tab matching ${cmd.matchDomain ?? "domain"}${cmd.matchPathPrefix ? 
` ${cmd.matchPathPrefix}` : ""}` : "No active debuggable tab found" - }; - setWorkspaceSession(workspace, { - windowId: boundTab.windowId, - owned: false, - preferredTabId: boundTab.id - }); - resetWindowIdleTimer(workspace); - console.log(`[opencli] Workspace ${workspace} explicitly bound to tab ${boundTab.id} (${boundTab.url})`); - return { - id: cmd.id, - ok: true, - data: { - tabId: boundTab.id, - windowId: boundTab.windowId, - url: boundTab.url, - title: boundTab.title, - workspace - } - }; -} -//#endregion diff --git a/extension/package-lock.json b/extension/package-lock.json index dfc34964..2288e01c 100644 --- a/extension/package-lock.json +++ b/extension/package-lock.json @@ -1,12 +1,12 @@ { "name": "opencli-extension", - "version": "1.5.4", + "version": "1.5.5", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "opencli-extension", - "version": "1.5.4", + "version": "1.5.5", "devDependencies": { "@types/chrome": "^0.0.287", "typescript": "^5.7.0", diff --git a/extension/src/background.ts b/extension/src/background.ts index 85cbd3ca..c7559238 100644 --- a/extension/src/background.ts +++ b/extension/src/background.ts @@ -259,6 +259,8 @@ async function handleCommand(cmd: Command): Promise { return await handleScreenshot(cmd, workspace); case 'close-window': return await handleCloseWindow(cmd, workspace); + case 'cdp': + return await handleCdp(cmd, workspace); case 'sessions': return await handleSessions(cmd); case 'set-file-input': @@ -280,12 +282,12 @@ async function handleCommand(cmd: Command): Promise { // ─── Action handlers ───────────────────────────────────────────────── /** Internal blank page used when no user URL is provided. */ -const BLANK_PAGE = 'data:text/html,'; +const BLANK_PAGE = 'about:blank'; -/** Check if a URL can be attached via CDP — only allow http(s) and our internal blank page. */ +/** Check if a URL can be attached via CDP — only allow http(s) and blank pages. 
*/ function isDebuggableUrl(url?: string): boolean { if (!url) return true; // empty/undefined = tab still loading, allow it - return url.startsWith('http://') || url.startsWith('https://') || url === BLANK_PAGE; + return url.startsWith('http://') || url.startsWith('https://') || url === 'about:blank' || url.startsWith('data:'); } /** Check if a URL is safe for user-facing navigation (http/https only). */ @@ -428,7 +430,7 @@ async function resolveTabId(tabId: number | undefined, workspace: string): Promi if (adoptedTabId !== null) return adoptedTabId; const existingSession = automationSessions.get(workspace); - if (existingSession?.preferredTabId !== null) { + if (existingSession && existingSession.preferredTabId !== null) { try { const preferredTab = await chrome.tabs.get(existingSession.preferredTabId); if (isDebuggableUrl(preferredTab.url)) return preferredTab.id!; @@ -494,7 +496,8 @@ async function handleExec(cmd: Command, workspace: string): Promise { if (!cmd.code) return { id: cmd.id, ok: false, error: 'Missing code' }; const tabId = await resolveTabId(cmd.tabId, workspace); try { - const data = await executor.evaluateAsync(tabId, cmd.code); + const aggressive = workspace.startsWith('operate:'); + const data = await executor.evaluateAsync(tabId, cmd.code, aggressive); return { id: cmd.id, ok: true, data }; } catch (err) { return { id: cmd.id, ok: false, error: err instanceof Error ? 
err.message : String(err) }; @@ -685,6 +688,50 @@ async function handleScreenshot(cmd: Command, workspace: string): Promise { + if (!cmd.cdpMethod) return { id: cmd.id, ok: false, error: 'Missing cdpMethod' }; + if (!CDP_ALLOWLIST.has(cmd.cdpMethod)) { + return { id: cmd.id, ok: false, error: `CDP method not permitted: ${cmd.cdpMethod}` }; + } + const tabId = await resolveTabId(cmd.tabId, workspace); + try { + const aggressive = workspace.startsWith('operate:'); + await executor.ensureAttached(tabId, aggressive); + const data = await chrome.debugger.sendCommand( + { tabId }, + cmd.cdpMethod, + cmd.cdpParams ?? {}, + ); + return { id: cmd.id, ok: true, data }; + } catch (err) { + return { id: cmd.id, ok: false, error: err instanceof Error ? err.message : String(err) }; + } +} + async function handleCloseWindow(cmd: Command, workspace: string): Promise { const session = automationSessions.get(workspace); if (session) { diff --git a/extension/src/cdp.ts b/extension/src/cdp.ts index 09f609c4..83c0e2f7 100644 --- a/extension/src/cdp.ts +++ b/extension/src/cdp.ts @@ -8,16 +8,13 @@ const attached = new Set(); -/** Internal blank page used when no user URL is provided. */ -const BLANK_PAGE = 'data:text/html,'; - -/** Check if a URL can be attached via CDP — only allow http(s) and our internal blank page. */ +/** Check if a URL can be attached via CDP — only allow http(s) and blank pages. 
*/ function isDebuggableUrl(url?: string): boolean { if (!url) return true; // empty/undefined = tab still loading, allow it - return url.startsWith('http://') || url.startsWith('https://') || url === BLANK_PAGE; + return url.startsWith('http://') || url.startsWith('https://') || url === 'about:blank' || url.startsWith('data:'); } -async function ensureAttached(tabId: number): Promise { +export async function ensureAttached(tabId: number, aggressiveRetry: boolean = false): Promise { // Verify the tab URL is debuggable before attempting attach try { const tab = await chrome.tabs.get(tabId); @@ -46,24 +43,47 @@ async function ensureAttached(tabId: number): Promise { } } - try { - await chrome.debugger.attach({ tabId }, '1.3'); - } catch (e: unknown) { - const msg = e instanceof Error ? e.message : String(e); - const hint = msg.includes('chrome-extension://') - ? '. Tip: another Chrome extension may be interfering — try disabling other extensions' - : ''; - if (msg.includes('Another debugger is already attached')) { + // Retry attach up to 3 times — other extensions (1Password, Playwright MCP Bridge) + // can temporarily interfere with chrome.debugger. A short delay usually resolves it. + // Normal commands: 2 retries, 500ms delay (fast fail for non-operate use) + // Operate commands: 5 retries, 1500ms delay (aggressive, tolerates extension interference) + const MAX_ATTACH_RETRIES = aggressiveRetry ? 5 : 2; + const RETRY_DELAY_MS = aggressiveRetry ? 1500 : 500; + let lastError = ''; + + for (let attempt = 1; attempt <= MAX_ATTACH_RETRIES; attempt++) { + try { + // Force detach first to clear any stale state from other extensions try { await chrome.debugger.detach({ tabId }); } catch { /* ignore */ } - try { - await chrome.debugger.attach({ tabId }, '1.3'); - } catch { - throw new Error(`attach failed: ${msg}${hint}`); + await chrome.debugger.attach({ tabId }, '1.3'); + lastError = ''; + break; // Success + } catch (e: unknown) { + lastError = e instanceof Error ? 
e.message : String(e); + if (attempt < MAX_ATTACH_RETRIES) { + console.warn(`[opencli] attach attempt ${attempt}/${MAX_ATTACH_RETRIES} failed: ${lastError}, retrying in ${RETRY_DELAY_MS}ms...`); + await new Promise(resolve => setTimeout(resolve, RETRY_DELAY_MS)); + // Re-verify tab URL before retrying (it may have changed) + try { + const tab = await chrome.tabs.get(tabId); + if (!isDebuggableUrl(tab.url)) { + lastError = `Tab URL changed to ${tab.url} during retry`; + break; // Don't retry if URL became un-debuggable + } + } catch { + lastError = `Tab ${tabId} no longer exists`; + break; + } } - } else { - throw new Error(`attach failed: ${msg}${hint}`); } } + + if (lastError) { + const hint = lastError.includes('chrome-extension://') + ? '. Tip: another Chrome extension may be interfering — try disabling other extensions' + : ''; + throw new Error(`attach failed: ${lastError}${hint}`); + } attached.add(tabId); try { @@ -73,26 +93,45 @@ async function ensureAttached(tabId: number): Promise { } } -export async function evaluate(tabId: number, expression: string): Promise { - await ensureAttached(tabId); - - const result = await chrome.debugger.sendCommand({ tabId }, 'Runtime.evaluate', { - expression, - returnByValue: true, - awaitPromise: true, - }) as { - result?: { type: string; value?: unknown; description?: string; subtype?: string }; - exceptionDetails?: { exception?: { description?: string }; text?: string }; - }; +export async function evaluate(tabId: number, expression: string, aggressiveRetry: boolean = false): Promise { + // Retry the entire evaluate (attach + command). + // Normal: 2 retries. Operate: 3 retries (tolerates extension interference). + const MAX_EVAL_RETRIES = aggressiveRetry ? 
3 : 2; + for (let attempt = 1; attempt <= MAX_EVAL_RETRIES; attempt++) { + try { + await ensureAttached(tabId, aggressiveRetry); + + const result = await chrome.debugger.sendCommand({ tabId }, 'Runtime.evaluate', { + expression, + returnByValue: true, + awaitPromise: true, + }) as { + result?: { type: string; value?: unknown; description?: string; subtype?: string }; + exceptionDetails?: { exception?: { description?: string }; text?: string }; + }; + + if (result.exceptionDetails) { + const errMsg = result.exceptionDetails.exception?.description + || result.exceptionDetails.text + || 'Eval error'; + throw new Error(errMsg); + } - if (result.exceptionDetails) { - const errMsg = result.exceptionDetails.exception?.description - || result.exceptionDetails.text - || 'Eval error'; - throw new Error(errMsg); + return result.result?.value; + } catch (e) { + const msg = e instanceof Error ? e.message : String(e); + // Only retry on attach/debugger errors, not on JS eval errors + const isAttachError = msg.includes('attach failed') || msg.includes('Debugger is not attached') + || msg.includes('chrome-extension://') || msg.includes('Target closed'); + if (isAttachError && attempt < MAX_EVAL_RETRIES) { + attached.delete(tabId); // Force re-attach on next attempt + await new Promise(resolve => setTimeout(resolve, 1000)); + continue; + } + throw e; + } } - - return result.result?.value; + throw new Error('evaluate: max retries exhausted'); } export const evaluateAsync = evaluate; diff --git a/extension/src/protocol.ts b/extension/src/protocol.ts index 0eebeea2..3cb37938 100644 --- a/extension/src/protocol.ts +++ b/extension/src/protocol.ts @@ -5,7 +5,7 @@ * Everything else is just JS code sent via 'exec'. 
*/ -export type Action = 'exec' | 'navigate' | 'tabs' | 'cookies' | 'screenshot' | 'close-window' | 'sessions' | 'set-file-input' | 'bind-current'; +export type Action = 'exec' | 'navigate' | 'tabs' | 'cookies' | 'screenshot' | 'close-window' | 'sessions' | 'set-file-input' | 'bind-current' | 'cdp'; export interface Command { /** Unique request ID */ @@ -40,6 +40,10 @@ export interface Command { files?: string[]; /** CSS selector for file input element (set-file-input action) */ selector?: string; + /** CDP method name for 'cdp' action (e.g. 'Accessibility.getFullAXTree') */ + cdpMethod?: string; + /** CDP method params for 'cdp' action */ + cdpParams?: Record; } export interface Result { diff --git a/package-lock.json b/package-lock.json index 9301498e..eeb83377 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,13 +10,16 @@ "hasInstallScript": true, "license": "Apache-2.0", "dependencies": { + "@anthropic-ai/sdk": "^0.80.0", "chalk": "^5.3.0", "cli-table3": "^0.6.5", "commander": "^14.0.3", "js-yaml": "^4.1.0", + "openai": "^6.33.0", "turndown": "^7.2.2", "undici": "^7.24.6", - "ws": "^8.18.0" + "ws": "^8.18.0", + "zod": "^4.3.6" }, "bin": { "opencli": "dist/main.js" @@ -196,6 +199,7 @@ "integrity": "sha512-y1IOpG6OSmTpGg/CT0YBb/EAhR2nsC18QWp9Jy8HO9iGySpcwaTvs5kHa17daP3BMTwWyaX9/1tDTDQshZzXdg==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@algolia/client-common": "5.49.2", "@algolia/requester-browser-xhr": "5.49.2", @@ -293,6 +297,26 @@ "node": ">= 14.0.0" } }, + "node_modules/@anthropic-ai/sdk": { + "version": "0.80.0", + "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.80.0.tgz", + "integrity": "sha512-WeXLn7zNVk3yjeshn+xZHvld6AoFUOR3Sep6pSoHho5YbSi6HwcirqgPA5ccFuW8QTVJAAU7N8uQQC6Wa9TG+g==", + "license": "MIT", + "dependencies": { + "json-schema-to-ts": "^3.1.1" + }, + "bin": { + "anthropic-ai-sdk": "bin/cli" + }, + "peerDependencies": { + "zod": "^3.25.0 || ^4.0.0" + }, + "peerDependenciesMeta": { + "zod": { + 
"optional": true + } + } + }, "node_modules/@babel/helper-string-parser": { "version": "7.27.1", "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.27.1.tgz", @@ -329,6 +353,15 @@ "node": ">=6.0.0" } }, + "node_modules/@babel/runtime": { + "version": "7.29.2", + "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.29.2.tgz", + "integrity": "sha512-JiDShH45zKHWyGe4ZNVRrCjBz8Nh9TMmZG1kh4QTK8hCBTWBi8Da+i7s1fJw7/lYpM4ccepSNfqzZ/QvABBi5g==", + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, "node_modules/@babel/types": { "version": "7.29.0", "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.29.0.tgz", @@ -411,7 +444,6 @@ "dev": true, "license": "MIT", "optional": true, - "peer": true, "dependencies": { "@emnapi/wasi-threads": "1.2.0", "tslib": "^2.4.0" @@ -424,7 +456,6 @@ "dev": true, "license": "MIT", "optional": true, - "peer": true, "dependencies": { "tslib": "^2.4.0" } @@ -436,7 +467,6 @@ "dev": true, "license": "MIT", "optional": true, - "peer": true, "dependencies": { "tslib": "^2.4.0" } @@ -914,22 +944,20 @@ "license": "BSD-2-Clause" }, "node_modules/@napi-rs/wasm-runtime": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-1.1.2.tgz", - "integrity": "sha512-sNXv5oLJ7ob93xkZ1XnxisYhGYXfaG9f65/ZgYuAu3qt7b3NadcOEhLvx28hv31PgX8SZJRYrAIPQilQmFpLVw==", + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-1.1.1.tgz", + "integrity": "sha512-p64ah1M1ld8xjWv3qbvFwHiFVWrq1yFvV4f7w+mzaqiR4IlSgkqhcRdHwsGgomwzBH51sRY4NEowLxnaBjcW/A==", "dev": true, "license": "MIT", "optional": true, "dependencies": { + "@emnapi/core": "^1.7.1", + "@emnapi/runtime": "^1.7.1", "@tybys/wasm-util": "^0.10.1" }, "funding": { "type": "github", "url": "https://github.com/sponsors/Brooooooklyn" - }, - "peerDependencies": { - "@emnapi/core": "^1.7.1", - "@emnapi/runtime": "^1.7.1" } }, "node_modules/@oxc-project/types": 
{ @@ -943,9 +971,9 @@ } }, "node_modules/@rolldown/binding-android-arm64": { - "version": "1.0.0-rc.12", - "resolved": "https://registry.npmjs.org/@rolldown/binding-android-arm64/-/binding-android-arm64-1.0.0-rc.12.tgz", - "integrity": "sha512-pv1y2Fv0JybcykuiiD3qBOBdz6RteYojRFY1d+b95WVuzx211CRh+ytI/+9iVyWQ6koTh5dawe4S/yRfOFjgaA==", + "version": "1.0.0-rc.11", + "resolved": "https://registry.npmjs.org/@rolldown/binding-android-arm64/-/binding-android-arm64-1.0.0-rc.11.tgz", + "integrity": "sha512-SJ+/g+xNnOh6NqYxD0V3uVN4W3VfnrGsC9/hoglicgTNfABFG9JjISvkkU0dNY84MNHLWyOgxP9v9Y9pX4S7+A==", "cpu": [ "arm64" ], @@ -960,9 +988,9 @@ } }, "node_modules/@rolldown/binding-darwin-arm64": { - "version": "1.0.0-rc.12", - "resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-arm64/-/binding-darwin-arm64-1.0.0-rc.12.tgz", - "integrity": "sha512-cFYr6zTG/3PXXF3pUO+umXxt1wkRK/0AYT8lDwuqvRC+LuKYWSAQAQZjCWDQpAH172ZV6ieYrNnFzVVcnSflAg==", + "version": "1.0.0-rc.11", + "resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-arm64/-/binding-darwin-arm64-1.0.0-rc.11.tgz", + "integrity": "sha512-7WQgR8SfOPwmDZGFkThUvsmd/nwAWv91oCO4I5LS7RKrssPZmOt7jONN0cW17ydGC1n/+puol1IpoieKqQidmg==", "cpu": [ "arm64" ], @@ -977,9 +1005,9 @@ } }, "node_modules/@rolldown/binding-darwin-x64": { - "version": "1.0.0-rc.12", - "resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-x64/-/binding-darwin-x64-1.0.0-rc.12.tgz", - "integrity": "sha512-ZCsYknnHzeXYps0lGBz8JrF37GpE9bFVefrlmDrAQhOEi4IOIlcoU1+FwHEtyXGx2VkYAvhu7dyBf75EJQffBw==", + "version": "1.0.0-rc.11", + "resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-x64/-/binding-darwin-x64-1.0.0-rc.11.tgz", + "integrity": "sha512-39Ks6UvIHq4rEogIfQBoBRusj0Q0nPVWIvqmwBLaT6aqQGIakHdESBVOPRRLacy4WwUPIx4ZKzfZ9PMW+IeyUQ==", "cpu": [ "x64" ], @@ -994,9 +1022,9 @@ } }, "node_modules/@rolldown/binding-freebsd-x64": { - "version": "1.0.0-rc.12", - "resolved": 
"https://registry.npmjs.org/@rolldown/binding-freebsd-x64/-/binding-freebsd-x64-1.0.0-rc.12.tgz", - "integrity": "sha512-dMLeprcVsyJsKolRXyoTH3NL6qtsT0Y2xeuEA8WQJquWFXkEC4bcu1rLZZSnZRMtAqwtrF/Ib9Ddtpa/Gkge9Q==", + "version": "1.0.0-rc.11", + "resolved": "https://registry.npmjs.org/@rolldown/binding-freebsd-x64/-/binding-freebsd-x64-1.0.0-rc.11.tgz", + "integrity": "sha512-jfsm0ZHfhiqrvWjJAmzsqiIFPz5e7mAoCOPBNTcNgkiid/LaFKiq92+0ojH+nmJmKYkre4t71BWXUZDNp7vsag==", "cpu": [ "x64" ], @@ -1011,9 +1039,9 @@ } }, "node_modules/@rolldown/binding-linux-arm-gnueabihf": { - "version": "1.0.0-rc.12", - "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm-gnueabihf/-/binding-linux-arm-gnueabihf-1.0.0-rc.12.tgz", - "integrity": "sha512-YqWjAgGC/9M1lz3GR1r1rP79nMgo3mQiiA+Hfo+pvKFK1fAJ1bCi0ZQVh8noOqNacuY1qIcfyVfP6HoyBRZ85Q==", + "version": "1.0.0-rc.11", + "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm-gnueabihf/-/binding-linux-arm-gnueabihf-1.0.0-rc.11.tgz", + "integrity": "sha512-zjQaUtSyq1nVe3nxmlSCuR96T1LPlpvmJ0SZy0WJFEsV4kFbXcq2u68L4E6O0XeFj4aex9bEauqjW8UQBeAvfQ==", "cpu": [ "arm" ], @@ -1028,9 +1056,9 @@ } }, "node_modules/@rolldown/binding-linux-arm64-gnu": { - "version": "1.0.0-rc.12", - "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-gnu/-/binding-linux-arm64-gnu-1.0.0-rc.12.tgz", - "integrity": "sha512-/I5AS4cIroLpslsmzXfwbe5OmWvSsrFuEw3mwvbQ1kDxJ822hFHIx+vsN/TAzNVyepI/j/GSzrtCIwQPeKCLIg==", + "version": "1.0.0-rc.11", + "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-gnu/-/binding-linux-arm64-gnu-1.0.0-rc.11.tgz", + "integrity": "sha512-WMW1yE6IOnehTcFE9eipFkm3XN63zypWlrJQ2iF7NrQ9b2LDRjumFoOGJE8RJJTJCTBAdmLMnJ8uVitACUUo1Q==", "cpu": [ "arm64" ], @@ -1045,9 +1073,9 @@ } }, "node_modules/@rolldown/binding-linux-arm64-musl": { - "version": "1.0.0-rc.12", - "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-musl/-/binding-linux-arm64-musl-1.0.0-rc.12.tgz", - "integrity": 
"sha512-V6/wZztnBqlx5hJQqNWwFdxIKN0m38p8Jas+VoSfgH54HSj9tKTt1dZvG6JRHcjh6D7TvrJPWFGaY9UBVOaWPw==", + "version": "1.0.0-rc.11", + "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-musl/-/binding-linux-arm64-musl-1.0.0-rc.11.tgz", + "integrity": "sha512-jfndI9tsfm4APzjNt6QdBkYwre5lRPUgHeDHoI7ydKUuJvz3lZeCfMsI56BZj+7BYqiKsJm7cfd/6KYV7ubrBg==", "cpu": [ "arm64" ], @@ -1062,9 +1090,9 @@ } }, "node_modules/@rolldown/binding-linux-ppc64-gnu": { - "version": "1.0.0-rc.12", - "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-ppc64-gnu/-/binding-linux-ppc64-gnu-1.0.0-rc.12.tgz", - "integrity": "sha512-AP3E9BpcUYliZCxa3w5Kwj9OtEVDYK6sVoUzy4vTOJsjPOgdaJZKFmN4oOlX0Wp0RPV2ETfmIra9x1xuayFB7g==", + "version": "1.0.0-rc.11", + "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-ppc64-gnu/-/binding-linux-ppc64-gnu-1.0.0-rc.11.tgz", + "integrity": "sha512-ZlFgw46NOAGMgcdvdYwAGu2Q+SLFA9LzbJLW+iyMOJyhj5wk6P3KEE9Gct4xWwSzFoPI7JCdYmYMzVtlgQ+zfw==", "cpu": [ "ppc64" ], @@ -1079,9 +1107,9 @@ } }, "node_modules/@rolldown/binding-linux-s390x-gnu": { - "version": "1.0.0-rc.12", - "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-s390x-gnu/-/binding-linux-s390x-gnu-1.0.0-rc.12.tgz", - "integrity": "sha512-nWwpvUSPkoFmZo0kQazZYOrT7J5DGOJ/+QHHzjvNlooDZED8oH82Yg67HvehPPLAg5fUff7TfWFHQS8IV1n3og==", + "version": "1.0.0-rc.11", + "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-s390x-gnu/-/binding-linux-s390x-gnu-1.0.0-rc.11.tgz", + "integrity": "sha512-hIOYmuT6ofM4K04XAZd3OzMySEO4K0/nc9+jmNcxNAxRi6c5UWpqfw3KMFV4MVFWL+jQsSh+bGw2VqmaPMTLyw==", "cpu": [ "s390x" ], @@ -1096,9 +1124,9 @@ } }, "node_modules/@rolldown/binding-linux-x64-gnu": { - "version": "1.0.0-rc.12", - "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-gnu/-/binding-linux-x64-gnu-1.0.0-rc.12.tgz", - "integrity": "sha512-RNrafz5bcwRy+O9e6P8Z/OCAJW/A+qtBczIqVYwTs14pf4iV1/+eKEjdOUta93q2TsT/FI0XYDP3TCky38LMAg==", + "version": "1.0.0-rc.11", + 
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-gnu/-/binding-linux-x64-gnu-1.0.0-rc.11.tgz", + "integrity": "sha512-qXBQQO9OvkjjQPLdUVr7Nr2t3QTZI7s4KZtfw7HzBgjbmAPSFwSv4rmET9lLSgq3rH/ndA3ngv3Qb8l2njoPNA==", "cpu": [ "x64" ], @@ -1113,9 +1141,9 @@ } }, "node_modules/@rolldown/binding-linux-x64-musl": { - "version": "1.0.0-rc.12", - "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-musl/-/binding-linux-x64-musl-1.0.0-rc.12.tgz", - "integrity": "sha512-Jpw/0iwoKWx3LJ2rc1yjFrj+T7iHZn2JDg1Yny1ma0luviFS4mhAIcd1LFNxK3EYu3DHWCps0ydXQ5i/rrJ2ig==", + "version": "1.0.0-rc.11", + "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-musl/-/binding-linux-x64-musl-1.0.0-rc.11.tgz", + "integrity": "sha512-/tpFfoSTzUkH9LPY+cYbqZBDyyX62w5fICq9qzsHLL8uTI6BHip3Q9Uzft0wylk/i8OOwKik8OxW+QAhDmzwmg==", "cpu": [ "x64" ], @@ -1130,9 +1158,9 @@ } }, "node_modules/@rolldown/binding-openharmony-arm64": { - "version": "1.0.0-rc.12", - "resolved": "https://registry.npmjs.org/@rolldown/binding-openharmony-arm64/-/binding-openharmony-arm64-1.0.0-rc.12.tgz", - "integrity": "sha512-vRugONE4yMfVn0+7lUKdKvN4D5YusEiPilaoO2sgUWpCvrncvWgPMzK00ZFFJuiPgLwgFNP5eSiUlv2tfc+lpA==", + "version": "1.0.0-rc.11", + "resolved": "https://registry.npmjs.org/@rolldown/binding-openharmony-arm64/-/binding-openharmony-arm64-1.0.0-rc.11.tgz", + "integrity": "sha512-mcp3Rio2w72IvdZG0oQ4bM2c2oumtwHfUfKncUM6zGgz0KgPz4YmDPQfnXEiY5t3+KD/i8HG2rOB/LxdmieK2g==", "cpu": [ "arm64" ], @@ -1147,9 +1175,9 @@ } }, "node_modules/@rolldown/binding-wasm32-wasi": { - "version": "1.0.0-rc.12", - "resolved": "https://registry.npmjs.org/@rolldown/binding-wasm32-wasi/-/binding-wasm32-wasi-1.0.0-rc.12.tgz", - "integrity": "sha512-ykGiLr/6kkiHc0XnBfmFJuCjr5ZYKKofkx+chJWDjitX+KsJuAmrzWhwyOMSHzPhzOHOy7u9HlFoa5MoAOJ/Zg==", + "version": "1.0.0-rc.11", + "resolved": "https://registry.npmjs.org/@rolldown/binding-wasm32-wasi/-/binding-wasm32-wasi-1.0.0-rc.11.tgz", + "integrity": 
"sha512-LXk5Hii1Ph9asuGRjBuz8TUxdc1lWzB7nyfdoRgI0WGPZKmCxvlKk8KfYysqtr4MfGElu/f/pEQRh8fcEgkrWw==", "cpu": [ "wasm32" ], @@ -1164,9 +1192,9 @@ } }, "node_modules/@rolldown/binding-win32-arm64-msvc": { - "version": "1.0.0-rc.12", - "resolved": "https://registry.npmjs.org/@rolldown/binding-win32-arm64-msvc/-/binding-win32-arm64-msvc-1.0.0-rc.12.tgz", - "integrity": "sha512-5eOND4duWkwx1AzCxadcOrNeighiLwMInEADT0YM7xeEOOFcovWZCq8dadXgcRHSf3Ulh1kFo/qvzoFiCLOL1Q==", + "version": "1.0.0-rc.11", + "resolved": "https://registry.npmjs.org/@rolldown/binding-win32-arm64-msvc/-/binding-win32-arm64-msvc-1.0.0-rc.11.tgz", + "integrity": "sha512-dDwf5otnx0XgRY1yqxOC4ITizcdzS/8cQ3goOWv3jFAo4F+xQYni+hnMuO6+LssHHdJW7+OCVL3CoU4ycnh35Q==", "cpu": [ "arm64" ], @@ -1181,9 +1209,9 @@ } }, "node_modules/@rolldown/binding-win32-x64-msvc": { - "version": "1.0.0-rc.12", - "resolved": "https://registry.npmjs.org/@rolldown/binding-win32-x64-msvc/-/binding-win32-x64-msvc-1.0.0-rc.12.tgz", - "integrity": "sha512-PyqoipaswDLAZtot351MLhrlrh6lcZPo2LSYE+VDxbVk24LVKAGOuE4hb8xZQmrPAuEtTZW8E6D2zc5EUZX4Lw==", + "version": "1.0.0-rc.11", + "resolved": "https://registry.npmjs.org/@rolldown/binding-win32-x64-msvc/-/binding-win32-x64-msvc-1.0.0-rc.11.tgz", + "integrity": "sha512-LN4/skhSggybX71ews7dAj6r2geaMJfm3kMbK2KhFMg9B10AZXnKoLCVVgzhMHL0S+aKtr4p8QbAW8k+w95bAA==", "cpu": [ "x64" ], @@ -1198,9 +1226,9 @@ } }, "node_modules/@rolldown/pluginutils": { - "version": "1.0.0-rc.12", - "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-rc.12.tgz", - "integrity": "sha512-HHMwmarRKvoFsJorqYlFeFRzXZqCt2ETQlEDOb9aqssrnVBB1/+xgTGtuTrIk5vzLNX1MjMtTf7W9z3tsSbrxw==", + "version": "1.0.0-rc.11", + "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-rc.11.tgz", + "integrity": "sha512-xQO9vbwBecJRv9EUcQ/y0dzSTJgA7Q6UVN7xp6B81+tBGSLVAK03yJ9NkJaUA7JFD91kbjxRSC/mDnmvXzbHoQ==", "dev": true, "license": "MIT" }, @@ -1785,31 +1813,31 @@ "license": "ISC" }, 
"node_modules/@vitest/expect": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/@vitest/expect/-/expect-4.1.2.tgz", - "integrity": "sha512-gbu+7B0YgUJ2nkdsRJrFFW6X7NTP44WlhiclHniUhxADQJH5Szt9mZ9hWnJPJ8YwOK5zUOSSlSvyzRf0u1DSBQ==", + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@vitest/expect/-/expect-4.1.1.tgz", + "integrity": "sha512-xAV0fqBTk44Rn6SjJReEQkHP3RrqbJo6JQ4zZ7/uVOiJZRarBtblzrOfFIZeYUrukp2YD6snZG6IBqhOoHTm+A==", "dev": true, "license": "MIT", "dependencies": { "@standard-schema/spec": "^1.1.0", "@types/chai": "^5.2.2", - "@vitest/spy": "4.1.2", - "@vitest/utils": "4.1.2", + "@vitest/spy": "4.1.1", + "@vitest/utils": "4.1.1", "chai": "^6.2.2", - "tinyrainbow": "^3.1.0" + "tinyrainbow": "^3.0.3" }, "funding": { "url": "https://opencollective.com/vitest" } }, "node_modules/@vitest/mocker": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/@vitest/mocker/-/mocker-4.1.2.tgz", - "integrity": "sha512-Ize4iQtEALHDttPRCmN+FKqOl2vxTiNUhzobQFFt/BM1lRUTG7zRCLOykG/6Vo4E4hnUdfVLo5/eqKPukcWW7Q==", + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@vitest/mocker/-/mocker-4.1.1.tgz", + "integrity": "sha512-h3BOylsfsCLPeceuCPAAJ+BvNwSENgJa4hXoXu4im0bs9Lyp4URc4JYK4pWLZ4pG/UQn7AT92K6IByi6rE6g3A==", "dev": true, "license": "MIT", "dependencies": { - "@vitest/spy": "4.1.2", + "@vitest/spy": "4.1.1", "estree-walker": "^3.0.3", "magic-string": "^0.30.21" }, @@ -1830,26 +1858,26 @@ } }, "node_modules/@vitest/pretty-format": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/@vitest/pretty-format/-/pretty-format-4.1.2.tgz", - "integrity": "sha512-dwQga8aejqeuB+TvXCMzSQemvV9hNEtDDpgUKDzOmNQayl2OG241PSWeJwKRH3CiC+sESrmoFd49rfnq7T4RnA==", + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@vitest/pretty-format/-/pretty-format-4.1.1.tgz", + "integrity": "sha512-GM+TEQN5WhOygr1lp7skeVjdLPqqWMHsfzXrcHAqZJi/lIVh63H0kaRCY8MDhNWikx19zBUK8ceaLB7X5AH9NQ==", "dev": true, "license": "MIT", 
"dependencies": { - "tinyrainbow": "^3.1.0" + "tinyrainbow": "^3.0.3" }, "funding": { "url": "https://opencollective.com/vitest" } }, "node_modules/@vitest/runner": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/@vitest/runner/-/runner-4.1.2.tgz", - "integrity": "sha512-Gr+FQan34CdiYAwpGJmQG8PgkyFVmARK8/xSijia3eTFgVfpcpztWLuP6FttGNfPLJhaZVP/euvujeNYar36OQ==", + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@vitest/runner/-/runner-4.1.1.tgz", + "integrity": "sha512-f7+FPy75vN91QGWsITueq0gedwUZy1fLtHOCMeQpjs8jTekAHeKP80zfDEnhrleviLHzVSDXIWuCIOFn3D3f8A==", "dev": true, "license": "MIT", "dependencies": { - "@vitest/utils": "4.1.2", + "@vitest/utils": "4.1.1", "pathe": "^2.0.3" }, "funding": { @@ -1857,14 +1885,14 @@ } }, "node_modules/@vitest/snapshot": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/@vitest/snapshot/-/snapshot-4.1.2.tgz", - "integrity": "sha512-g7yfUmxYS4mNxk31qbOYsSt2F4m1E02LFqO53Xpzg3zKMhLAPZAjjfyl9e6z7HrW6LvUdTwAQR3HHfLjpko16A==", + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@vitest/snapshot/-/snapshot-4.1.1.tgz", + "integrity": "sha512-kMVSgcegWV2FibXEx9p9WIKgje58lcTbXgnJixfcg15iK8nzCXhmalL0ZLtTWLW9PH1+1NEDShiFFedB3tEgWg==", "dev": true, "license": "MIT", "dependencies": { - "@vitest/pretty-format": "4.1.2", - "@vitest/utils": "4.1.2", + "@vitest/pretty-format": "4.1.1", + "@vitest/utils": "4.1.1", "magic-string": "^0.30.21", "pathe": "^2.0.3" }, @@ -1873,9 +1901,9 @@ } }, "node_modules/@vitest/spy": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/@vitest/spy/-/spy-4.1.2.tgz", - "integrity": "sha512-DU4fBnbVCJGNBwVA6xSToNXrkZNSiw59H8tcuUspVMsBDBST4nfvsPsEHDHGtWRRnqBERBQu7TrTKskmjqTXKA==", + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@vitest/spy/-/spy-4.1.1.tgz", + "integrity": "sha512-6Ti/KT5OVaiupdIZEuZN7l3CZcR0cxnxt70Z0//3CtwgObwA6jZhmVBA3yrXSVN3gmwjgd7oDNLlsXz526gpRA==", "dev": true, "license": "MIT", "funding": { @@ -1883,15 +1911,15 
@@ } }, "node_modules/@vitest/utils": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/@vitest/utils/-/utils-4.1.2.tgz", - "integrity": "sha512-xw2/TiX82lQHA06cgbqRKFb5lCAy3axQ4H4SoUFhUsg+wztiet+co86IAMDtF6Vm1hc7J6j09oh/rgDn+JdKIQ==", + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@vitest/utils/-/utils-4.1.1.tgz", + "integrity": "sha512-cNxAlaB3sHoCdL6pj6yyUXv9Gry1NHNg0kFTXdvSIZXLHsqKH7chiWOkwJ5s5+d/oMwcoG9T0bKU38JZWKusrQ==", "dev": true, "license": "MIT", "dependencies": { - "@vitest/pretty-format": "4.1.2", + "@vitest/pretty-format": "4.1.1", "convert-source-map": "^2.0.0", - "tinyrainbow": "^3.1.0" + "tinyrainbow": "^3.0.3" }, "funding": { "url": "https://opencollective.com/vitest" @@ -2168,6 +2196,7 @@ "integrity": "sha512-1K0wtDaRONwfhL4h8bbJ9qTjmY6rhGgRvvagXkMBsAOMNr+3Q2SffHECh9DIuNVrMA1JwA0zCwhyepgBZVakng==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@algolia/abtesting": "1.15.2", "@algolia/client-abtesting": "5.49.2", @@ -2496,6 +2525,7 @@ "integrity": "sha512-/yNdlIkpWbM0ptxno3ONTuf+2g318kh2ez3KSeZN5dZ8YC6AAmgeWz+GasYYiBJPFaYcSAPeu4GfhUaChzIJXA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "tabbable": "^6.4.0" } @@ -2618,12 +2648,26 @@ "js-yaml": "bin/js-yaml.js" } }, + "node_modules/json-schema-to-ts": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/json-schema-to-ts/-/json-schema-to-ts-3.1.1.tgz", + "integrity": "sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g==", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.18.3", + "ts-algebra": "^2.0.0" + }, + "engines": { + "node": ">=16" + } + }, "node_modules/lightningcss": { "version": "1.32.0", "resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.32.0.tgz", "integrity": "sha512-NXYBzinNrblfraPGyrbPoD19C1h9lfI/1mzgWYvXUTe414Gz/X1FD2XBZSZM7rRTrMA8JL3OtAaGifrIKhQ5yQ==", "dev": true, "license": "MPL-2.0", + "peer": true, "dependencies": { 
"detect-libc": "^2.0.3" }, @@ -3068,6 +3112,27 @@ "regex-recursion": "^6.0.2" } }, + "node_modules/openai": { + "version": "6.33.0", + "resolved": "https://registry.npmjs.org/openai/-/openai-6.33.0.tgz", + "integrity": "sha512-xAYN1W3YsDXJWA5F277135YfkEk6H7D3D6vWwRhJ3OEkzRgcyK8z/P5P9Gyi/wB4N8kK9kM5ZjprfvyHagKmpw==", + "license": "Apache-2.0", + "bin": { + "openai": "bin/cli" + }, + "peerDependencies": { + "ws": "^8.18.0", + "zod": "^3.25 || ^4.0" + }, + "peerDependenciesMeta": { + "ws": { + "optional": true + }, + "zod": { + "optional": true + } + } + }, "node_modules/pathe": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/pathe/-/pathe-2.0.3.tgz", @@ -3090,11 +3155,12 @@ "license": "ISC" }, "node_modules/picomatch": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", - "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", + "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -3198,14 +3264,14 @@ "license": "MIT" }, "node_modules/rolldown": { - "version": "1.0.0-rc.12", - "resolved": "https://registry.npmjs.org/rolldown/-/rolldown-1.0.0-rc.12.tgz", - "integrity": "sha512-yP4USLIMYrwpPHEFB5JGH1uxhcslv6/hL0OyvTuY+3qlOSJvZ7ntYnoWpehBxufkgN0cvXxppuTu5hHa/zPh+A==", + "version": "1.0.0-rc.11", + "resolved": "https://registry.npmjs.org/rolldown/-/rolldown-1.0.0-rc.11.tgz", + "integrity": "sha512-NRjoKMusSjfRbSYiH3VSumlkgFe7kYAa3pzVOsVYVFY3zb5d7nS+a3KGQ7hJKXuYWbzJKPVQ9Wxq2UvyK+ENpw==", "dev": true, "license": "MIT", "dependencies": { "@oxc-project/types": "=0.122.0", - "@rolldown/pluginutils": "1.0.0-rc.12" + "@rolldown/pluginutils": "1.0.0-rc.11" }, "bin": { "rolldown": "bin/cli.mjs" @@ -3214,21 +3280,21 @@ "node": 
"^20.19.0 || >=22.12.0" }, "optionalDependencies": { - "@rolldown/binding-android-arm64": "1.0.0-rc.12", - "@rolldown/binding-darwin-arm64": "1.0.0-rc.12", - "@rolldown/binding-darwin-x64": "1.0.0-rc.12", - "@rolldown/binding-freebsd-x64": "1.0.0-rc.12", - "@rolldown/binding-linux-arm-gnueabihf": "1.0.0-rc.12", - "@rolldown/binding-linux-arm64-gnu": "1.0.0-rc.12", - "@rolldown/binding-linux-arm64-musl": "1.0.0-rc.12", - "@rolldown/binding-linux-ppc64-gnu": "1.0.0-rc.12", - "@rolldown/binding-linux-s390x-gnu": "1.0.0-rc.12", - "@rolldown/binding-linux-x64-gnu": "1.0.0-rc.12", - "@rolldown/binding-linux-x64-musl": "1.0.0-rc.12", - "@rolldown/binding-openharmony-arm64": "1.0.0-rc.12", - "@rolldown/binding-wasm32-wasi": "1.0.0-rc.12", - "@rolldown/binding-win32-arm64-msvc": "1.0.0-rc.12", - "@rolldown/binding-win32-x64-msvc": "1.0.0-rc.12" + "@rolldown/binding-android-arm64": "1.0.0-rc.11", + "@rolldown/binding-darwin-arm64": "1.0.0-rc.11", + "@rolldown/binding-darwin-x64": "1.0.0-rc.11", + "@rolldown/binding-freebsd-x64": "1.0.0-rc.11", + "@rolldown/binding-linux-arm-gnueabihf": "1.0.0-rc.11", + "@rolldown/binding-linux-arm64-gnu": "1.0.0-rc.11", + "@rolldown/binding-linux-arm64-musl": "1.0.0-rc.11", + "@rolldown/binding-linux-ppc64-gnu": "1.0.0-rc.11", + "@rolldown/binding-linux-s390x-gnu": "1.0.0-rc.11", + "@rolldown/binding-linux-x64-gnu": "1.0.0-rc.11", + "@rolldown/binding-linux-x64-musl": "1.0.0-rc.11", + "@rolldown/binding-openharmony-arm64": "1.0.0-rc.11", + "@rolldown/binding-wasm32-wasi": "1.0.0-rc.11", + "@rolldown/binding-win32-arm64-msvc": "1.0.0-rc.11", + "@rolldown/binding-win32-x64-msvc": "1.0.0-rc.11" } }, "node_modules/rollup": { @@ -3469,6 +3535,12 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/ts-algebra": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ts-algebra/-/ts-algebra-2.0.0.tgz", + "integrity": "sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==", + 
"license": "MIT" + }, "node_modules/tslib": { "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", @@ -3483,6 +3555,7 @@ "integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "esbuild": "~0.27.0", "get-tsconfig": "^4.7.5" @@ -3512,6 +3585,7 @@ "integrity": "sha512-bGdAIrZ0wiGDo5l8c++HWtbaNCWTS4UTv7RaTH/ThVIgjkveJt83m74bBHMJkuCbslY8ixgLBVZJIOiQlQTjfQ==", "dev": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -3640,16 +3714,17 @@ } }, "node_modules/vite": { - "version": "8.0.3", - "resolved": "https://registry.npmjs.org/vite/-/vite-8.0.3.tgz", - "integrity": "sha512-B9ifbFudT1TFhfltfaIPgjo9Z3mDynBTJSUYxTjOQruf/zHH+ezCQKcoqO+h7a9Pw9Nm/OtlXAiGT1axBgwqrQ==", + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/vite/-/vite-8.0.2.tgz", + "integrity": "sha512-1gFhNi+bHhRE/qKZOJXACm6tX4bA3Isy9KuKF15AgSRuRazNBOJfdDemPBU16/mpMxApDPrWvZ08DcLPEoRnuA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "lightningcss": "^1.32.0", - "picomatch": "^4.0.4", + "picomatch": "^4.0.3", "postcss": "^8.5.8", - "rolldown": "1.0.0-rc.12", + "rolldown": "1.0.0-rc.11", "tinyglobby": "^0.2.15" }, "bin": { @@ -4209,6 +4284,7 @@ "integrity": "sha512-o5a9xKjbtuhY6Bi5S3+HvbRERmouabWbyUcpXXUA1u+GNUKoROi9byOJ8M0nHbHYHkYICiMlqxkg1KkYmm25Sw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "esbuild": "^0.21.3", "postcss": "^8.4.43", @@ -4264,19 +4340,19 @@ } }, "node_modules/vitest": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/vitest/-/vitest-4.1.2.tgz", - "integrity": "sha512-xjR1dMTVHlFLh98JE3i/f/WePqJsah4A0FK9cc8Ehp9Udk0AZk6ccpIZhh1qJ/yxVWRZ+Q54ocnD8TXmkhspGg==", + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/vitest/-/vitest-4.1.1.tgz", + "integrity": 
"sha512-yF+o4POL41rpAzj5KVILUxm1GCjKnELvaqmU9TLLUbMfDzuN0UpUR9uaDs+mCtjPe+uYPksXDRLQGGPvj1cTmA==", "dev": true, "license": "MIT", "dependencies": { - "@vitest/expect": "4.1.2", - "@vitest/mocker": "4.1.2", - "@vitest/pretty-format": "4.1.2", - "@vitest/runner": "4.1.2", - "@vitest/snapshot": "4.1.2", - "@vitest/spy": "4.1.2", - "@vitest/utils": "4.1.2", + "@vitest/expect": "4.1.1", + "@vitest/mocker": "4.1.1", + "@vitest/pretty-format": "4.1.1", + "@vitest/runner": "4.1.1", + "@vitest/snapshot": "4.1.1", + "@vitest/spy": "4.1.1", + "@vitest/utils": "4.1.1", "es-module-lexer": "^2.0.0", "expect-type": "^1.3.0", "magic-string": "^0.30.21", @@ -4287,7 +4363,7 @@ "tinybench": "^2.9.0", "tinyexec": "^1.0.2", "tinyglobby": "^0.2.15", - "tinyrainbow": "^3.1.0", + "tinyrainbow": "^3.0.3", "vite": "^6.0.0 || ^7.0.0 || ^8.0.0", "why-is-node-running": "^2.3.0" }, @@ -4304,10 +4380,10 @@ "@edge-runtime/vm": "*", "@opentelemetry/api": "^1.9.0", "@types/node": "^20.0.0 || ^22.0.0 || >=24.0.0", - "@vitest/browser-playwright": "4.1.2", - "@vitest/browser-preview": "4.1.2", - "@vitest/browser-webdriverio": "4.1.2", - "@vitest/ui": "4.1.2", + "@vitest/browser-playwright": "4.1.1", + "@vitest/browser-preview": "4.1.1", + "@vitest/browser-webdriverio": "4.1.1", + "@vitest/ui": "4.1.1", "happy-dom": "*", "jsdom": "*", "vite": "^6.0.0 || ^7.0.0 || ^8.0.0" @@ -4351,6 +4427,7 @@ "integrity": "sha512-hTHLc6VNZyzzEH/l7PFGjpcTvUgiaPK5mdLkbjrTeWSRcEfxFrv56g/XckIYlE9ckuobsdwqd5mk2g1sBkMewg==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@vue/compiler-dom": "3.5.30", "@vue/compiler-sfc": "3.5.30", @@ -4389,6 +4466,7 @@ "resolved": "https://registry.npmjs.org/ws/-/ws-8.20.0.tgz", "integrity": "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==", "license": "MIT", + "peer": true, "engines": { "node": ">=10.0.0" }, @@ -4405,6 +4483,16 @@ } } }, + "node_modules/zod": { + "version": "4.3.6", + "resolved": 
"https://registry.npmjs.org/zod/-/zod-4.3.6.tgz", + "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", + "license": "MIT", + "peer": true, + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, "node_modules/zwitch": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/zwitch/-/zwitch-2.0.4.tgz", diff --git a/package.json b/package.json index 6da1ee32..f4e86342 100644 --- a/package.json +++ b/package.json @@ -53,13 +53,16 @@ "url": "git+https://github.com/jackwener/opencli.git" }, "dependencies": { + "@anthropic-ai/sdk": "^0.80.0", "chalk": "^5.3.0", "cli-table3": "^0.6.5", "commander": "^14.0.3", "js-yaml": "^4.1.0", + "openai": "^6.33.0", "turndown": "^7.2.2", "undici": "^7.24.6", - "ws": "^8.18.0" + "ws": "^8.18.0", + "zod": "^4.3.6" }, "devDependencies": { "@types/js-yaml": "^4.0.9", diff --git a/skills/adapter-dev/SKILL.md b/skills/adapter-dev/SKILL.md new file mode 100644 index 00000000..9cd76e6c --- /dev/null +++ b/skills/adapter-dev/SKILL.md @@ -0,0 +1,309 @@ +--- +name: opencli-adapter-dev +description: "OpenCLI adapter development — create new CLI commands from websites. Use when the user wants to build a new adapter, explore website APIs, record API calls, or write TypeScript/YAML adapters." +allowed-tools: Bash(opencli:*), Read, Edit, Write +--- + +# OpenCLI Adapter Development + +> **Before creating adapters, read [CLI-EXPLORER.md](../../CLI-EXPLORER.md) for the complete API discovery workflow.** + +## Key Rules + +1. Main parameter uses positional arg (not `--query` / `--id`) +2. Use `CliError` subclasses for expected failures, not raw `Error` +3. 
Update adapter docs and README when adding new adapters + +## Record Workflow + +`record` 是为「无法用 `explore` 自动发现」的页面(需要登录操作、复杂交互、SPA 内路由)准备的手动录制方案。 + +### 工作原理 + +``` +opencli record <url> + → 打开 automation window 并导航到目标 URL + → 向所有 tab 注入 fetch/XHR 拦截器(幂等,可重复注入) + → 每 2s 轮询一次:发现新 tab 自动注入,drain 所有 tab 的捕获缓冲区 + → 超时(默认 60s)或按 Enter 停止 + → 分析捕获到的 JSON 请求:去重 → 评分 → 生成候选 YAML +``` + +**拦截器特性**: +- 同时 patch `window.fetch` 和 `XMLHttpRequest` +- 只捕获 `Content-Type: application/json` 的响应 +- 过滤纯对象少于 2 个 key 的响应(避免 tracking/ping) +- 跨 tab 隔离:每个 tab 独立缓冲区,轮询时分别 drain +- 幂等注入:同一 tab 二次注入时先 restore 原始函数再重新 patch,不丢失已捕获数据 + +### 使用步骤 + +```bash +# 1. 启动录制(建议 --timeout 给足操作时间) +opencli record "https://example.com/page" --timeout 120000 + +# 2. 在弹出的 automation window 里正常操作页面: +# - 打开列表、搜索、点击条目、切换 Tab +# - 凡是触发网络请求的操作都会被捕获 + +# 3. 完成操作后按 Enter 停止(或等超时自动停止) + +# 4. 查看结果 +cat .opencli/record/<site>/captured.json # 原始捕获 +ls .opencli/record/<site>/candidates/ # 候选 YAML +``` + +### 页面类型与捕获预期 + +| 页面类型 | 预期捕获量 | 说明 | +|---------|-----------|------| +| 列表/搜索页 | 多(5~20+) | 每次搜索/翻页都会触发新请求 | +| 详情页(只读) | 少(1~5) | 首屏数据一次性返回,后续操作走 form/redirect | +| SPA 内路由跳转 | 中等 | 路由切换会触发新接口,但首屏请求在注入前已发出 | +| 需要登录的页面 | 视操作而定 | 确保 Chrome 已登录目标网站 | + +> **注意**:如果页面在导航完成前就发出了大部分请求(服务端渲染 / SSR 注水),拦截器会错过这些请求。 +> 解决方案:在页面加载完成后,手动触发能产生新请求的操作(搜索、翻页、切 Tab、展开折叠项等)。 + +### 候选 YAML → TS CLI 转换 + +生成的候选 YAML 是起点,通常需要转换为 TypeScript(尤其是 tae 等内部系统): + +**候选 YAML 结构**(自动生成): +```yaml +site: tae +name: getList # 从 URL path 推断的名称 +strategy: cookie +browser: true +pipeline: + - navigate: https://... + - evaluate: | + (async () => { + const res = await fetch('/approval/getList.json?procInsId=...', { credentials: 'include' }); + const data = await res.json(); + return (data?.content?.operatorRecords || []).map(item => ({ ... 
})); + })() +``` + +**转换为 TS CLI**(参考 `src/clis/tae/add-expense.ts` 风格): +```typescript +import { cli, Strategy } from '../../registry.js'; + +cli({ + site: 'tae', + name: 'get-approval', + description: '查看报销单审批流程和操作记录', + domain: 'tae.alibaba-inc.com', + strategy: Strategy.COOKIE, + browser: true, + args: [ + { name: 'proc_ins_id', type: 'string', required: true, positional: true, help: '流程实例 ID(procInsId)' }, + ], + columns: ['step', 'operator', 'action', 'time'], + func: async (page, kwargs) => { + await page.goto('https://tae.alibaba-inc.com/expense/pc.html?_authType=SAML'); + await page.wait(2); + const result = await page.evaluate(`(async () => { + const res = await fetch('/approval/getList.json?taskId=&procInsId=${kwargs.proc_ins_id}', { + credentials: 'include' + }); + const data = await res.json(); + return data?.content?.operatorRecords || []; + })()`); + return (result as any[]).map((r, i) => ({ + step: i + 1, + operator: r.operatorName || r.userId, + action: r.operationType, + time: r.operateTime, + })); + }, +}); +``` + +**转换要点**: +1. URL 中的动态 ID(`procInsId`、`taskId` 等)提取为 `args` +2. `captured.json` 里的真实 body 结构用于确定正确的数据路径(如 `content.operatorRecords`) +3. tae 系统统一用 `{ success, content, errorCode, errorMsg }` 外层包裹,取数据要走 `content.*` +4. 认证方式:cookie(`credentials: 'include'`),不需要额外 header +5. 
文件放入 `src/clis/<site>/`,无需手动注册,`npm run build` 后自动发现 + +### 故障排查 + +| 现象 | 原因 | 解法 | +|------|------|------| +| 捕获 0 条请求 | 拦截器注入失败,或页面无 JSON API | 检查 daemon 是否运行:`curl localhost:19825/status` | +| 捕获量少(1~3 条) | 页面是只读详情页,首屏数据已在注入前发出 | 手动操作触发更多请求(搜索/翻页),或换用列表页 | +| 候选 YAML 为 0 | 捕获到的 JSON 都没有 array 结构 | 直接看 `captured.json` 手写 TS CLI | +| 新开的 tab 没有被拦截 | 轮询间隔内 tab 已关闭 | 缩短 `--poll 500` | +| 二次运行 record 时数据不连续 | 正常,每次 `record` 启动都是新的 automation window | 无需处理 | + +## Creating Adapters + +> [!TIP] +> **快速模式**:如果你只想为一个具体页面生成一个命令,直接看 [CLI-ONESHOT.md](./CLI-ONESHOT.md)。 +> 只需要一个 URL + 一句话描述,4 步搞定。 + +> [!IMPORTANT] +> **完整模式 — 在写任何代码之前,先阅读 [CLI-EXPLORER.md](./CLI-EXPLORER.md)。** +> 它包含:① AI Agent 浏览器探索工作流 ② 认证策略决策树 ③ 平台 SDK(如 Bilibili 的 `apiGet`/`fetchJson`)④ YAML vs TS 选择指南 ⑤ `tap` 步骤调试方法 ⑥ 级联请求模板 ⑦ 常见陷阱表。 +> **下方仅为简化模板参考,直接使用极易踩坑。** + +### YAML Pipeline (declarative, recommended) + +Create `src/clis/<site>/<name>.yaml`: + +```yaml +site: mysite +name: hot +description: Hot topics +domain: www.mysite.com +strategy: cookie # public | cookie | header | intercept | ui +browser: true + +args: + limit: + type: int + default: 20 + description: Number of items + +pipeline: + - navigate: https://www.mysite.com + + - evaluate: | + (async () => { + const res = await fetch('/api/hot', { credentials: 'include' }); + const d = await res.json(); + return d.data.items.map(item => ({ + title: item.title, + score: item.score, + })); + })() + + - map: + rank: ${{ index + 1 }} + title: ${{ item.title }} + score: ${{ item.score }} + + - limit: ${{ args.limit }} + +columns: [rank, title, score] +``` + +For public APIs (no browser): + +```yaml +strategy: public +browser: false + +pipeline: + - fetch: + url: https://api.example.com/hot.json + - select: data.items + - map: + title: ${{ item.title }} + - limit: ${{ args.limit }} +``` + +### TypeScript Adapter (programmatic) + +Create `src/clis/<site>/<name>.ts`. 
It is discovered and loaded automatically (DO NOT manually import it in `index.ts`): + +```typescript +import { cli, Strategy } from '../../registry.js'; + +cli({ + site: 'mysite', + name: 'search', + strategy: Strategy.INTERCEPT, // Or COOKIE + args: [{ name: 'query', required: true, positional: true }], + columns: ['rank', 'title', 'url'], + func: async (page, kwargs) => { + await page.goto('https://www.mysite.com/search'); + + // Inject native XHR/Fetch interceptor hook + await page.installInterceptor('/api/search'); + + // Auto scroll down to trigger lazy loading + await page.autoScroll({ times: 3, delayMs: 2000 }); + + // Retrieve intercepted JSON payloads + const requests = await page.getInterceptedRequests(); + + let results = []; + for (const req of requests) { + results.push(...req.data.items); + } + return results.map((item, i) => ({ + rank: i + 1, title: item.title, url: item.url, + })); + }, +}); +``` + +**When to use TS**: XHR interception (`page.installInterceptor`), infinite scrolling (`page.autoScroll`), cookie extraction, complex data transforms (like GraphQL unwrapping). + +## Pipeline Steps + +| Step | Description | Example | +|------|-------------|---------| +| `navigate` | Go to URL | `navigate: https://example.com` | +| `fetch` | HTTP request (browser cookies) | `fetch: { url: "...", params: { q: "..." } }` | +| `evaluate` | Run JavaScript in page | `evaluate: \| (async () => { ... 
})()` | +| `select` | Extract JSON path | `select: data.items` | +| `map` | Map fields | `map: { title: "${{ item.title }}" }` | +| `filter` | Filter items | `filter: item.score > 100` | +| `sort` | Sort items | `sort: { by: score, order: desc }` | +| `limit` | Cap result count | `limit: ${{ args.limit }}` | +| `intercept` | Declarative XHR capture | `intercept: { trigger: "navigate:...", capture: "api/hot" }` | +| `tap` | Store action + XHR capture | `tap: { store: "feed", action: "fetchFeeds", capture: "homefeed" }` | +| `snapshot` | Page accessibility tree | `snapshot: { interactive: true }` | +| `click` | Click element | `click: ${{ ref }}` | +| `type` | Type text | `type: { ref: "@1", text: "hello" }` | +| `wait` | Wait for time/text | `wait: 2` or `wait: { text: "loaded" }` | +| `press` | Press key | `press: Enter` | + +## Template Syntax + +```yaml +# Arguments with defaults +${{ args.query }} +${{ args.limit | default(20) }} + +# Current item (in map/filter) +${{ item.title }} +${{ item.data.nested.field }} + +# Index (0-based) +${{ index }} +${{ index + 1 }} +``` + +## 5-Tier Authentication Strategy + +| Tier | Name | Method | Example | +|------|------|--------|---------| +| 1 | `public` | No auth, Node.js fetch | Hacker News, V2EX | +| 2 | `cookie` | Browser fetch with `credentials: include` | Bilibili, Zhihu | +| 3 | `header` | Custom headers (ct0, Bearer) | Twitter GraphQL | +| 4 | `intercept` | XHR interception + store mutation | 小红书 Pinia | +| 5 | `ui` | Full UI automation (click/type/scroll) | Last resort | + +## Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `OPENCLI_DAEMON_PORT` | 19825 | Daemon listen port | +| `OPENCLI_BROWSER_CONNECT_TIMEOUT` | 30 | Browser connection timeout (sec) | +| `OPENCLI_BROWSER_COMMAND_TIMEOUT` | 45 | Command execution timeout (sec) | +| `OPENCLI_BROWSER_EXPLORE_TIMEOUT` | 120 | Explore timeout (sec) | +| `OPENCLI_VERBOSE` | — | Show daemon/extension logs | + +## 
Troubleshooting + +| Issue | Solution | +|-------|----------| +| `npx not found` | Install Node.js: `brew install node` | +| `Extension not connected` | 1) Chrome must be open 2) Install opencli Browser Bridge extension | +| `Target page context` error | Add `navigate:` step before `evaluate:` in YAML | +| Empty table data | Check if evaluate returns correct data path | +| Daemon issues | `curl localhost:19825/status` to check, `curl localhost:19825/logs` for extension logs | diff --git a/skills/cli/SKILL.md b/skills/cli/SKILL.md new file mode 100644 index 00000000..138ca3c8 --- /dev/null +++ b/skills/cli/SKILL.md @@ -0,0 +1,594 @@ +--- +name: opencli-cli +description: "OpenCLI command reference — 60+ website and desktop app adapters. Use when the user wants to fetch data from websites (Twitter, GitHub, HN, etc.), interact with desktop apps, or manage opencli commands." +allowed-tools: Bash(opencli:*) +--- +# OpenCLI + +> Make any website or Electron App your CLI. Reuse Chrome login, zero risk, AI-powered discovery. + +> [!CAUTION] +> **AI Agent 必读:创建或修改任何适配器之前,你必须先阅读 [CLI-EXPLORER.md](./CLI-EXPLORER.md)!** +> 该文档包含完整的 API 发现工作流(必须使用浏览器探索)、5 级认证策略决策树、平台 SDK 速查表、`tap` 步骤调试流程、分页 API 模板、级联请求模式、以及常见陷阱。 +> **本文件(SKILL.md)仅提供命令参考和简化模板,不足以正确开发适配器。** + +> [!IMPORTANT] +> 创建或修改 adapter 时,再额外遵守 3 条收口规则: +> 1. 主参数优先用 positional arg,不要把 `query` / `id` / `url` 默认做成 `--query` / `--id` / `--url` +> 2. 预期中的 adapter 失败优先抛 `CliError` 子类,不要直接 throw 原始 `Error` +> 3. 新增 adapter 或新增用户可发现命令时,同步更新 adapter docs、`docs/adapters/index.md`、sidebar,以及 README/README.zh-CN 中受影响的入口 + +## Install & Run + +```bash +# npm global install (recommended) +npm install -g @jackwener/opencli +opencli + +# Or from source +cd ~/code/opencli && npm install +npx tsx src/main.ts + +# Update to latest +npm update -g @jackwener/opencli +``` + +## Prerequisites + +Browser commands require: +1. Chrome browser running **(logged into target sites)** +2. 
**opencli Browser Bridge** Chrome extension installed (load `extension/` as unpacked in `chrome://extensions`) +3. No further setup needed — the daemon auto-starts on first browser command + +> **Note**: You must be logged into the target website in Chrome before running commands. Tabs opened during command execution are auto-closed afterwards. + +Public API commands (`hackernews`, `v2ex`) need no browser. + +## Commands Reference + +### Data Commands + +```bash +# Bilibili (browser) +opencli bilibili hot --limit 10 # B站热门视频 +opencli bilibili search "rust" # 搜索视频 (query positional) +opencli bilibili me # 我的信息 +opencli bilibili favorite # 我的收藏 +opencli bilibili history --limit 20 # 观看历史 +opencli bilibili feed --limit 10 # 动态时间线 +opencli bilibili user-videos --uid 12345 # 用户投稿 +opencli bilibili subtitle --bvid BV1xxx # 获取视频字幕 (支持 --lang zh-CN) +opencli bilibili dynamic --limit 10 # 动态 +opencli bilibili ranking --limit 10 # 排行榜 +opencli bilibili following --limit 20 # 我的关注列表 (支持 --uid 查看他人) + +# 知乎 (browser) +opencli zhihu hot --limit 10 # 知乎热榜 +opencli zhihu search "AI" # 搜索 (query positional) +opencli zhihu question 34816524 # 问题详情和回答 (id positional) + +# 小红书 (browser) +opencli xiaohongshu search "美食" # 搜索笔记 (query positional) +opencli xiaohongshu notifications # 通知(mentions/likes/connections) +opencli xiaohongshu feed --limit 10 # 推荐 Feed +opencli xiaohongshu user xxx # 用户主页 (id positional) +opencli xiaohongshu creator-notes --limit 10 # 创作者笔记列表 +opencli xiaohongshu creator-note-detail --note-id xxx # 笔记详情 +opencli xiaohongshu creator-notes-summary # 笔记数据概览 +opencli xiaohongshu creator-profile # 创作者资料 +opencli xiaohongshu creator-stats # 创作者数据统计 + +# 雪球 Xueqiu (browser) +opencli xueqiu hot-stock --limit 10 # 雪球热门股票榜 +opencli xueqiu stock --symbol SH600519 # 查看股票实时行情 +opencli xueqiu watchlist # 获取自选股/持仓列表 +opencli xueqiu feed # 我的关注 timeline +opencli xueqiu hot --limit 10 # 雪球热榜 +opencli xueqiu search "特斯拉" # 搜索 (query positional) +opencli xueqiu earnings-date 
SH600519 # 股票财报发布日期 (symbol positional) +opencli xueqiu fund-holdings # 蛋卷基金持仓明细 (支持 --account 过滤) +opencli xueqiu fund-snapshot # 蛋卷基金快照(总资产、子账户、持仓) + +# GitHub (via gh External CLI) +opencli gh repo list # 列出仓库 (passthrough to gh) +opencli gh pr list --limit 5 # PR 列表 +opencli gh issue list # Issue 列表 + +# Twitter/X (browser) +opencli twitter trending --limit 10 # 热门话题 +opencli twitter bookmarks --limit 20 # 获取收藏的书签推文 +opencli twitter search "AI" # 搜索推文 (query positional) +opencli twitter profile elonmusk # 用户资料 +opencli twitter timeline --limit 20 # 时间线 +opencli twitter thread 1234567890 # 推文 thread(原文 + 回复) +opencli twitter article 1891511252174299446 # 推文长文内容 +opencli twitter follow elonmusk # 关注用户 +opencli twitter unfollow elonmusk # 取消关注 +opencli twitter bookmark https://x.com/... # 收藏推文 +opencli twitter unbookmark https://x.com/... # 取消收藏 +opencli twitter post "Hello world" # 发布推文 (text positional) +opencli twitter like https://x.com/... # 点赞推文 (url positional) +opencli twitter reply https://x.com/... "Nice!" # 回复推文 (url + text positional) +opencli twitter delete https://x.com/... # 删除推文 (url positional) +opencli twitter block elonmusk # 屏蔽用户 (username positional) +opencli twitter unblock elonmusk # 取消屏蔽 (username positional) +opencli twitter followers elonmusk # 用户的粉丝列表 (user positional) +opencli twitter following elonmusk # 用户的关注列表 (user positional) +opencli twitter notifications --limit 20 # 通知列表 +opencli twitter hide-reply https://x.com/... 
# 隐藏回复 (url positional) +opencli twitter download elonmusk # 下载用户媒体 (username positional, 支持 --tweet-url) +opencli twitter accept "群,微信" # 自动接受含关键词的 DM 请求 (query positional) +opencli twitter reply-dm "消息内容" # 批量回复 DM (text positional) + +# Reddit (browser) +opencli reddit hot --limit 10 # 热门帖子 +opencli reddit hot --subreddit programming # 指定子版块 +opencli reddit frontpage --limit 10 # 首页 /r/all +opencli reddit popular --limit 10 # /r/popular 热门 +opencli reddit search "AI" --sort top --time week # 搜索(支持排序+时间过滤) +opencli reddit subreddit rust --sort top --time month # 子版块浏览(支持时间过滤) +opencli reddit read --post-id 1abc123 # 阅读帖子 + 评论 +opencli reddit user spez # 用户资料(karma、注册时间) +opencli reddit user-posts spez # 用户发帖历史 +opencli reddit user-comments spez # 用户评论历史 +opencli reddit upvote --post-id xxx --direction up # 投票(up/down/none) +opencli reddit save --post-id xxx # 收藏帖子 +opencli reddit comment --post-id xxx "Great!" # 发表评论 (text positional) +opencli reddit subscribe --subreddit python # 订阅子版块 +opencli reddit saved --limit 10 # 我的收藏 +opencli reddit upvoted --limit 10 # 我的赞 + +# V2EX (public + browser) +opencli v2ex hot --limit 10 # 热门话题 +opencli v2ex latest --limit 10 # 最新话题 +opencli v2ex topic 1024 # 主题详情 (id positional) +opencli v2ex daily # 每日签到 (browser) +opencli v2ex me # 我的信息 (browser) +opencli v2ex notifications --limit 10 # 通知 (browser) +opencli v2ex node python # 节点话题列表 (name positional) +opencli v2ex nodes --limit 30 # 所有节点列表 +opencli v2ex member username # 用户资料 (username positional) +opencli v2ex user username # 用户发帖列表 (username positional) +opencli v2ex replies 1024 # 主题回复列表 (id positional) + +# Hacker News (public) +opencli hackernews top --limit 10 # Top stories +opencli hackernews new --limit 10 # Newest stories +opencli hackernews best --limit 10 # Best stories +opencli hackernews ask --limit 10 # Ask HN posts +opencli hackernews show --limit 10 # Show HN posts +opencli hackernews jobs --limit 10 # Job postings +opencli hackernews search "rust" # 搜索 
(query positional) +opencli hackernews user dang # 用户资料 (username positional) + +# BBC (public) +opencli bbc news --limit 10 # BBC News RSS headlines + +# 微博 (browser) +opencli weibo hot --limit 10 # 微博热搜 + +# BOSS直聘 (browser) +opencli boss search "AI agent" # 搜索职位 (query positional) +opencli boss detail --security-id xxx # 职位详情 +opencli boss recommend --limit 10 # 推荐职位 +opencli boss joblist --limit 10 # 职位列表 +opencli boss greet --security-id xxx # 打招呼 +opencli boss batchgreet --job-id xxx # 批量打招呼 +opencli boss send --uid xxx "消息内容" # 发消息 (text positional) +opencli boss chatlist --limit 10 # 聊天列表 +opencli boss chatmsg --security-id xxx # 聊天记录 +opencli boss invite --security-id xxx # 邀请沟通 +opencli boss mark --security-id xxx # 标记管理 +opencli boss exchange --security-id xxx # 交换联系方式 +opencli boss resume # 简历管理 +opencli boss stats # 数据统计 + +# YouTube (browser) +opencli youtube search "rust" # 搜索视频 (query positional) +opencli youtube video "https://www.youtube.com/watch?v=xxx" # 视频元数据 +opencli youtube transcript "https://www.youtube.com/watch?v=xxx" # 获取视频字幕/转录 +opencli youtube transcript "xxx" --lang zh-Hans --mode raw # 指定语言 + 原始时间戳模式 + +# Yahoo Finance (browser) +opencli yahoo-finance quote --symbol AAPL # 股票行情 + +# Sina Finance +opencli sinafinance news --limit 10 --type 1 # 7x24实时快讯 (0=全部 1=A股 2=宏观 3=公司 4=数据 5=市场 6=国际 7=观点 8=央行 9=其它) + +# Reuters (browser) +opencli reuters search "AI" # 路透社搜索 (query positional) + +# 什么值得买 (browser) +opencli smzdm search "耳机" # 搜索好价 (query positional) + +# 携程 (browser) +opencli ctrip search "三亚" # 搜索目的地 (query positional) + +# Antigravity (Electron/CDP) +opencli antigravity status # 检查 CDP 连接 +opencli antigravity send "hello" # 发送文本到当前 agent 聊天框 +opencli antigravity read # 读取整个聊天记录面板 +opencli antigravity new # 清空聊天、开启新对话 +opencli antigravity dump # 导出 DOM 和快照调试信息 +opencli antigravity extract-code # 自动抽取 AI 回复中的代码块 +opencli antigravity model claude # 切换底层模型 +opencli antigravity watch # 流式监听增量消息 + +# Barchart (browser) +opencli 
barchart quote --symbol AAPL # 股票行情 +opencli barchart options --symbol AAPL # 期权链 +opencli barchart greeks --symbol AAPL # 期权 Greeks +opencli barchart flow --limit 20 # 异常期权活动 + +# Jike 即刻 (browser) +opencli jike feed --limit 10 # 动态流 +opencli jike search "AI" # 搜索 (query positional) +opencli jike create "内容" # 发布动态 (text positional) +opencli jike like xxx # 点赞 (id positional) +opencli jike comment xxx "评论" # 评论 (id + text positional) +opencli jike repost xxx # 转发 (id positional) +opencli jike notifications # 通知 + +# Linux.do (public + browser) +opencli linux-do hot --limit 10 # 热门话题 +opencli linux-do latest --limit 10 # 最新话题 +opencli linux-do search "rust" # 搜索 (query positional) +opencli linux-do topic 1024 # 主题详情 (id positional) +opencli linux-do categories --limit 20 # 分类列表 (browser) +opencli linux-do category dev 7 # 分类内话题 (slug + id positional, browser) + +# StackOverflow (public) +opencli stackoverflow hot --limit 10 # 热门问题 +opencli stackoverflow search "typescript" # 搜索 (query positional) +opencli stackoverflow bounties --limit 10 # 悬赏问题 + +# WeRead 微信读书 (browser) +opencli weread shelf --limit 10 # 书架 +opencli weread search "AI" # 搜索图书 (query positional) +opencli weread book xxx # 图书详情 (book-id positional) +opencli weread highlights xxx # 划线笔记 (book-id positional) +opencli weread notes xxx # 想法笔记 (book-id positional) +opencli weread ranking --limit 10 # 排行榜 + +# Jimeng 即梦 AI (browser) +opencli jimeng generate --prompt "描述" # AI 生图 +opencli jimeng history --limit 10 # 生成历史 + +# Yollomi yollomi.com (browser — 需在 Chrome 登录 yollomi.com,复用站点 session) +opencli yollomi models --type image # 列出图像模型与积分 +opencli yollomi generate "提示词" --model z-image-turbo # 文生图 +opencli yollomi video "提示词" --model kling-2-1 # 视频 +opencli yollomi upload ./photo.jpg # 上传得 URL,供 img2img / 工具链使用 +opencli yollomi remove-bg # 去背景(免费) +opencli yollomi edit "改成油画风格" # Qwen 图像编辑 +opencli yollomi background # AI 背景生成 (5 credits) +opencli yollomi face-swap --source --target # 换脸 (3 credits) 
+opencli yollomi object-remover # AI 去除物体 (3 credits) +opencli yollomi restore # AI 修复老照片 (4 credits) +opencli yollomi try-on --person --cloth # 虚拟试衣 (3 credits) +opencli yollomi upscale # AI 超分辨率 (1 credit, 支持 --scale 2/4) + +# Grok (default + explicit web) +opencli grok ask --prompt "问题" # 提问 Grok(兼容默认路径) +opencli grok ask --prompt "问题" --web # 显式 grok.com consumer web UI 路径 + +# HuggingFace (public) +opencli hf top --limit 10 # 热门模型 + +# 超星学习通 (browser) +opencli chaoxing assignments # 作业列表 +opencli chaoxing exams # 考试列表 + +# Douban 豆瓣 (browser) +opencli douban search "三体" # 搜索 (query positional) +opencli douban top250 # 豆瓣 Top 250 +opencli douban subject 1234567 # 条目详情 (id positional) +opencli douban photos 30382501 # 图片列表 / 直链(默认海报) +opencli douban download 30382501 # 下载海报 / 剧照 +opencli douban marks --limit 10 # 我的标记 +opencli douban reviews --limit 10 # 短评 + +# Facebook (browser) +opencli facebook feed --limit 10 # 动态流 +opencli facebook profile username # 用户资料 (id positional) +opencli facebook search "AI" # 搜索 (query positional) +opencli facebook friends # 好友列表 +opencli facebook groups # 群组 +opencli facebook events # 活动 +opencli facebook notifications # 通知 +opencli facebook memories # 回忆 +opencli facebook add-friend username # 添加好友 (id positional) +opencli facebook join-group groupid # 加入群组 (id positional) + +# Instagram (browser) +opencli instagram explore # 探索 +opencli instagram profile username # 用户资料 (id positional) +opencli instagram search "AI" # 搜索 (query positional) +opencli instagram user username # 用户详情 (id positional) +opencli instagram followers username # 粉丝 (id positional) +opencli instagram following username # 关注 (id positional) +opencli instagram follow username # 关注用户 (id positional) +opencli instagram unfollow username # 取消关注 (id positional) +opencli instagram like postid # 点赞 (id positional) +opencli instagram unlike postid # 取消点赞 (id positional) +opencli instagram comment postid "评论" # 评论 (id + text positional) +opencli instagram save 
postid # 收藏 (id positional) +opencli instagram unsave postid # 取消收藏 (id positional) +opencli instagram saved # 已收藏列表 + +# TikTok (browser) +opencli tiktok explore # 探索 +opencli tiktok search "AI" # 搜索 (query positional) +opencli tiktok profile username # 用户资料 (id positional) +opencli tiktok user username # 用户详情 (id positional) +opencli tiktok following username # 关注列表 (id positional) +opencli tiktok follow username # 关注 (id positional) +opencli tiktok unfollow username # 取消关注 (id positional) +opencli tiktok like videoid # 点赞 (id positional) +opencli tiktok unlike videoid # 取消点赞 (id positional) +opencli tiktok comment videoid "评论" # 评论 (id + text positional) +opencli tiktok save videoid # 收藏 (id positional) +opencli tiktok unsave videoid # 取消收藏 (id positional) +opencli tiktok live # 直播 +opencli tiktok notifications # 通知 +opencli tiktok friends # 朋友 + +# Medium (browser) +opencli medium feed --limit 10 # 动态流 +opencli medium search "AI" # 搜索 (query positional) +opencli medium user username # 用户主页 (id positional) + +# Substack (browser) +opencli substack feed --limit 10 # 订阅动态 +opencli substack search "AI" # 搜索 (query positional) +opencli substack publication name # 出版物详情 (id positional) + +# Sinablog 新浪博客 (browser) +opencli sinablog hot --limit 10 # 热门 +opencli sinablog search "AI" # 搜索 (query positional) +opencli sinablog article url # 文章详情 +opencli sinablog user username # 用户主页 (id positional) + +# Lobsters (public) +opencli lobsters hot --limit 10 # 热门 +opencli lobsters newest --limit 10 # 最新 +opencli lobsters active --limit 10 # 活跃 +opencli lobsters tag rust # 按标签筛选 (tag positional) + +# Google (public) +opencli google news --limit 10 # 新闻 +opencli google search "AI" # 搜索 (query positional) +opencli google suggest "AI" # 搜索建议 (query positional) +opencli google trends # 趋势 + +# DEV.to (public) +opencli devto top --limit 10 # 热门文章 +opencli devto tag javascript --limit 10 # 按标签 (tag positional) +opencli devto user username # 用户文章 (username positional) + +# Steam 
(public) +opencli steam top-sellers --limit 10 # 热销游戏 + +# Apple Podcasts (public) +opencli apple-podcasts top --limit 10 # 热门播客排行榜 (支持 --country us/cn/gb/jp) +opencli apple-podcasts search "科技" # 搜索播客 (query positional) +opencli apple-podcasts episodes 12345 # 播客剧集列表 (id positional, 用 search 获取 ID) + +# arXiv (public) +opencli arxiv search "attention" # 搜索论文 (query positional) +opencli arxiv paper 1706.03762 # 论文详情 (id positional) + +# Bloomberg (public RSS + browser) +opencli bloomberg main --limit 10 # Bloomberg 首页头条 (RSS) +opencli bloomberg markets --limit 10 # 市场新闻 (RSS) +opencli bloomberg tech --limit 10 # 科技新闻 (RSS) +opencli bloomberg politics --limit 10 # 政治新闻 (RSS) +opencli bloomberg economics --limit 10 # 经济新闻 (RSS) +opencli bloomberg opinions --limit 10 # 观点 (RSS) +opencli bloomberg industries --limit 10 # 行业新闻 (RSS) +opencli bloomberg businessweek --limit 10 # Businessweek (RSS) +opencli bloomberg feeds # 列出所有 RSS feed 别名 +opencli bloomberg news "https://..." # 阅读 Bloomberg 文章全文 (link positional, browser) + +# Coupang 쿠팡 (browser) +opencli coupang search "耳机" # 搜索商品 (query positional, 支持 --filter rocket) +opencli coupang add-to-cart 12345 # 加入购物车 (product-id positional, 或 --url) + +# Dictionary (public) +opencli dictionary search "serendipity" # 单词释义 (word positional) +opencli dictionary synonyms "happy" # 近义词 (word positional) +opencli dictionary examples "ubiquitous" # 例句 (word positional) + +# 豆包 Doubao Web (browser) +opencli doubao status # 检查豆包页面状态 +opencli doubao new # 新建对话 +opencli doubao send "你好" # 发送消息 (text positional) +opencli doubao read # 读取对话记录 +opencli doubao ask "问题" # 一键提问并等回复 (text positional) + +# 京东 JD (browser) +opencli jd item 100291143898 # 商品详情 (sku positional, 含价格/主图/规格) + +# LinkedIn (browser) +opencli linkedin search "AI engineer" # 搜索职位 (query positional, 支持 --location/--company/--remote) +opencli linkedin timeline --limit 20 # 首页动态流 + +# Pixiv (browser) +opencli pixiv ranking --limit 20 # 插画排行榜 (支持 --mode 
daily/weekly/monthly) +opencli pixiv search "風景" # 搜索插画 (query positional) +opencli pixiv user 12345 # 画师资料 (uid positional) +opencli pixiv illusts 12345 # 画师作品列表 (user-id positional) +opencli pixiv detail 12345 # 插画详情 (id positional) +opencli pixiv download 12345 # 下载插画 (illust-id positional) + +# Web (browser) +opencli web read --url "https://..." # 抓取任意网页并导出为 Markdown + +# 微信公众号 Weixin (browser) +opencli weixin download --url "https://mp.weixin.qq.com/s/xxx" # 下载公众号文章为 Markdown + +# 小宇宙 Xiaoyuzhou (public) +opencli xiaoyuzhou podcast 12345 # 播客资料 (id positional) +opencli xiaoyuzhou podcast-episodes 12345 # 播客剧集列表 (id positional) +opencli xiaoyuzhou episode 12345 # 单集详情 (id positional) + +# Wikipedia (public) +opencli wikipedia search "AI" # 搜索 (query positional) +opencli wikipedia summary "Python" # 摘要 (title positional) +``` + +### Desktop Adapter Commands + +```bash +# Cursor (desktop — CDP via Electron) +opencli cursor status # 检查连接 +opencli cursor send "message" # 发送消息 +opencli cursor read # 读取回复 +opencli cursor new # 新建对话 +opencli cursor dump # 导出 DOM 调试信息 +opencli cursor composer # Composer 模式 +opencli cursor model claude # 切换模型 +opencli cursor extract-code # 提取代码块 +opencli cursor ask "question" # 一键提问并等回复 +opencli cursor screenshot # 截图 +opencli cursor history # 对话历史 +opencli cursor export # 导出对话 + +# Codex (desktop — headless CLI agent) +opencli codex status # 检查连接 +opencli codex send "message" # 发送消息 +opencli codex read # 读取回复 +opencli codex new # 新建对话 +opencli codex dump # 导出调试信息 +opencli codex extract-diff # 提取 diff +opencli codex model gpt-4 # 切换模型 +opencli codex ask "question" # 一键提问并等回复 +opencli codex screenshot # 截图 +opencli codex history # 对话历史 +opencli codex export # 导出对话 + +# ChatGPT (desktop — macOS AppleScript/CDP) +opencli chatgpt status # 检查应用状态 +opencli chatgpt new # 新建对话 +opencli chatgpt send "message" # 发送消息 +opencli chatgpt read # 读取回复 +opencli chatgpt ask "question" # 一键提问并等回复 + +# ChatWise (desktop — multi-LLM client) +opencli 
chatwise status # 检查连接 +opencli chatwise new # 新建对话 +opencli chatwise send "message" # 发送消息 +opencli chatwise read # 读取回复 +opencli chatwise ask "question" # 一键提问并等回复 +opencli chatwise model claude # 切换模型 +opencli chatwise history # 对话历史 +opencli chatwise export # 导出对话 +opencli chatwise screenshot # 截图 + +# Notion (desktop — CDP via Electron) +opencli notion status # 检查连接 +opencli notion search "keyword" # 搜索页面 +opencli notion read # 读取当前页面 +opencli notion new # 新建页面 +opencli notion write "content" # 写入内容 +opencli notion sidebar # 侧边栏导航 +opencli notion favorites # 收藏列表 +opencli notion export # 导出 + +# Discord App (desktop — CDP via Electron) +opencli discord-app status # 检查连接 +opencli discord-app send "message" # 发送消息 +opencli discord-app read # 读取消息 +opencli discord-app channels # 频道列表 +opencli discord-app servers # 服务器列表 +opencli discord-app search "keyword" # 搜索 +opencli discord-app members # 成员列表 + +# Doubao App 豆包桌面版 (desktop — CDP via Electron) +opencli doubao-app status # 检查连接 +opencli doubao-app new # 新建对话 +opencli doubao-app send "message" # 发送消息 +opencli doubao-app read # 读取回复 +opencli doubao-app ask "question" # 一键提问并等回复 +opencli doubao-app screenshot # 截图 +opencli doubao-app dump # 导出 DOM 调试信息 +``` +### Management Commands + +```bash +opencli list # List all commands (including External CLIs) +opencli list --json # JSON output +opencli list -f yaml # YAML output +opencli install # Auto-install an external CLI (e.g., gh, obsidian) +opencli register # Register a local custom CLI for unified discovery +opencli validate # Validate all CLI definitions +opencli validate bilibili # Validate specific site +opencli doctor # Diagnose browser bridge (auto-starts daemon, includes live test) +``` + +### AI Agent Workflow + +```bash +# Deep Explore: network intercept → response analysis → capability inference +opencli explore --site + +# Synthesize: generate evaluate-based YAML pipelines from explore artifacts +opencli synthesize + +# Generate: one-shot explore → 
synthesize → register +opencli generate --goal "hot" + +# Record: YOU operate the page, opencli captures every API call → YAML candidates +# Opens the URL in automation window, injects fetch/XHR interceptor into ALL tabs, +# polls every 2s, auto-stops after 60s (or press Enter to stop early). +opencli record # 录制,site name 从域名推断 +opencli record --site mysite # 指定 site name +opencli record --timeout 120000 # 自定义超时(毫秒,默认 60000) +opencli record --poll 1000 # 缩短轮询间隔(毫秒,默认 2000) +opencli record --out .opencli/record/x # 自定义输出目录 +# Output: +# .opencli/record//captured.json ← 原始捕获数据(带 url/method/body) +# .opencli/record//candidates/*.yaml ← 高置信度候选适配器(score ≥ 8,有 array 结果) + +# Operate: AI agent autonomously controls the browser to complete tasks +# Supports Anthropic (Claude) and OpenAI (GPT) models +# Requires: OPENCLI_PROVIDER, OPENCLI_API_KEY, optionally OPENCLI_MODEL, OPENCLI_BASE_URL +opencli operate "go to HN and extract the top 5 stories" +opencli operate --url https://github.com/trending "extract top 3 repos" +opencli operate -v "fill the form with test data" # verbose: see each step +opencli operate --save-as hn/top "get top HN stories" # save as reusable skill +opencli operate --screenshot "describe this page layout" # include screenshots for LLM +opencli operate --max-steps 20 "quick task" # limit step count +# After --save-as, the skill runs without AI: +# opencli hn top + +# Strategy Cascade: auto-probe PUBLIC → COOKIE → HEADER +opencli cascade + +# Explore with interactive fuzzing (click buttons to trigger lazy APIs) +opencli explore --auto --click "字幕,CC,评论" + +# Validate: validate adapter definitions +opencli validate +``` + +## Output Formats + +All built-in commands support `--format` / `-f` with `table`, `json`, `yaml`, `md`, and `csv`. +The `list` command supports the same formats and also keeps `--json` as a compatibility alias. 
+ +```bash +opencli list -f yaml # YAML command registry +opencli bilibili hot -f table # Default: rich table +opencli bilibili hot -f json # JSON (pipe to jq, feed to AI agent) +opencli bilibili hot -f yaml # YAML (readable structured output) +opencli bilibili hot -f md # Markdown +opencli bilibili hot -f csv # CSV +``` + +## Verbose Mode + +```bash +opencli bilibili hot -v # Show each pipeline step and data flow +``` + diff --git a/skills/operate/SKILL.md b/skills/operate/SKILL.md new file mode 100644 index 00000000..bc4c0470 --- /dev/null +++ b/skills/operate/SKILL.md @@ -0,0 +1,170 @@ +--- +name: opencli-operate +description: Browser automation via OpenCLI. Navigate websites, click elements, fill forms, extract data, and take screenshots — all using Chrome with existing login sessions. Use when the user needs to interact with web pages, fill forms, extract web data, or automate browser tasks. +allowed-tools: Bash(opencli:*) +--- + +# Browser Automation with OpenCLI + +OpenCLI provides browser automation that reuses your existing Chrome login sessions — no passwords needed. + +## Prerequisites + +```bash +opencli doctor # Verify extension + daemon + LLM connectivity +``` + +Requires: Chrome running + OpenCLI Browser Bridge extension installed. + +## Two Modes + +### Mode 1: AI Agent (fully autonomous) + +Let the AI agent complete a task end-to-end: + +```bash +opencli operate "go to Hacker News and extract the top 5 stories" +opencli operate --url https://github.com/trending "extract top 3 repos" +opencli operate -v "fill the login form with test@example.com" +``` + +Requires `OPENCLI_API_KEY` for LLM calls. See OPERATE.md for full config. + +### Mode 2: Manual Commands (Claude Code controls the loop) + +Claude Code drives the browser step-by-step using CLI commands. **No LLM API key needed** — Claude Code IS the LLM. + +#### Core Workflow + +1. **Navigate**: open a URL +2. **Inspect**: get page state with element indices +3. 
**Interact**: use indices to click, type, select +4. **Verify**: check state or take screenshot +5. **Repeat**: browser stays open between commands + +#### Navigation + +```bash +opencli browse open <url> # Open URL in automation window +opencli browse back # Go back in history +opencli browse scroll down # Scroll down +opencli browse scroll up # Scroll up +``` + +#### Page State — always run this first to get element indices + +```bash +opencli browse state # Returns: URL, title, interactive elements with [N] indices +opencli browse screenshot [path.png] # Take screenshot (base64 if no path) +``` + +#### Interactions — use indices from state + +```bash +opencli browse click <N> # Click element [N] +opencli browse type <N> "text" # Click element [N], then type text +opencli browse select <N> "option" # Select dropdown option +opencli browse keys "Enter" # Press keyboard key +opencli browse eval "document.title" # Execute JavaScript, return result +``` + +#### Data Extraction + +```bash +opencli browse eval "document.querySelectorAll('.item').length" +opencli browse eval "JSON.stringify([...document.querySelectorAll('h2')].map(e => e.textContent))" +``` + +#### Example: Extract HN Stories + +```bash +opencli browse open https://news.ycombinator.com +opencli browse state # See elements: [1] a "Story 1", [2] a "Story 2"... +opencli browse eval "JSON.stringify([...document.querySelectorAll('.titleline a')].slice(0,5).map(a => ({title: a.textContent, url: a.href})))" +``` + +#### Example: Fill a Form + +```bash +opencli browse open https://httpbin.org/forms/post +opencli browse state # See: [3] input "Customer Name", [4] input "Telephone"... 
+opencli browse type 3 "OpenCLI" +opencli browse type 4 "555-0100" +opencli browse click 7 # Click submit (DON'T if user said "don't submit") +``` + +## Saving as Reusable CLI + +After successfully completing a browser task, save it as a permanent CLI command: + +### Via operate --save-as + +```bash +opencli operate --save-as hn/top "get top 5 HN stories" --url https://news.ycombinator.com +# Future: opencli hn top (no LLM needed) +``` + +### Via Claude Code (recommended — higher quality) + +After manually completing a task with `opencli browse` commands, write a TS adapter: + +```typescript +// ~/.opencli/clis/hn/top.ts +import { cli, Strategy } from '@jackwener/opencli/registry'; + +cli({ + site: 'hn', + name: 'top', + description: 'Top Hacker News stories', + domain: 'news.ycombinator.com', + strategy: Strategy.PUBLIC, + browser: false, + args: [{ name: 'limit', type: 'int', default: 5 }], + columns: ['rank', 'title', 'score', 'url'], + func: async (_page, kwargs) => { + const resp = await fetch('https://hacker-news.firebaseio.com/v0/topstories.json'); + const ids = await resp.json(); + const items = await Promise.all( + ids.slice(0, kwargs.limit).map(async (id, i) => { + const item = await (await fetch(`https://hacker-news.firebaseio.com/v0/item/${id}.json`)).json(); + return { rank: i + 1, title: item.title, score: item.score, url: item.url }; + }) + ); + return items; + }, +}); +``` + +Save to `~/.opencli/clis/<site>/<name>.ts`. The adapter is immediately available as `opencli <site> <name>`. 
+ +### Adapter Strategy Guide + +Choose the simplest strategy that works: + +| Strategy | When | browser: | +|----------|------|----------| +| `Strategy.PUBLIC` | No auth needed, public API available | `false` | +| `Strategy.COOKIE` | Needs login cookies, fetch with `credentials: 'include'` | `true` | +| `Strategy.INTERCEPT` | SPA that triggers API on navigation | `true` | +| `Strategy.UI` | Must interact with DOM directly | `true` | + +**Always prefer API over UI** — if you discovered an API endpoint during browsing, use it directly with `fetch()`. + +## Configuration + +```bash +# For operate mode (AI agent) +export OPENCLI_PROVIDER=anthropic # or openai +export OPENCLI_MODEL=sonnet # model alias +export OPENCLI_API_KEY=sk-ant-... # API key +export OPENCLI_BASE_URL=https://... # optional proxy + +# For browse mode (manual commands) +# No LLM config needed — just Chrome + extension +``` + +## Troubleshooting + +- **"Extension not connected"** → `opencli doctor` +- **"attach failed: chrome-extension://"** → Disable 1Password or other debugger extensions temporarily +- **Element not found** → `opencli browse scroll down` then `opencli browse state` diff --git a/src/agent/action-executor.ts b/src/agent/action-executor.ts new file mode 100644 index 00000000..8bd549ee --- /dev/null +++ b/src/agent/action-executor.ts @@ -0,0 +1,423 @@ +/** + * Action Executor — dispatches parsed LLM actions to the browser via IPage. + * + * Prioritizes native CDP Input events (nativeClick/nativeType) when available, + * falls back to JS injection (page.click/page.typeText) for compatibility. 
+ */ + +import type { IPage } from '../types.js'; +import type { AgentAction, ActionResult } from './types.js'; +import type { ElementInfo } from './dom-context.js'; + +export class ActionExecutor { + constructor(private page: IPage) {} + + async execute( + action: AgentAction, + elementMap: Map, + ): Promise { + try { + switch (action.type) { + case 'click': + return await this.executeClick(action, elementMap); + case 'type': + return await this.executeType(action, elementMap); + case 'navigate': + return await this.executeNavigate(action); + case 'scroll': + return await this.executeScroll(action, elementMap); + case 'wait': + return await this.executeWait(action); + case 'extract': + return await this.executeExtract(action); + case 'go_back': + return await this.executeGoBack(); + case 'press_key': + return await this.executePressKey(action); + case 'select_dropdown': + return await this.executeSelectDropdown(action, elementMap); + case 'switch_tab': + return await this.executeSwitchTab(action); + case 'open_tab': + return await this.executeOpenTab(action); + case 'close_tab': + return await this.executeCloseTab(); + case 'search_page': + return await this.executeSearchPage(action); + case 'done': + return { action, success: true, extractedContent: action.result }; + default: + return { action, success: false, error: `Unknown action type: ${(action as AgentAction).type}` }; + } + } catch (err) { + return { + action, + success: false, + error: err instanceof Error ? 
err.message : String(err), + }; + } + } + + // ── Click ─────────────────────────────────────────────────────────────── + + private async executeClick( + action: Extract, + elementMap: Map, + ): Promise { + const el = elementMap.get(action.index); + if (!el) { + return { action, success: false, error: `Element [${action.index}] not found in current snapshot` }; + } + + // Auto-detect — use "select_dropdown" action instead` }; + } + + await this.clickElement(action.index, el); + await this.page.wait(0.5); + return { action, success: true }; + } + + /** Scroll an element into the viewport center before interacting with it. */ + private async scrollIntoView(index: number): Promise { + await this.page.evaluate(` + (function() { + var el = document.querySelector('[data-opencli-ref="${index}"]'); + if (el) el.scrollIntoView({ block: 'nearest', behavior: 'instant' }); + })() + `); + await this.page.wait(0.3); + } + + /** Click an element: scroll into view, try CDP click, fallback to JS click, final fallback to evaluate */ + private async clickElement(index: number, el: ElementInfo): Promise { + // Always scroll into view first — CDP mouse events only work within the viewport + await this.scrollIntoView(index); + + // Strategy 1: Native CDP click with fresh coordinates + if (this.page.nativeClick) { + try { + const freshPos = await this.page.evaluate(` + (function() { + var el = document.querySelector('[data-opencli-ref="${index}"]'); + if (!el) return null; + var r = el.getBoundingClientRect(); + return { x: r.x + r.width / 2, y: r.y + r.height / 2 }; + })() + `) as { x: number; y: number } | null; + + if (freshPos && freshPos.x > 0 && freshPos.y > 0) { + await this.page.nativeClick(freshPos.x, freshPos.y); + return; + } + } catch { + // CDP click failed — try next strategy + } + } + + // Strategy 2: JS click via page.click (uses dom-helpers clickJs) + try { + await this.page.click(String(index)); + return; + } catch { + // page.click also failed — try final fallback + } 
+ + // Strategy 3: Direct JS evaluate click (most robust, works even if ref format differs) + await this.page.evaluate(` + (function() { + var el = document.querySelector('[data-opencli-ref="${index}"]'); + if (el) { el.click(); return; } + // Fallback: try finding by link text if it's a link + var links = document.querySelectorAll('a'); + for (var i = 0; i < links.length; i++) { + if (links[i].textContent.trim().includes(${JSON.stringify(el.text.slice(0, 30))})) { + links[i].click(); + return; + } + } + })() + `); + } + + /** Type into an element: try native CDP, fallback to JS injection */ + private async typeIntoElement(index: number, text: string): Promise { + if (this.page.nativeType) { + try { + await this.page.nativeType(text); + return; + } catch { + // CDP type failed — fallback to JS + } + } + await this.page.typeText(String(index), text); + } + + // ── Type ──────────────────────────────────────────────────────────────── + + private async executeType( + action: Extract, + elementMap: Map, + ): Promise { + const el = elementMap.get(action.index); + if (!el) { + return { action, success: false, error: `Element [${action.index}] not found in current snapshot` }; + } + + // Click to focus the element first, then verify focus + await this.clickElement(action.index, el); + await this.page.wait(0.2); + + // Verify the element is actually focused — if not, force focus via JS + const isFocused = await this.page.evaluate(` + (function() { + var el = document.querySelector('[data-opencli-ref="${action.index}"]'); + if (!el) return false; + if (document.activeElement !== el) { el.focus(); } + return document.activeElement === el; + })() + `) as boolean; + if (!isFocused) { + return { action, success: false, error: `Element [${action.index}] could not be focused` }; + } + + // Clear existing content — use JS selectAll to avoid macOS Cmd vs Ctrl issue + await this.page.evaluate(` + (function() { + var el = 
document.querySelector('[data-opencli-ref="${action.index}"]'); + if (el && (el.tagName === 'INPUT' || el.tagName === 'TEXTAREA')) { + el.select(); + } else if (el) { + document.execCommand('selectAll'); + } + })() + `); + await this.page.wait(0.1); + + // Type the text + await this.typeIntoElement(action.index, action.text); + + // Post-type value verification: check if the field actually has the expected value + let mismatchNote = ''; + try { + const actualValue = await this.page.evaluate(` + (function() { + var el = document.querySelector('[data-opencli-ref="${action.index}"]'); + return el ? (el.value || el.textContent || '').trim() : null; + })() + `) as string | null; + if (actualValue !== null && actualValue !== action.text) { + mismatchNote = `\n⚠️ Note: field's actual value "${actualValue}" differs from typed text "${action.text}". The page may have reformatted or autocompleted your input.`; + } + } catch { /* non-fatal */ } + + // Autocomplete fields: wait for JS dropdown to populate, then notify LLM + if (el.isAutocomplete) { + await this.page.wait(0.4); + return { + action, + success: true, + extractedContent: '⚠️ This is an autocomplete field. Wait for suggestions to appear (new elements marked with *[]), then click the correct suggestion instead of pressing Enter.' 
+ mismatchNote, + }; + } + + // Optionally press Enter + if (action.pressEnter) { + await this.page.wait(0.2); + if (this.page.nativeKeyPress) { + await this.page.nativeKeyPress('Enter'); + } else { + await this.page.pressKey('Enter'); + } + } + + return { action, success: true, extractedContent: mismatchNote || undefined }; + } + + // ── Navigate ──────────────────────────────────────────────────────────── + + private async executeNavigate( + action: Extract, + ): Promise { + await this.page.goto(action.url); + await this.page.wait(2); + return { action, success: true }; + } + + // ── Scroll ────────────────────────────────────────────────────────────── + + private async executeScroll( + action: Extract, + elementMap: Map, + ): Promise { + const amount = action.amount ?? 500; + + if (action.index !== undefined) { + // Scroll within a specific element + const el = elementMap.get(action.index); + if (el) { + const scrollAmount = action.direction === 'up' ? -amount : amount; + await this.page.evaluate(` + (function() { + var els = document.querySelectorAll('[data-opencli-ref=' + ${JSON.stringify(String(action.index))} + ']'); + if (els[0]) els[0].scrollBy(0, ${JSON.stringify(scrollAmount)}); + })() + `); + } + } else { + await this.page.scroll(action.direction, amount); + } + + await this.page.wait(0.5); + return { action, success: true }; + } + + // ── Wait ──────────────────────────────────────────────────────────────── + + private async executeWait( + action: Extract, + ): Promise { + const seconds = Math.min(action.seconds ?? 
2, 10); // Cap at 10s + await this.page.wait(seconds); + return { action, success: true }; + } + + // ── Extract ───────────────────────────────────────────────────────────── + + private async executeExtract( + action: Extract, + ): Promise { + const content = await this.page.evaluate(` + (function() { + var body = document.body; + if (!body) return ''; + return body.innerText.slice(0, 5000); + })() + `) as string; + + return { + action, + success: true, + extractedContent: content || '(empty page)', + }; + } + + // ── Go Back ───────────────────────────────────────────────────────────── + + private async executeGoBack(): Promise { + await this.page.evaluate('history.back()'); + await this.page.wait(2); + return { action: { type: 'go_back' }, success: true }; + } + + // ── Press Key ─────────────────────────────────────────────────────────── + + private async executePressKey( + action: Extract, + ): Promise { + if (this.page.nativeKeyPress) { + try { + await this.page.nativeKeyPress(action.key); + await this.page.wait(0.5); + return { action, success: true }; + } catch { + // fallback + } + } + await this.page.pressKey(action.key); + await this.page.wait(0.5); + return { action, success: true }; + } + + // ── Select Dropdown ───────────────────────────────────────────────────── + + private async executeSelectDropdown( + action: Extract, + elementMap: Map, + ): Promise { + const el = elementMap.get(action.index); + if (!el) { + return { action, success: false, error: `Element [${action.index}] not found` }; + } + + const indexStr = JSON.stringify(String(action.index)); + const optionText = JSON.stringify(action.option); + const result = await this.page.evaluate(` + (function() { + var selects = document.querySelectorAll('[data-opencli-ref=' + ${indexStr} + ']'); + var sel = selects[0]; + if (!sel || sel.tagName !== 'SELECT') return { error: 'Not a +Each step you receive: +1. Your previous evaluation and action results +2. 
Current page state (URL, title, viewport, interactive element count) +3. The page DOM as an indexed element tree +4. Optionally, a screenshot of the current page + + + +The DOM uses this notation: +- \`[N]text\` — interactive element with index N (use this index in actions) +- \`*[N]\` — NEW element that appeared since the last step +- Indentation shows nesting depth +- \`|scroll|\` prefix marks scrollable containers with scroll position info +- Only interactive and visible elements are shown + + + +You MUST respond with a JSON object containing ALL of these fields: + +{ + "evaluationPreviousGoal": "1-sentence: did the previous action succeed/fail and why", + "thinking": "Your reasoning about the current state (2-4 sentences)", + "memory": "Key facts to persist across steps (optional, update when new info discovered)", + "nextGoal": "What the next action(s) will achieve (1 sentence)", + "plan": ["remaining step 1", "remaining step 2", "..."], + "actions": [{"type": "...", ...}] +} + + + +Page-changing actions (put LAST in actions array — page will reload after these): +- {"type": "navigate", "url": "https://..."} — Go to URL +- {"type": "click", "index": N} — Click element [N] (may trigger navigation) +- {"type": "go_back"} — Go back in browser history +- {"type": "open_tab", "url": "https://..."} — Open URL in new tab +- {"type": "switch_tab", "tabIndex": N} — Switch to tab N +- {"type": "close_tab"} — Close current tab + +Safe actions (can chain multiple before a page-changing action): +- {"type": "type", "index": N, "text": "...", "pressEnter": true} — Type into element [N] +- {"type": "scroll", "direction": "down", "amount": 500} — Scroll page +- {"type": "scroll", "direction": "down", "index": N} — Scroll within element [N] +- {"type": "wait", "seconds": 2} — Wait for page to update +- {"type": "press_key", "key": "Enter"} — Press keyboard key (Enter, Escape, Tab, Control+a, etc.) 
+- {"type": "select_dropdown", "index": N, "option": "Option text"} — Select from dropdown +- {"type": "search_page", "query": "text"} — Search for text on the page + +Data actions: +- {"type": "extract", "goal": "what to extract"} — Extract information from page +- {"type": "done", "result": "summary", "extractedData": {...}, "success": true} — Task complete + + + +ELEMENT INTERACTION: +1. Only use element indices [N] that exist in the CURRENT DOM snapshot +2. If an element is not visible, scroll down to reveal it before interacting +3. For dropdowns ( element'), + option: z.string().describe('Option text to select'), +}); + +export const SwitchTabAction = z.object({ + type: z.literal('switch_tab'), + tabIndex: z.number().describe('Tab index to switch to'), +}); + +export const OpenTabAction = z.object({ + type: z.literal('open_tab'), + url: z.string().optional().describe('URL to open in new tab'), +}); + +export const CloseTabAction = z.object({ + type: z.literal('close_tab'), +}); + +export const SearchPageAction = z.object({ + type: z.literal('search_page'), + query: z.string().describe('Text to search for on the page'), +}); + +export const DoneAction = z.object({ + type: z.literal('done'), + result: z.string().optional().describe('Summary of what was accomplished'), + extractedData: z.unknown().optional().describe('Structured data extracted'), + success: z.boolean().optional().default(true).describe('Whether the task was completed successfully'), +}); + +export const AgentAction = z.discriminatedUnion('type', [ + ClickAction, + TypeAction, + NavigateAction, + ScrollAction, + WaitAction, + ExtractAction, + GoBackAction, + PressKeyAction, + SelectDropdownAction, + SwitchTabAction, + OpenTabAction, + CloseTabAction, + SearchPageAction, + DoneAction, +]); + +export type AgentAction = z.infer; + +// ── Agent Response Schema (with planning + self-evaluation) ───────────────── + +export const AgentResponse = z.object({ + evaluationPreviousGoal: 
z.string().describe('1-sentence evaluation: did the previous action succeed or fail, and why?'), + thinking: z.string().describe('Your reasoning about the current state and what to do next'), + memory: z.string().optional().describe('Important information to remember across steps'), + nextGoal: z.string().describe('What the next action will achieve'), + plan: z.array(z.string()).optional().describe('Updated task plan — list of remaining steps'), + actions: z.array(AgentAction).min(1).max(5).describe('Actions to execute'), +}); + +export type AgentResponse = z.infer; + +// ── Action Result ─────────────────────────────────────────────────────────── + +export interface ActionResult { + action: AgentAction; + success: boolean; + error?: string; + extractedContent?: string; +} + +// ── Planning ──────────────────────────────────────────────────────────────── + +export type PlanItemStatus = 'pending' | 'current' | 'done' | 'skipped'; + +export interface PlanItem { + text: string; + status: PlanItemStatus; +} + +// ── Agent Configuration ───────────────────────────────────────────────────── + +export interface AgentConfig { + task: string; + startUrl?: string; + maxSteps?: number; + maxConsecutiveErrors?: number; + useScreenshot?: boolean; + model?: string; + verbose?: boolean; + workspace?: string; + record?: boolean; + saveAs?: string; + /** LLM call timeout in ms (default 60000) */ + llmTimeout?: number; + /** Sensitive data patterns to mask before sending to LLM */ + sensitivePatterns?: Record; +} + +// ── Agent Result ──────────────────────────────────────────────────────────── + +export interface AgentResult { + success: boolean; + status: 'done' | 'error' | 'max_steps'; + result?: string; + extractedData?: unknown; + stepsCompleted: number; + tokenUsage: { input: number; output: number; estimatedCost: number }; + trace?: import('./trace-recorder.js').RichTrace; +} + +// ── Agent Step ────────────────────────────────────────────────────────────── + +export 
interface AgentStep { + stepNumber: number; + url: string; + response: AgentResponse; + results: ActionResult[]; +} diff --git a/src/browser/daemon-client.ts b/src/browser/daemon-client.ts index 97e7b782..93b132f5 100644 --- a/src/browser/daemon-client.ts +++ b/src/browser/daemon-client.ts @@ -19,7 +19,8 @@ function generateId(): string { export interface DaemonCommand { id: string; - action: 'exec' | 'navigate' | 'tabs' | 'cookies' | 'screenshot' | 'close-window' | 'sessions' | 'set-file-input' | 'bind-current'; + + action: 'exec' | 'navigate' | 'tabs' | 'cookies' | 'screenshot' | 'close-window' | 'sessions' | 'set-file-input' | 'bind-current' | 'cdp'; tabId?: number; code?: string; workspace?: string; @@ -32,10 +33,13 @@ export interface DaemonCommand { format?: 'png' | 'jpeg'; quality?: number; fullPage?: boolean; + /** Local file paths for set-file-input action */ files?: string[]; /** CSS selector for file input element (set-file-input action) */ selector?: string; + cdpMethod?: string; + cdpParams?: Record; } export interface DaemonResult { diff --git a/src/browser/page.ts b/src/browser/page.ts index 4b10c58b..7afddaa2 100644 --- a/src/browser/page.ts +++ b/src/browser/page.ts @@ -378,6 +378,53 @@ export class Page implements IPage { ...this._cmdOpts(), }); } + + async cdp(method: string, params: Record = {}): Promise { + return sendCommand('cdp', { + cdpMethod: method, + cdpParams: params, + ...this._cmdOpts(), + }); + } + + async nativeClick(x: number, y: number): Promise { + await this.cdp('Input.dispatchMouseEvent', { + type: 'mousePressed', + x, y, + button: 'left', + clickCount: 1, + }); + await this.cdp('Input.dispatchMouseEvent', { + type: 'mouseReleased', + x, y, + button: 'left', + clickCount: 1, + }); + } + + async nativeType(text: string): Promise { + // Use Input.insertText for reliable Unicode/CJK text insertion + await this.cdp('Input.insertText', { text }); + } + + async nativeKeyPress(key: string, modifiers: string[] = []): Promise { + let 
modifierFlags = 0; + for (const mod of modifiers) { + if (mod === 'Alt') modifierFlags |= 1; + if (mod === 'Ctrl') modifierFlags |= 2; + if (mod === 'Meta') modifierFlags |= 4; + if (mod === 'Shift') modifierFlags |= 8; + } + await this.cdp('Input.dispatchKeyEvent', { + type: 'keyDown', + key, + modifiers: modifierFlags, + }); + await this.cdp('Input.dispatchKeyEvent', { + type: 'keyUp', + key, + modifiers: modifierFlags, + }); + } } -// (End of file) diff --git a/src/cli.ts b/src/cli.ts index 4610c520..fa711b8d 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -11,12 +11,20 @@ import { type CliCommand, fullName, getRegistry, strategyLabel } from './registr import { serializeCommand, formatArgSummary } from './serialization.js'; import { render as renderOutput } from './output.js'; import { getBrowserFactory, browserSession } from './runtime.js'; +import type { IPage } from './types.js'; import { PKG_VERSION } from './version.js'; import { printCompletionScript } from './completion.js'; import { loadExternalClis, executeExternalCli, installExternalCli, registerExternalCli, isBinaryInstalled } from './external.js'; import { registerAllCommands } from './commanderAdapter.js'; import { EXIT_CODES, getErrorMessage } from './errors.js'; +/** Create a browser page for browse commands. Uses 'browse' workspace for session persistence. 
*/ +async function getBrowsePage(): Promise { + const { BrowserBridge } = await import('./browser/index.js'); + const bridge = new BrowserBridge(); + return bridge.connect({ timeout: 30, workspace: 'browse' }); +} + export function runCli(BUILTIN_CLIS: string, USER_CLIS: string): void { const program = new Command(); // enablePositionalOptions: prevents parent from consuming flags meant for subcommands; @@ -228,6 +236,185 @@ export function runCli(BUILTIN_CLIS: string, USER_CLIS: string): void { console.log(renderCascadeResult(result)); }); + // ── Built-in: operate (AI Agent) ──────────────────────────────────────────── + + program + .command('operate') + .alias('op') + .description('AI agent: autonomously operate a website to complete a task') + .argument('', 'Natural language task description') + .option('--url ', 'Starting URL (agent navigates if omitted)') + .option('--max-steps ', 'Maximum agent steps', '50') + .option('--screenshot', 'Include screenshots in LLM context', false) + .option('--record', 'Record action trace', false) + .option('--save-as ', 'Save as reusable CLI skill after completion') + .option('-v, --verbose', 'Show step-by-step reasoning', false) + .action(async (task, opts) => { + const { runAgent, renderAgentResult } = await import('./agent/cli-handler.js'); + const { LLMClient } = await import('./agent/llm-client.js'); + const llm = new LLMClient(); + const modelDisplay = llm.getModelDisplay(); + const result = await runAgent({ + task, + startUrl: opts.url, + maxSteps: parseInt(opts.maxSteps, 10), + useScreenshot: opts.screenshot, + record: opts.record, + saveAs: opts.saveAs, + verbose: opts.verbose, + BrowserFactory: getBrowserFactory(), + }); + console.log(renderAgentResult(result, modelDisplay)); + process.exitCode = result.success ? 
EXIT_CODES.SUCCESS : EXIT_CODES.GENERIC_ERROR; + }); + + // ── Built-in: browse (manual browser control for Claude Code) ────────────── + + const browse = program + .command('browse') + .description('Manual browser control — navigate, click, type, extract (no LLM needed)'); + + browse + .command('open') + .argument('') + .description('Open URL in automation window') + .action(async (url) => { + const page = await getBrowsePage(); + await page.goto(url); + await page.wait(2); + const currentUrl = await page.getCurrentUrl?.() ?? url; + console.log(`Navigated to: ${currentUrl}`); + }); + + browse + .command('state') + .description('Get page state: URL, title, interactive elements with [N] indices') + .action(async () => { + const page = await getBrowsePage(); + const snapshot = await page.snapshot({ viewportExpand: 800 }); + const url = await page.getCurrentUrl?.() ?? ''; + console.log(`URL: ${url}\n`); + console.log(typeof snapshot === 'string' ? snapshot : JSON.stringify(snapshot, null, 2)); + }); + + browse + .command('click') + .argument('', 'Element index from state') + .description('Click element by index') + .action(async (index) => { + const page = await getBrowsePage(); + await page.click(index); + console.log(`Clicked element [${index}]`); + }); + + browse + .command('type') + .argument('', 'Element index from state') + .argument('', 'Text to type') + .description('Click element, then type text') + .action(async (index, text) => { + const page = await getBrowsePage(); + await page.click(index); + await page.wait(0.2); + await page.typeText(index, text); + console.log(`Typed "${text}" into element [${index}]`); + }); + + browse + .command('select') + .argument('', 'Element index of ' }; + var match = Array.from(sel.options).find(o => o.text.trim() === ${JSON.stringify(option)} || o.value === ${JSON.stringify(option)}); + if (!match) return { error: 'Option not found', available: Array.from(sel.options).map(o => o.text.trim()) }; + var setter = 
Object.getOwnPropertyDescriptor(HTMLSelectElement.prototype, 'value')?.set; + if (setter) setter.call(sel, match.value); else sel.value = match.value; + sel.dispatchEvent(new Event('input', {bubbles:true})); + sel.dispatchEvent(new Event('change', {bubbles:true})); + return { selected: match.text }; + })() + `); + const r = result as { error?: string; selected?: string; available?: string[] } | null; + if (r?.error) { + console.error(`Error: ${r.error}${r.available ? ` — Available: ${r.available.join(', ')}` : ''}`); + process.exitCode = EXIT_CODES.GENERIC_ERROR; + } else { + console.log(`Selected "${r?.selected}" in element [${index}]`); + } + }); + + browse + .command('keys') + .argument('', 'Key to press (Enter, Escape, Tab, Control+a, etc.)') + .description('Press keyboard key') + .action(async (key) => { + const page = await getBrowsePage(); + await page.pressKey(key); + console.log(`Pressed: ${key}`); + }); + + browse + .command('eval') + .argument('', 'JavaScript code to evaluate') + .description('Execute JavaScript in page context, return result') + .action(async (js) => { + const page = await getBrowsePage(); + const result = await page.evaluate(js); + if (typeof result === 'string') console.log(result); + else console.log(JSON.stringify(result, null, 2)); + }); + + browse + .command('screenshot') + .argument('[path]', 'Save to file path (prints base64 if omitted)') + .description('Take screenshot') + .action(async (path) => { + const page = await getBrowsePage(); + if (path) { + await page.screenshot({ path }); + console.log(`Screenshot saved to: ${path}`); + } else { + const base64 = await page.screenshot({ format: 'png' }); + console.log(base64); + } + }); + + browse + .command('scroll') + .argument('', 'up or down') + .option('--amount ', 'Pixels to scroll', '500') + .description('Scroll page') + .action(async (direction, opts) => { + const page = await getBrowsePage(); + await page.scroll(direction, parseInt(opts.amount, 10)); + console.log(`Scrolled 
${direction}`); + }); + + browse + .command('back') + .description('Go back in browser history') + .action(async () => { + const page = await getBrowsePage(); + await page.evaluate('history.back()'); + await page.wait(2); + console.log('Navigated back'); + }); + + browse + .command('close') + .description('Close the automation window') + .action(async () => { + const page = await getBrowsePage(); + await page.closeWindow?.(); + console.log('Automation window closed'); + }); + // ── Built-in: doctor / completion ────────────────────────────────────────── program diff --git a/src/doctor.ts b/src/doctor.ts index 00827a76..f765dca6 100644 --- a/src/doctor.ts +++ b/src/doctor.ts @@ -26,12 +26,21 @@ export type ConnectivityResult = { durationMs: number; }; +export type LLMStatus = { + configured: boolean; + provider?: string; + model?: string; + connected?: boolean; + error?: string; +}; + export type DoctorReport = { cliVersion?: string; daemonRunning: boolean; extensionConnected: boolean; extensionVersion?: string; connectivity?: ConnectivityResult; + llm?: LLMStatus; sessions?: Array<{ workspace: string; windowId: number; tabCount: number; idleMsRemaining: number }>; issues: string[]; }; @@ -78,6 +87,30 @@ export async function runBrowserDoctor(opts: DoctorOptions = {}): Promise : undefined; + // LLM configuration check + let llm: LLMStatus = { configured: false }; + try { + const { LLMClient } = await import('./agent/llm-client.js'); + const client = new LLMClient(); + llm = { + configured: true, + provider: client.getProvider(), + model: client.getModelId(), + }; + // Quick connectivity test: send a minimal request + if (opts.live) { + try { + const response = await client.generateRaw('Reply with exactly: ok', 'test'); + llm.connected = response.trim().toLowerCase().includes('ok'); + } catch (err) { + llm.connected = false; + llm.error = getErrorMessage(err); + } + } + } catch (err) { + llm = { configured: false, error: getErrorMessage(err) }; + } + const issues: 
string[] = []; if (!status.running) { issues.push('Daemon is not running. It should start automatically when you run an opencli browser command.'); @@ -94,6 +127,16 @@ export async function runBrowserDoctor(opts: DoctorOptions = {}): Promise = { RATE_LIMITED: '⏳', PAGE_CHANGED: '🔄', CONFIG: '⚙️ ', + AGENT: '🤖', + AGENT_BUDGET: '📊', }; diff --git a/src/types.ts b/src/types.ts index f1647e6e..68160430 100644 --- a/src/types.ts +++ b/src/types.ts @@ -75,4 +75,12 @@ export interface IPage { closeWindow?(): Promise; /** Returns the current page URL, or null if unavailable. */ getCurrentUrl?(): Promise; + /** Send a raw CDP command via chrome.debugger passthrough. */ + cdp?(method: string, params?: Record): Promise; + /** Click at native coordinates via CDP Input.dispatchMouseEvent. */ + nativeClick?(x: number, y: number): Promise; + /** Type text character-by-character via CDP Input.dispatchKeyEvent. */ + nativeType?(text: string): Promise; + /** Press a key via CDP Input.dispatchKeyEvent. */ + nativeKeyPress?(key: string, modifiers?: string[]): Promise; }