diff --git a/.changeset/fix-anthropic-cua-triple-click.md b/.changeset/fix-anthropic-cua-triple-click.md
new file mode 100644
index 000000000..6f2d57fe6
--- /dev/null
+++ b/.changeset/fix-anthropic-cua-triple-click.md
@@ -0,0 +1,5 @@
+---
+"@browserbasehq/stagehand": patch
+---
+
+Fix Anthropic CUA `triple_click` action mapping.
diff --git a/packages/core/lib/v3/agent/AnthropicCUAClient.ts b/packages/core/lib/v3/agent/AnthropicCUAClient.ts
index 4bbe2d47b..752d208e2 100644
--- a/packages/core/lib/v3/agent/AnthropicCUAClient.ts
+++ b/packages/core/lib/v3/agent/AnthropicCUAClient.ts
@@ -901,6 +901,17 @@ export class AnthropicCUAClient extends AgentClient {
               (input.coordinate ? (input.coordinate as number[])[1] : 0),
             ...input,
           };
+        } else if (action === "triple_click" || action === "tripleClick") {
+          return {
+            type: "tripleClick",
+            x:
+              (input.x as number) ||
+              (input.coordinate ? (input.coordinate as number[])[0] : 0),
+            y:
+              (input.y as number) ||
+              (input.coordinate ? (input.coordinate as number[])[1] : 0),
+            ...input,
+          };
         } else if (action === "scroll") {
           // Convert Anthropic's coordinate, scroll_amount and scroll_direction into scroll_x and scroll_y
           const x =
diff --git a/packages/core/lib/v3/handlers/v3CuaAgentHandler.ts b/packages/core/lib/v3/handlers/v3CuaAgentHandler.ts
index 6cefa4b4d..af3a3dad8 100644
--- a/packages/core/lib/v3/handlers/v3CuaAgentHandler.ts
+++ b/packages/core/lib/v3/handlers/v3CuaAgentHandler.ts
@@ -324,6 +324,7 @@ export class V3CuaAgentHandler {
         }
         return { success: true };
       }
+      case "triple_click":
       case "tripleClick": {
         const { x, y } = action;
         if (recording) {
diff --git a/packages/core/tests/unit/anthropic-cua-triple-click.test.ts b/packages/core/tests/unit/anthropic-cua-triple-click.test.ts
new file mode 100644
index 000000000..727fe5173
--- /dev/null
+++ b/packages/core/tests/unit/anthropic-cua-triple-click.test.ts
@@ -0,0 +1,99 @@
+import { describe, expect, it, vi, beforeEach } from "vitest";
+import { AnthropicCUAClient } from "../../lib/v3/agent/AnthropicCUAClient.js";
+import Anthropic from "@anthropic-ai/sdk";
+
+vi.mock("@anthropic-ai/sdk", () => {
+  const mockCreate = vi.fn();
+
+  return {
+    default: class MockAnthropic {
+      beta = {
+        messages: {
+          create: mockCreate,
+        },
+      };
+    },
+  };
+});
+
+describe("AnthropicCUAClient triple_click handling", () => {
+  let mockCreate: ReturnType<typeof vi.fn>;
+  let client: AnthropicCUAClient;
+  let executedActions: Array<Record<string, unknown>>;
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+    const anthropic = new Anthropic({ apiKey: "test" });
+    mockCreate = anthropic.beta.messages.create as ReturnType<typeof vi.fn>;
+
+    client = new AnthropicCUAClient("anthropic", "claude-sonnet-4-5-20250929", undefined, {
+      apiKey: "test-key",
+    });
+    client.setViewport(1280, 720);
+    client.setScreenshotProvider(async () => "fake-base64-screenshot");
+
+    executedActions = [];
+    client.setActionHandler(async (action) => {
+      executedActions.push({ ...action });
+    });
+  });
+
+  it("should convert triple_click with coordinate array to tripleClick action", async () => {
+    mockCreate.mockResolvedValue({
+      id: "test-id",
+      content: [
+        {
+          type: "tool_use",
+          id: "tool-1",
+          name: "computer",
+          input: {
+            action: "triple_click",
+            coordinate: [640, 360],
+          },
+        },
+      ],
+      usage: { input_tokens: 10, output_tokens: 20 },
+    });
+
+    const logger = vi.fn();
+    await client.executeStep(
+      [{ role: "user", content: "triple click the paragraph" }],
+      logger,
+    );
+
+    expect(executedActions).toHaveLength(1);
+    expect(executedActions[0].type).toBe("tripleClick");
+    expect(executedActions[0].x).toBe(640);
+    expect(executedActions[0].y).toBe(360);
+  });
+
+  it("should convert triple_click with x/y fields to tripleClick action", async () => {
+    mockCreate.mockResolvedValue({
+      id: "test-id",
+      content: [
+        {
+          type: "tool_use",
+          id: "tool-2",
+          name: "computer",
+          input: {
+            action: "triple_click",
+            x: 100,
+            y: 200,
+          },
+        },
+      ],
+      usage: { input_tokens: 10, output_tokens: 20 },
+    });
+
+    const logger = vi.fn();
+    await client.executeStep(
+      [{ role: "user", content: "triple click the line" }],
+      logger,
+    );
+
+    expect(executedActions).toHaveLength(1);
+    expect(executedActions[0].type).toBe("tripleClick");
+    expect(executedActions[0].x).toBe(100);
+    expect(executedActions[0].y).toBe(200);
+  });
+});