From e371098a746ced0c1a1af158d8b189d0266323d0 Mon Sep 17 00:00:00 2001 From: Tal Zaccai Date: Tue, 12 May 2026 23:05:31 -0700 Subject: [PATCH 1/5] code-agent + Coda extension: migrate to dynamic port via PortRegistrar PR 2 of the port-registrar series. Drops the hardcoded 8082 port for the `code` agent's WebSocket server and rewires the in-repo Coda VS Code extension to discover the live port through the agent-server's discovery channel before connecting. * `CodeAgentWebSocketServer` is now an async `start(port = 0)` factory that resolves only after the `listening` event fires, with `close()` returning a Promise. Lets us read the OS-assigned port reliably and serialize rapid disable/enable cycles under a fixed `CODE_WEBSOCKET_PORT` override. * `updateCodeContext` registers the server's actual port per session via `sessionContext.registerPort`. The shared module-scoped server is kept (refcounted), but each enabling session gets its own registration handle stored on its `agentContext`; `releaseAllForSession` is the crash-safety backstop. The registrar already supports multiple registrations per `(agent, role)` so this drops the previously considered ownership-transfer state machine. * `readiness.ts` treats port as `number | undefined` and omits the port suffix from setup-card and NOT-RUNNING messages when no override is set. `resolveCodePort` is now `resolveCodePortOverride` to make the env-var semantics explicit. * Coda's `createWebSocket` is async and uses `discoverPort()` (from the `@typeagent/agent-server-client/discovery` subpath introduced in PR 1) with a legacy 8082 fallback only when the discovery channel itself is unreachable (not when lookup returns `not-registered`). Reconnect loop now guards against the multi-interval leak in the prior `reconnectWebSocket` implementation and re-discovers the port on every retry (port can change across agent-server restarts). * Tests: 5 new integration tests for the shared-server lifecycle (single + multi-session enable / disable / rollback on bind failure). Existing 13 readiness tests updated for the new `number | undefined` port type. All 18 code-agent tests pass; full workspace build green; lint clean. Branched off and depends on PR 1 ([port-registrar foundation], talzacc/port-registrar-foundation tip `a8fd7cf7`). Does not modify SecretAgents. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../agents/code/src/codeActionHandler.ts | 191 +++++++++++++---- .../code/src/codeAgentWebSocketServer.ts | 85 +++++++- ts/packages/agents/code/src/readiness.ts | 44 ++-- .../agents/code/test/codeReadiness.spec.ts | 53 +++-- .../code/test/codeUpdateContext.spec.ts | 194 ++++++++++++++++++ ts/packages/coda/package.json | 2 + ts/packages/coda/src/webSocket.ts | 54 ++++- ts/packages/coda/src/wsConnect.ts | 18 +- ts/pnpm-lock.yaml | 6 + 9 files changed, 550 insertions(+), 97 deletions(-) create mode 100644 ts/packages/agents/code/test/codeUpdateContext.spec.ts diff --git a/ts/packages/agents/code/src/codeActionHandler.ts b/ts/packages/agents/code/src/codeActionHandler.ts index 0fdbbb9bf7..9a26c9be4a 100644 --- a/ts/packages/agents/code/src/codeActionHandler.ts +++ b/ts/packages/agents/code/src/codeActionHandler.ts @@ -24,7 +24,7 @@ import { } from "@typeagent/agent-sdk/helpers/action"; import { evaluateCodeReadiness, - resolveCodePort, + resolveCodePortOverride, setupCode, whichExists, } from "./readiness.js"; @@ -32,13 +32,23 @@ import { const debug = registerDebug("typeagent:code"); // Shared WebSocket server that bridges this code agent to the Coda VS Code -// extension (ts/packages/coda) on port 8082. Created on first enable, closed -// when the last session disables the code agent. Storing it per-session caused -// "No websocket connection" errors when an action ran on a session different -// from the one that originally created the server (e.g. after schema enable on -// a different conversation), and also masked EADDRINUSE failures from a second -// bind attempt on port 8082. +// extension (ts/packages/coda). Created on first session-enable, closed when +// the last session disables. Storing it per-session caused "No websocket +// connection" errors when an action ran on a session different from the one +// that originally created the server (e.g. after schema enable on a different +// conversation), and also masked EADDRINUSE failures from a second bind +// attempt on the configured port. +// +// Port allocation: by default the OS picks a free ephemeral port (port=0). +// Each session that uses the shared server registers it under its own +// `sessionContextId`, so the PortRegistrar's `closeSessionContext` backstop +// auto-releases per-session entries and `lookup("code")` keeps returning the +// shared port as long as ≥1 session has it enabled. `CODE_WEBSOCKET_PORT` +// remains an explicit override (useful for back-compat with installed Coda +// extensions that dial 8082). let sharedWebSocketServer: CodeAgentWebSocketServer | undefined; +let sharedStartingPromise: Promise | undefined; +let sharedClosingPromise: Promise | undefined; let sharedWebSocketRefCount = 0; const sharedPendingCalls: Map< number, @@ -62,11 +72,16 @@ export function instantiate(): AppAgent { setup: (actionContext) => { const ctx = (actionContext as ActionContext) .sessionContext.agentContext; + // Prefer the actual bound port (set once updateAgentContext has + // brought up the shared server); fall back to the static + // override for messaging on the first probe. + const port = + getSharedCodePort() ?? resolveCodePortOverride(process.env); return setupCode( actionContext, ctx.choiceManager, () => ctx.webSocketServer?.isConnected() === true, - resolveCodePort(process.env), + port, ); }, handleChoice: async (choiceId, response, context) => { @@ -90,6 +105,11 @@ type CodeActionContext = { // Manages yes/no choice callbacks (currently only the setup-flow card). // Hooked up via the AppAgent.handleChoice in instantiate() above. choiceManager: ChoiceManager; + // Handle returned by sessionContext.registerPort, kept so we can release + // exactly this session's registration on disable. The + // closeSessionContext backstop will also release it if the disable path + // is skipped. + portRegistration?: { release: () => void }; }; async function initializeCodeContext(): Promise { @@ -126,10 +146,82 @@ async function checkCodeReadiness( return evaluateCodeReadiness({ clientConnected, vsCodeCliInstalled, - port: resolveCodePort(process.env), + port: getSharedCodePort() ?? resolveCodePortOverride(process.env), }); } +// Bind hint for the shared server. Returns the explicit override if +// CODE_WEBSOCKET_PORT is set (handy for back-compat with installed Coda +// extensions that still dial 8082); otherwise 0 so the OS picks a free +// port and the registrar/discovery channel publishes it. +function getCodeBindPort(): number { + const raw = process.env["CODE_WEBSOCKET_PORT"]; + if (raw === undefined) return 0; + const n = parseInt(raw, 10); + if (!Number.isFinite(n) || n < 0) { + debug( + `Ignoring invalid CODE_WEBSOCKET_PORT=${raw}; falling back to OS-assigned port`, + ); + return 0; + } + return n; +} + +// Wire the shared server's onMessage handler. Module-scoped because the +// server itself is module-scoped — all sessions route their pending-call +// completions through the same handler. +function attachSharedOnMessage(server: CodeAgentWebSocketServer): void { + server.onMessage = (message: string) => { + try { + const data = JSON.parse(message) as WebSocketMessageV2; + + if (data.id !== undefined && data.result !== undefined) { + const pendingCall = sharedPendingCalls.get(Number(data.id)); + + if (pendingCall) { + sharedPendingCalls.delete(Number(data.id)); + const { resolve, context } = pendingCall; + if (context?.actionIO) { + context.actionIO.setDisplay(data.result); + } + resolve(); + } + } + } catch (error) { + debug("Error parsing WebSocket message:", error); + } + }; +} + +// Start (or attach to an in-flight start of) the shared WebSocket server. +// Concurrent enables from different sessions can race; serialize via +// sharedStartingPromise so only one bind attempt is in flight. +async function ensureSharedServer(): Promise { + // If a previous teardown is still releasing the port (matters under + // CODE_WEBSOCKET_PORT override), await it before binding again. + if (sharedClosingPromise !== undefined) { + await sharedClosingPromise; + } + if (sharedWebSocketServer !== undefined) { + return sharedWebSocketServer; + } + if (sharedStartingPromise !== undefined) { + return sharedStartingPromise; + } + sharedStartingPromise = (async () => { + try { + const server = + await CodeAgentWebSocketServer.start(getCodeBindPort()); + attachSharedOnMessage(server); + sharedWebSocketServer = server; + return server; + } finally { + sharedStartingPromise = undefined; + } + })(); + return sharedStartingPromise; +} + async function updateCodeContext( enable: boolean, context: SessionContext, @@ -140,43 +232,33 @@ async function updateCodeContext( if (agentContext.enabled.has(schemaName)) { return; } - if (agentContext.enabled.size === 0) { - sharedWebSocketRefCount++; - } + const isFirstSchemaForSession = agentContext.enabled.size === 0; agentContext.enabled.add(schemaName); - - if (!sharedWebSocketServer) { - // TODO: stop hardcoding the port. The dispatcher should hand each - // agent a free port at initialize time so multiple TypeAgent - // installs / sessions on one host can't collide on 8082. - const port = parseInt(process.env["CODE_WEBSOCKET_PORT"] || "8082"); - sharedWebSocketServer = new CodeAgentWebSocketServer(port); - - sharedWebSocketServer.onMessage = (message: string) => { - try { - const data = JSON.parse(message) as WebSocketMessageV2; - - if (data.id !== undefined && data.result !== undefined) { - const pendingCall = sharedPendingCalls.get( - Number(data.id), - ); - - if (pendingCall) { - sharedPendingCalls.delete(Number(data.id)); - const { resolve, context } = pendingCall; - if (context?.actionIO) { - context.actionIO.setDisplay(data.result); - } - resolve(); - } - } - } catch (error) { - debug("Error parsing WebSocket message:", error); - } - }; + try { + const server = await ensureSharedServer(); + agentContext.webSocketServer = server; + agentContext.pendingCall = sharedPendingCalls; + if (isFirstSchemaForSession) { + // Per-session registration: the registrar allows multiple + // entries for `(code, default)` across sessions and lookup + // returns the most recent, so each active session + // independently keeps the shared port discoverable. The + // backstop in closeSessionContext releases ours if disable + // is skipped. + agentContext.portRegistration = context.registerPort( + "default", + server.port, + ); + sharedWebSocketRefCount++; + } + } catch (e) { + // Roll back the per-session schema bookkeeping so a subsequent + // retry sees a clean slate. Don't touch shared module state — + // the bind itself failed, so we never incremented the refcount + // or registered. + agentContext.enabled.delete(schemaName); + throw e; } - agentContext.webSocketServer = sharedWebSocketServer; - agentContext.pendingCall = sharedPendingCalls; } else { if (!agentContext.enabled.has(schemaName)) { return; @@ -184,16 +266,35 @@ async function updateCodeContext( agentContext.enabled.delete(schemaName); if (agentContext.enabled.size === 0) { agentContext.webSocketServer = undefined; + // Release this session's registration before potentially closing + // the server. Release is idempotent and a no-op if already + // released by the backstop. + agentContext.portRegistration?.release(); + delete agentContext.portRegistration; + sharedWebSocketRefCount = Math.max(0, sharedWebSocketRefCount - 1); if (sharedWebSocketRefCount === 0 && sharedWebSocketServer) { - sharedWebSocketServer.close(); + const server = sharedWebSocketServer; sharedWebSocketServer = undefined; sharedPendingCalls.clear(); + // Track the in-flight close so a rapid re-enable awaits + // port release under a fixed-port override. + sharedClosingPromise = server.close().finally(() => { + sharedClosingPromise = undefined; + }); + await sharedClosingPromise; } } } } +// Exposed for readiness/setup messaging — undefined when the shared server +// isn't bound yet, otherwise the actual bound port. Lets readiness messages +// always reflect the real listener. +export function getSharedCodePort(): number | undefined { + return sharedWebSocketServer?.port; +} + function getVSCodeStoragePath(): string { const platform = os.platform(); if (platform === "darwin") diff --git a/ts/packages/agents/code/src/codeAgentWebSocketServer.ts b/ts/packages/agents/code/src/codeAgentWebSocketServer.ts index 4cbbe44e5d..221bda1957 100644 --- a/ts/packages/agents/code/src/codeAgentWebSocketServer.ts +++ b/ts/packages/agents/code/src/codeAgentWebSocketServer.ts @@ -2,23 +2,81 @@ // Licensed under the MIT License. import { WebSocketServer, WebSocket } from "ws"; +import { AddressInfo } from "net"; import registerDebug from "debug"; const debug = registerDebug("typeagent:code:websocket"); export class CodeAgentWebSocketServer { - private server: WebSocketServer; private clients: Map = new Map(); private clientIdCounter = 0; public onMessage?: (message: string) => void; - constructor(port: number = 8082) { - this.server = new WebSocketServer({ port }); + /** + * @param server the underlying ws server, already bound and listening. + * @param port the actually bound port (OS-assigned when the caller + * passed 0). + * + * Construction is private — use {@link CodeAgentWebSocketServer.start} + * so callers always get a server that is guaranteed to be bound + * before they read {@link port} or pass it to the registrar. + */ + private constructor( + private readonly server: WebSocketServer, + public readonly port: number, + ) { this.setupHandlers(); debug(`CodeAgentWebSocketServer listening on port ${port}`); + } - this.server.on("error", (error) => { - debug("Server error:", error); + /** + * Bind a new server on `port`. Resolves only after the + * `listening` event so callers can synchronously read + * {@link port}; rejects on the first `error` event so bind + * failures (EADDRINUSE under fixed-port overrides) surface + * loudly instead of being swallowed by an attached error + * handler. + * + * Pass `0` to let the OS pick a free ephemeral port; the + * actual port is then available via {@link port}. + */ + public static start(port: number = 0): Promise { + return new Promise((resolve, reject) => { + const server = new WebSocketServer({ port }); + let settled = false; + const onError = (error: Error) => { + if (settled) { + debug("Server error after listening:", error); + return; + } + settled = true; + server.removeListener("listening", onListening); + debug("Server bind error:", error); + reject(error); + }; + const onListening = () => { + if (settled) return; + settled = true; + server.removeListener("error", onError); + const address = server.address() as AddressInfo | null; + if (!address || typeof address === "string") { + server.close(); + reject( + new Error( + "ws server.address() did not return an AddressInfo", + ), + ); + return; + } + // Re-attach a permanent error handler so post-listen errors + // are logged rather than crashing the process. + server.on("error", (error) => { + debug("Server error:", error); + }); + resolve(new CodeAgentWebSocketServer(server, address.port)); + }; + server.once("error", onError); + server.once("listening", onListening); }); } @@ -48,10 +106,6 @@ export class CodeAgentWebSocketServer { this.clients.delete(clientId); }); }); - - this.server.on("error", (error) => { - debug("Server error:", error); - }); } public broadcast(message: string): number { @@ -117,7 +171,14 @@ export class CodeAgentWebSocketServer { return states; } - public close(): void { + /** + * Close all client connections and the underlying server. + * Resolves when the server has fully released its port — important + * for a rapid disable→enable cycle under a fixed-port override + * (`CODE_WEBSOCKET_PORT`), where a synchronous return would race + * the new bind into EADDRINUSE. + */ + public close(): Promise { debug("Closing CodeAgentWebSocketServer"); for (const [, client] of this.clients.entries()) { if (client.readyState === WebSocket.OPEN) { @@ -125,6 +186,8 @@ export class CodeAgentWebSocketServer { } } this.clients.clear(); - this.server.close(); + return new Promise((resolve) => { + this.server.close(() => resolve()); + }); } } diff --git a/ts/packages/agents/code/src/readiness.ts b/ts/packages/agents/code/src/readiness.ts index 620ea0d41c..ad57cb676f 100644 --- a/ts/packages/agents/code/src/readiness.ts +++ b/ts/packages/agents/code/src/readiness.ts @@ -41,8 +41,12 @@ export type CodeReadinessProbe = { // user has set everything up correctly and just closed VS Code is // misleading. vsCodeCliInstalled: boolean; - // For messaging only. Pulled from CODE_WEBSOCKET_PORT (default 8082). - port: number; + // For messaging only. Undefined until the shared server is bound; the + // user-facing message then omits the port number. Pulled from the + // actual bound port (via getSharedCodePort()) so readiness stays in + // sync with the registered listener, including when the OS picked a + // free ephemeral port (port=0 default). + port: number | undefined; }; // Pure decision function — exported for unit tests so we don't have to @@ -77,9 +81,10 @@ export function evaluateCodeReadiness( // This is a transient runtime state, not a config problem; `setup` // resolves it by launching VS Code and waiting for the extension to // connect. + const portSuffix = probe.port !== undefined ? ` on port ${probe.port}` : ""; return { state: "setup-required", - message: `VS Code isn't currently running (or the Coda extension isn't connected on port ${probe.port}).`, + message: `VS Code isn't currently running (or the Coda extension isn't connected${portSuffix}).`, details: "Run `@config agent setup code` to launch VS Code — the Coda extension will auto-connect — or open VS Code yourself and your code commands will start working immediately.", }; @@ -102,16 +107,21 @@ export async function whichExists(tool: string): Promise { }); } -// Resolves the configured port the same way updateAgentContext does. -// Centralized here so readiness messaging stays in sync with the actual -// listener configuration. -export function resolveCodePort(env: NodeJS.ProcessEnv): number { +// Resolves the explicit port override (CODE_WEBSOCKET_PORT) for readiness +// messaging when the shared server isn't bound yet. Returns undefined to +// signal "no static port" — the caller (readiness probe) should query +// `getSharedCodePort()` first and fall through here only when the server +// hasn't been started yet. +// +// Validation matches getCodeBindPort() in codeActionHandler.ts. +export function resolveCodePortOverride( + env: NodeJS.ProcessEnv, +): number | undefined { const raw = env.CODE_WEBSOCKET_PORT; - if (raw !== undefined) { - const n = parseInt(raw); - if (Number.isFinite(n) && n > 0) return n; - } - return 8082; + if (raw === undefined) return undefined; + const n = parseInt(raw, 10); + if (Number.isFinite(n) && n > 0) return n; + return undefined; } // ============================================================================ @@ -169,7 +179,7 @@ export async function setupCode( actionContext: ActionContext, choiceManager: ChoiceManager, isConnected: () => boolean, - port: number, + port: number | undefined, ): Promise { if (isConnected()) { // Defense-in-depth: dispatcher only invokes setup() when readiness @@ -181,9 +191,10 @@ export async function setupCode( "VS Code is already connected.", ); } + const portSuffix = port !== undefined ? ` on port ${port}` : ""; return createYesNoChoiceResult( choiceManager, - `Launch VS Code? The Coda extension will auto-connect to the code agent's WebSocket server on port ${port}. I'll wait up to 30 seconds for the connection — you can keep working in the meantime.`, + `Launch VS Code? The Coda extension will auto-connect to the code agent's WebSocket server${portSuffix}. I'll wait up to 30 seconds for the connection — you can keep working in the meantime.`, async (confirmed, liveActionContext) => { if (!confirmed) { return createActionResultFromTextDisplay( @@ -204,7 +215,7 @@ export async function setupCode( export async function runLaunchAndWait( actionContext: ActionContext, isConnected: () => boolean, - port: number, + port: number | undefined, options?: { timeoutMs?: number; launchImpl?: () => Promise; @@ -229,10 +240,11 @@ export async function runLaunchAndWait( ); } + const waitOn = port !== undefined ? ` on port ${port}` : ""; actionContext.actionIO.appendDisplay( { type: "text", - content: `[${ts()}] Waiting for the Coda extension to connect on port ${port} (up to ${Math.round(timeoutMs / 1000)}s)…`, + content: `[${ts()}] Waiting for the Coda extension to connect${waitOn} (up to ${Math.round(timeoutMs / 1000)}s)…`, kind: "status", }, "block", diff --git a/ts/packages/agents/code/test/codeReadiness.spec.ts b/ts/packages/agents/code/test/codeReadiness.spec.ts index 32e0e75596..27e27d673a 100644 --- a/ts/packages/agents/code/test/codeReadiness.spec.ts +++ b/ts/packages/agents/code/test/codeReadiness.spec.ts @@ -5,14 +5,14 @@ * Tests for the code agent's readiness/setup wiring. * * Pure functions only — `evaluateCodeReadiness` (decision) and - * `resolveCodePort` (env parse) — plus `runLaunchAndWait` exercised with - * an injected `launchImpl` so we don't actually spawn `code --new-window` - * during tests. + * `resolveCodePortOverride` (env parse) — plus `runLaunchAndWait` + * exercised with an injected `launchImpl` so we don't actually spawn + * `code --new-window` during tests. */ import { evaluateCodeReadiness, - resolveCodePort, + resolveCodePortOverride, runLaunchAndWait, } from "../src/readiness.js"; import type { ActionContext } from "@typeagent/agent-sdk"; @@ -75,43 +75,58 @@ describe("evaluateCodeReadiness", () => { }); expect(r.message).toMatch(/port 9999/); }); + + test("port omitted from message when undefined (server not yet bound)", () => { + // Initial readiness probe can fire before updateAgentContext has + // bound the shared server. Messaging must still be coherent — no + // dangling "port undefined" string. + const r = evaluateCodeReadiness({ + clientConnected: false, + vsCodeCliInstalled: true, + port: undefined, + }); + expect(r.message).not.toMatch(/port/i); + expect(r.message).not.toMatch(/undefined/); + }); }); -describe("resolveCodePort", () => { - test("defaults to 8082 when CODE_WEBSOCKET_PORT is unset", () => { - expect(resolveCodePort({} as NodeJS.ProcessEnv)).toBe(8082); +describe("resolveCodePortOverride", () => { + test("returns undefined when CODE_WEBSOCKET_PORT is unset (dynamic port)", () => { + expect( + resolveCodePortOverride({} as NodeJS.ProcessEnv), + ).toBeUndefined(); }); test("parses a valid integer override", () => { expect( - resolveCodePort({ + resolveCodePortOverride({ CODE_WEBSOCKET_PORT: "9100", } as NodeJS.ProcessEnv), ).toBe(9100); }); - test("falls back to default when CODE_WEBSOCKET_PORT is non-numeric", () => { - // Defensive: parseInt on garbage returns NaN. Without the - // Number.isFinite guard we'd return NaN as the port and the - // listener would explode. + test("returns undefined when CODE_WEBSOCKET_PORT is non-numeric", () => { + // Defensive: parseInt on garbage returns NaN. We must not propagate + // NaN as a port — return undefined so the agent falls back to + // OS-assigned binding. expect( - resolveCodePort({ + resolveCodePortOverride({ CODE_WEBSOCKET_PORT: "abc", } as NodeJS.ProcessEnv), - ).toBe(8082); + ).toBeUndefined(); }); - test("falls back to default for zero / negative values", () => { + test("returns undefined for zero / negative values", () => { expect( - resolveCodePort({ + resolveCodePortOverride({ CODE_WEBSOCKET_PORT: "0", } as NodeJS.ProcessEnv), - ).toBe(8082); + ).toBeUndefined(); expect( - resolveCodePort({ + resolveCodePortOverride({ CODE_WEBSOCKET_PORT: "-1", } as NodeJS.ProcessEnv), - ).toBe(8082); + ).toBeUndefined(); }); }); diff --git a/ts/packages/agents/code/test/codeUpdateContext.spec.ts b/ts/packages/agents/code/test/codeUpdateContext.spec.ts new file mode 100644 index 0000000000..c4f56f50a8 --- /dev/null +++ b/ts/packages/agents/code/test/codeUpdateContext.spec.ts @@ -0,0 +1,194 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +/** + * Integration-style tests for the code agent's shared-server lifecycle + * and per-session port registration. These tests actually bind a + * WebSocket server on a random free port (port=0) per the design — they + * exercise the real `CodeAgentWebSocketServer.start` + `close` paths + * plus the per-session `registerPort` accounting end-to-end. + * + * Coverage matrix (per the PR 2 plan's test list): + * - enable A: shared server bound, A's registerPort called. + * - enable A + B: both sessions register, lookup returns same port. + * - A disables: A's registration released; server still up; B's + * registration intact. + * - both disable: server closed, no live registrations. + * - non-owner disable: shared server stays up under the other session. + * - skipping disable (backstop simulation): manually release A's + * registration; B still works. + * + * The shared server is module-scoped state, so tests run serially via + * Jest's default sequential mode for a single suite. + */ + +import { instantiate, getSharedCodePort } from "../src/codeActionHandler.js"; +import type { SessionContext } from "@typeagent/agent-sdk"; + +// Minimal stub of SessionContext — only the surface the code agent uses +// during updateAgentContext. registerPort records every call so we can +// assert per-session accounting; the returned handle's release is real +// (no-op default; tests override when they want to inspect release). +type RegisterCall = { role: string; port: number }; +type StubContext = { + sessionContext: SessionContext; + registerCalls: RegisterCall[]; + releaseSpy: () => void; + releaseCount: number; +}; + +function makeStubContext(agentContext: any): StubContext { + const registerCalls: RegisterCall[] = []; + let releaseCount = 0; + const releaseSpy = () => { + releaseCount++; + }; + const sessionContext = { + agentContext, + registerPort(role: string, port: number) { + registerCalls.push({ role, port }); + return { release: releaseSpy }; + }, + // The rest of SessionContext isn't touched by updateCodeContext. + } as unknown as SessionContext; + return { + sessionContext, + registerCalls, + get releaseCount() { + return releaseCount; + }, + releaseSpy, + } as any; +} + +describe("code agent shared server + per-session registration", () => { + const agent = instantiate(); + + async function newSession(): Promise { + const agentContext = await agent.initializeAgentContext!(); + return makeStubContext(agentContext); + } + + afterEach(async () => { + // Defensive: if a test left the shared server up, calling + // updateAgentContext(false, ...) on a synthetic session won't + // close it (refcount stays positive). Reach into the module's + // close path by exhausting any sessions via the public API — + // but since we control all stubs in each test, the per-test + // cleanup handles it. Leaving this hook as a placeholder. + }); + + test("enable on a fresh session binds the shared server and registers under (code, default)", async () => { + const s = await newSession(); + await agent.updateAgentContext!(true, s.sessionContext, "code"); + try { + // Bind happened — getSharedCodePort returns the bound port. + const port = getSharedCodePort(); + expect(port).toBeDefined(); + expect(port).toBeGreaterThan(0); + // Exactly one register call for the session's first schema. + expect(s.registerCalls).toEqual([{ role: "default", port }]); + // Web socket server is wired into the per-session agentContext. + expect(s.sessionContext.agentContext.webSocketServer).toBeDefined(); + } finally { + await agent.updateAgentContext!(false, s.sessionContext, "code"); + // After last session disables, shared server is torn down. + expect(getSharedCodePort()).toBeUndefined(); + } + }); + + test("two sessions enabling produce two registrations on the SAME port", async () => { + const a = await newSession(); + const b = await newSession(); + await agent.updateAgentContext!(true, a.sessionContext, "code"); + const portAfterA = getSharedCodePort(); + await agent.updateAgentContext!(true, b.sessionContext, "code"); + const portAfterB = getSharedCodePort(); + try { + expect(portAfterA).toBeDefined(); + expect(portAfterB).toBe(portAfterA); + expect(a.registerCalls).toEqual([ + { role: "default", port: portAfterA }, + ]); + expect(b.registerCalls).toEqual([ + { role: "default", port: portAfterA }, + ]); + } finally { + await agent.updateAgentContext!(false, a.sessionContext, "code"); + await agent.updateAgentContext!(false, b.sessionContext, "code"); + } + }); + + test("disabling one session releases only its own registration; server stays up under the other", async () => { + const a = await newSession(); + const b = await newSession(); + await agent.updateAgentContext!(true, a.sessionContext, "code"); + await agent.updateAgentContext!(true, b.sessionContext, "code"); + const port = getSharedCodePort(); + expect(port).toBeDefined(); + + // A disables — its release runs; B is untouched. + await agent.updateAgentContext!(false, a.sessionContext, "code"); + expect((a as any).releaseCount).toBe(1); + expect((b as any).releaseCount).toBe(0); + // Shared server still bound to the same port (B keeps it alive). + expect(getSharedCodePort()).toBe(port); + + // B disables — server closes. + await agent.updateAgentContext!(false, b.sessionContext, "code"); + expect((b as any).releaseCount).toBe(1); + expect(getSharedCodePort()).toBeUndefined(); + }); + + test("multiple schemas on one session only register once and release once", async () => { + const s = await newSession(); + // updateCodeContext is called per schema; with refcount + // bookkeeping based on `enabled.size === 0`, the registration is + // only created on the FIRST schema and released on the LAST. + await agent.updateAgentContext!(true, s.sessionContext, "code"); + await agent.updateAgentContext!(true, s.sessionContext, "code-editor"); + try { + expect(s.registerCalls).toHaveLength(1); + } finally { + // Disable one schema — server stays up because the session + // still has another schema enabled. + await agent.updateAgentContext!( + false, + s.sessionContext, + "code-editor", + ); + expect((s as any).releaseCount).toBe(0); + expect(getSharedCodePort()).toBeDefined(); + // Disable the last schema — release fires, server closes. + await agent.updateAgentContext!(false, s.sessionContext, "code"); + expect((s as any).releaseCount).toBe(1); + expect(getSharedCodePort()).toBeUndefined(); + } + }); + + test("backstop simulation: releasing A's handle externally still leaves B's registration intact", async () => { + // Simulates closeSessionContext's finally backstop running for A + // (which calls portRegistrar.releaseAllForSession bypassing the + // agent's updateAgentContext(false, ...) path) while B remains + // active. The agent must tolerate this — its own state assumes + // the registration may already be released. + const a = await newSession(); + const b = await newSession(); + await agent.updateAgentContext!(true, a.sessionContext, "code"); + await agent.updateAgentContext!(true, b.sessionContext, "code"); + const port = getSharedCodePort(); + + // Pretend the backstop released A's handle directly. + a.sessionContext.agentContext.portRegistration?.release(); + + // Now drive A's disable explicitly. The release call inside + // updateCodeContext should be a no-op (idempotent stub), + // bookkeeping must still complete cleanly. + await agent.updateAgentContext!(false, a.sessionContext, "code"); + // Server still up under B. + expect(getSharedCodePort()).toBe(port); + + await agent.updateAgentContext!(false, b.sessionContext, "code"); + expect(getSharedCodePort()).toBeUndefined(); + }); +}); diff --git a/ts/packages/coda/package.json b/ts/packages/coda/package.json index 5f75cf0787..a460aa05c0 100644 --- a/ts/packages/coda/package.json +++ b/ts/packages/coda/package.json @@ -50,6 +50,8 @@ "onStartupFinished" ], "dependencies": { + "@typeagent/agent-server-client": "workspace:*", + "@typeagent/agent-server-protocol": "workspace:*", "body-parser": "^1.20.3", "chalk": "^5.4.1", "cors": "^2.8.5", diff --git a/ts/packages/coda/src/webSocket.ts b/ts/packages/coda/src/webSocket.ts index a97ef1db41..1b2d60b90b 100644 --- a/ts/packages/coda/src/webSocket.ts +++ b/ts/packages/coda/src/webSocket.ts @@ -2,6 +2,7 @@ // Licensed under the MIT License. import WebSocket from "ws"; +import { discoverPort } from "@typeagent/agent-server-client/discovery"; export type WebSocketMessage = { source: string; @@ -11,15 +12,60 @@ export type WebSocketMessage = { body: any; }; +// Back-compat fallback: pre-port-registrar versions of this extension +// dialed `ws://localhost:8082` directly. We still honor that target if +// the discovery channel is unreachable (agentServer not running, or an +// older agentServer without the discovery channel) so users running +// against an old agent server aren't broken on update. +const CODE_AGENT_FALLBACK_HOST = "ws://localhost:8082"; + +// One-shot resolution of the code agent's WebSocket endpoint. Tries the +// agent-server's discovery channel first, falls back to the legacy +// hardcoded host. Honors: +// - `CODE_WEBSOCKET_HOST` env: explicit override; if set, skips +// discovery and dials the given URL directly. +// - `AGENT_SERVER_URL` env: where to reach the agent-server's +// discovery WS (defaults to ws://localhost:8999, the +// AGENT_SERVER_DEFAULT_URL constant). +async function resolveCodeEndpoint(): Promise { + const explicit = process.env["CODE_WEBSOCKET_HOST"]; + if (explicit) return explicit; + + const result = await discoverPort("code", undefined, { + url: process.env["AGENT_SERVER_URL"], + }); + if (result.kind === "found") { + return `ws://localhost:${result.port}`; + } + if (result.kind === "not-registered") { + // Agent server is running but the `code` agent isn't enabled + // (no one has activated the `code` schema in any session yet). + // The reconnect loop will re-discover periodically, so this + // is a transient state — log and fall back to the legacy host + // for a still-non-zero chance the user happens to be running + // an old agent server too. + console.log( + "code agent not yet registered with agent-server; will retry. Falling back to legacy 8082 in case of pre-discovery agent server.", + ); + return CODE_AGENT_FALLBACK_HOST; + } + // Discovery unreachable (agentServer down or pre-discovery + // version). Fall back to the legacy host so users with an old + // server keep working. + console.log( + `Discovery channel unreachable (${result.error.message}). Falling back to legacy ${CODE_AGENT_FALLBACK_HOST}.`, + ); + return CODE_AGENT_FALLBACK_HOST; +} + export async function createWebSocket( channel: string, role: string, clientId?: string, ) { - return new Promise((resolve, reject) => { - let endpoint = - process.env["CODE_WEBSOCKET_HOST"] ?? "ws://localhost:8082"; - endpoint += `?channel=${channel}&role=${role}`; + return new Promise(async (resolve) => { + const base = await resolveCodeEndpoint(); + let endpoint = `${base}?channel=${channel}&role=${role}`; if (clientId) { endpoint += `clientId=${clientId}`; } diff --git a/ts/packages/coda/src/wsConnect.ts b/ts/packages/coda/src/wsConnect.ts index 2dc4e3497e..51602a919a 100644 --- a/ts/packages/coda/src/wsConnect.ts +++ b/ts/packages/coda/src/wsConnect.ts @@ -112,13 +112,27 @@ function arrayBufferToJson(arrayBuffer: any) { } } +// Single in-flight reconnect interval. Pre-discovery code path spawned +// a new setInterval each time reconnect was called (from initializeWS +// and from every onclose); on rapid disconnects that leaked one +// interval per disconnect, all racing to call ensureWebsocketConnected. +// Guard with a module-scoped handle so only one timer is alive at a time. +let reconnectIntervalId: ReturnType | undefined; + function reconnectWebSocket() { - const connectionCheckIntervalId = setInterval(async () => { + if (reconnectIntervalId !== undefined) { + return; + } + reconnectIntervalId = setInterval(async () => { if (webSocket && webSocket.readyState === WebSocket.OPEN) { console.log("Clearing reconnect retry interval"); - clearInterval(connectionCheckIntervalId); + clearInterval(reconnectIntervalId); + reconnectIntervalId = undefined; } else { console.log("Retrying connection"); + // Re-runs the discovery handshake — important because the + // code agent's port can change across agent-server + // restarts (now that it binds to an OS-assigned port). await ensureWebsocketConnected(); } }, 5 * 1000); diff --git a/ts/pnpm-lock.yaml b/ts/pnpm-lock.yaml index 64f8b20d32..d5c8eced6c 100644 --- a/ts/pnpm-lock.yaml +++ b/ts/pnpm-lock.yaml @@ -3548,6 +3548,12 @@ importers: packages/coda: dependencies: + '@typeagent/agent-server-client': + specifier: workspace:* + version: link:../agentServer/client + '@typeagent/agent-server-protocol': + specifier: workspace:* + version: link:../agentServer/protocol body-parser: specifier: ^1.20.3 version: 1.20.3 From e7474262340359cb9a456af4294105df4f76a6d9 Mon Sep 17 00:00:00 2001 From: Tal Zaccai Date: Wed, 13 May 2026 13:47:34 -0700 Subject: [PATCH 2/5] code agent / coda extension: drop legacy 8082 fallback, fix clientId query separator, refresh stale docs This is a research project; there are no in-the-wild pre-discovery clients to support, so the back-compat fallback in coda's webSocket.ts is dead weight. Replace it with: when discovery is unreachable or the code agent isn't yet registered, return undefined and let the existing 5s reconnect loop in wsConnect.ts retry. Also fix a pre-existing bug in createWebSocket: `clientId=` was appended to the query string without a `&` separator, producing malformed URLs like `...&role=clientclientId=xyz`. Caught by code review. Stale doc references to port 8082 in commandExecutor/README.md, commandExecutor/VSCODE_CAPABILITIES.md, and shell/demo/DEMO_SETUP.md updated to describe the discovery-based flow. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../agents/code/src/codeActionHandler.ts | 10 ++--- ts/packages/coda/src/webSocket.ts | 41 ++++++++----------- ts/packages/commandExecutor/README.md | 2 +- .../commandExecutor/VSCODE_CAPABILITIES.md | 4 +- ts/packages/shell/demo/DEMO_SETUP.md | 7 ++-- 5 files changed, 30 insertions(+), 34 deletions(-) diff --git a/ts/packages/agents/code/src/codeActionHandler.ts b/ts/packages/agents/code/src/codeActionHandler.ts index 9a26c9be4a..64dc0293e7 100644 --- a/ts/packages/agents/code/src/codeActionHandler.ts +++ b/ts/packages/agents/code/src/codeActionHandler.ts @@ -44,8 +44,8 @@ const debug = registerDebug("typeagent:code"); // `sessionContextId`, so the PortRegistrar's `closeSessionContext` backstop // auto-releases per-session entries and `lookup("code")` keeps returning the // shared port as long as ≥1 session has it enabled. `CODE_WEBSOCKET_PORT` -// remains an explicit override (useful for back-compat with installed Coda -// extensions that dial 8082). +// remains an explicit override (useful for pinning the port when debugging +// or when an external client expects a known address). let sharedWebSocketServer: CodeAgentWebSocketServer | undefined; let sharedStartingPromise: Promise | undefined; let sharedClosingPromise: Promise | undefined; @@ -151,9 +151,9 @@ async function checkCodeReadiness( } // Bind hint for the shared server. Returns the explicit override if -// CODE_WEBSOCKET_PORT is set (handy for back-compat with installed Coda -// extensions that still dial 8082); otherwise 0 so the OS picks a free -// port and the registrar/discovery channel publishes it. +// CODE_WEBSOCKET_PORT is set (useful for pinning the port when debugging); +// otherwise 0 so the OS picks a free port and the registrar/discovery +// channel publishes it. function getCodeBindPort(): number { const raw = process.env["CODE_WEBSOCKET_PORT"]; if (raw === undefined) return 0; diff --git a/ts/packages/coda/src/webSocket.ts b/ts/packages/coda/src/webSocket.ts index 1b2d60b90b..2b0894b897 100644 --- a/ts/packages/coda/src/webSocket.ts +++ b/ts/packages/coda/src/webSocket.ts @@ -12,22 +12,17 @@ export type WebSocketMessage = { body: any; }; -// Back-compat fallback: pre-port-registrar versions of this extension -// dialed `ws://localhost:8082` directly. We still honor that target if -// the discovery channel is unreachable (agentServer not running, or an -// older agentServer without the discovery channel) so users running -// against an old agent server aren't broken on update. -const CODE_AGENT_FALLBACK_HOST = "ws://localhost:8082"; - -// One-shot resolution of the code agent's WebSocket endpoint. Tries the -// agent-server's discovery channel first, falls back to the legacy -// hardcoded host. Honors: +// One-shot resolution of the code agent's WebSocket endpoint via the +// agent-server's discovery channel. Returns undefined if the endpoint +// can't be resolved (discovery unreachable, or `code` agent not yet +// enabled in any session) — the caller's reconnect loop will retry. +// Honors: // - `CODE_WEBSOCKET_HOST` env: explicit override; if set, skips // discovery and dials the given URL directly. // - `AGENT_SERVER_URL` env: where to reach the agent-server's // discovery WS (defaults to ws://localhost:8999, the // AGENT_SERVER_DEFAULT_URL constant). -async function resolveCodeEndpoint(): Promise { +async function resolveCodeEndpoint(): Promise { const explicit = process.env["CODE_WEBSOCKET_HOST"]; if (explicit) return explicit; @@ -40,22 +35,18 @@ async function resolveCodeEndpoint(): Promise { if (result.kind === "not-registered") { // Agent server is running but the `code` agent isn't enabled // (no one has activated the `code` schema in any session yet). - // The reconnect loop will re-discover periodically, so this - // is a transient state — log and fall back to the legacy host - // for a still-non-zero chance the user happens to be running - // an old agent server too. + // Transient — the reconnect loop will re-discover periodically. console.log( - "code agent not yet registered with agent-server; will retry. Falling back to legacy 8082 in case of pre-discovery agent server.", + "code agent not yet registered with agent-server; will retry.", ); - return CODE_AGENT_FALLBACK_HOST; + return undefined; } - // Discovery unreachable (agentServer down or pre-discovery - // version). Fall back to the legacy host so users with an old - // server keep working. + // Discovery unreachable (agentServer not running). Reconnect loop + // will retry. console.log( - `Discovery channel unreachable (${result.error.message}). Falling back to legacy ${CODE_AGENT_FALLBACK_HOST}.`, + `Discovery channel unreachable (${result.error.message}); will retry.`, ); - return CODE_AGENT_FALLBACK_HOST; + return undefined; } export async function createWebSocket( @@ -65,9 +56,13 @@ export async function createWebSocket( ) { return new Promise(async (resolve) => { const base = await resolveCodeEndpoint(); + if (!base) { + resolve(undefined); + return; + } let endpoint = `${base}?channel=${channel}&role=${role}`; if (clientId) { - endpoint += `clientId=${clientId}`; + endpoint += `&clientId=${clientId}`; } const webSocket = new WebSocket(endpoint); diff --git a/ts/packages/commandExecutor/README.md b/ts/packages/commandExecutor/README.md index d6c176896c..c8088118ce 100644 --- a/ts/packages/commandExecutor/README.md +++ b/ts/packages/commandExecutor/README.md @@ -211,7 +211,7 @@ Command Executor MCP Server TypeAgent Dispatcher (WebSocket) ↓ ├─ TypeAgent Agents (Music, Lists, Calendar, etc.) - └─ Coda VSCode Extension (via WebSocket on port 8082) + └─ Coda VSCode Extension (via WebSocket; port discovered through agent-server) └─ VSCode APIs (theme, editor, files, terminal, etc.) ``` diff --git a/ts/packages/commandExecutor/VSCODE_CAPABILITIES.md b/ts/packages/commandExecutor/VSCODE_CAPABILITIES.md index ecb97378cd..d50a6acfc0 100644 --- a/ts/packages/commandExecutor/VSCODE_CAPABILITIES.md +++ b/ts/packages/commandExecutor/VSCODE_CAPABILITIES.md @@ -10,7 +10,7 @@ The Command Executor MCP server can control VSCode through the Coda extension. B ``` User → Claude Code → execute_command MCP tool → → TypeAgent Dispatcher → - → Coda Extension (WebSocket on port 8082) → + → Coda Extension (WebSocket; port discovered through agent-server) → → VSCode APIs ``` @@ -295,7 +295,7 @@ To use these VSCode capabilities: 2. **Coda Extension** must be installed and activated in VSCode: - Published as `aisystems.copilot-coda` - - Auto-connects to dispatcher on port 8082 + - Discovers the code agent's WebSocket port via the agent-server (default `ws://localhost:8999`) 3. **Command Executor MCP Server** configured in `.mcp.json`: ```json diff --git a/ts/packages/shell/demo/DEMO_SETUP.md b/ts/packages/shell/demo/DEMO_SETUP.md index 21ccc42683..6e5dc56f58 100644 --- a/ts/packages/shell/demo/DEMO_SETUP.md +++ b/ts/packages/shell/demo/DEMO_SETUP.md @@ -31,8 +31,9 @@ Reinstall vscode-shell after any changes to `ts/packages/vscode-shell` (includin ## 2. Processes that must be running before you start 1. **Agent server** — `pnpm --filter agent-server start` (listens on `ws://localhost:8999`). -2. **Code agent's WebSocket** on port `8082` — started automatically when the - `code` schema is enabled in your session, and consumed by the coda extension. +2. **Code agent's WebSocket** — started automatically when the `code` schema + is enabled in your session (binds an OS-assigned port). The coda extension + discovers it through the agent-server's discovery channel. 3. **VS Code** open, with the **TypeAgent activity-bar icon visible** (vscode-shell installed). Do **not** click the icon until Part 1 hands off. 4. **gh CLI authenticated** — `gh auth status` should be green. Set the default @@ -94,7 +95,7 @@ listener installed by the webview. | `show check runs for that PR` runs `gh run view ` and 404s | Reasoning maps "for that PR" to a workflow-run id, and there's no conversation memory of the prior PR | Use the explicit phrasing `show check runs for PR N [in OWNER/REPO]` (now backed by `gh pr checks`) | | `create scratch.ts` returns "Unknown action name: code.code-general" | Reasoning hallucinates a sub-schema name as an action name | Rephrase as `create a typescript file scratch.ts with a hello world function` (drop "called" and "new"); long-term tracked in plan.md | | `splitEditor` returns "Did not handle the action" | Installed coda is older than `ts/packages/coda` source | Rebuild & reinstall coda | -| `splitEditor` returns "No websocket connection" | Code agent WS server isn't up in this session | `@config schema code on` (then verify port 8082 is listening) | +| `splitEditor` returns "No websocket connection" | Code agent WS server isn't up in this session | `@config schema code on` (the agent-server's discovery channel will then publish the dynamic port to the coda extension) | | `create scratch.ts` returns "Integration ... not found" | Onboarding agent grabbed the request | `@config schema onboarding off` | | `change my color theme to ...` toggles the title-bar instead of the theme | Desktop agent grabbed the request because the prompt didn't say "vscode" | Use the exact wording in the demo file | | "remind me which PR we were just looking at" hallucinates PR #123 / `example/repo` | Reasoning has no conversation memory | `@config request reasoning copilot` | From ce2ff1cb4fe1cdd3fdc9214b459da7665bc6a47f Mon Sep 17 00:00:00 2001 From: Tal Zaccai Date: Wed, 13 May 2026 14:58:42 -0700 Subject: [PATCH 3/5] code agent: clarify port-resolution lifecycle and bind-failure semantics - Extract getKnownCodePort() helper with a comment explaining the two-phase lifecycle (live bound port after start; static env-var prediction before start). The previous inline `getSharedCodePort() ?? resolveCodePortOverride(...)` read like a fallback when it's actually 'live if known, else predicted'. - Tighten getCodeBindPort doc: explicitly call out that EADDRINUSE on the requested port is a hard failure (no silent fallback to OS-assigned), since the user explicitly asked for that port. The previous 'falling back to OS-assigned port' wording in the malformed-input branch could mislead readers into thinking bind failures also fall back, which they don't. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../agents/code/src/codeActionHandler.ts | 30 ++++++++++++++----- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/ts/packages/agents/code/src/codeActionHandler.ts b/ts/packages/agents/code/src/codeActionHandler.ts index 64dc0293e7..e83d39e37e 100644 --- a/ts/packages/agents/code/src/codeActionHandler.ts +++ b/ts/packages/agents/code/src/codeActionHandler.ts @@ -72,16 +72,11 @@ export function instantiate(): AppAgent { setup: (actionContext) => { const ctx = (actionContext as ActionContext) .sessionContext.agentContext; - // Prefer the actual bound port (set once updateAgentContext has - // brought up the shared server); fall back to the static - // override for messaging on the first probe. - const port = - getSharedCodePort() ?? resolveCodePortOverride(process.env); return setupCode( actionContext, ctx.choiceManager, () => ctx.webSocketServer?.isConnected() === true, - port, + getKnownCodePort(), ); }, handleChoice: async (choiceId, response, context) => { @@ -146,21 +141,40 @@ async function checkCodeReadiness( return evaluateCodeReadiness({ clientConnected, vsCodeCliInstalled, - port: getSharedCodePort() ?? resolveCodePortOverride(process.env), + port: getKnownCodePort(), }); } +// Returns the port the code agent's WS server is/will be reachable on, +// for display in readiness/setup messaging. Two phases: +// - After bind: `getSharedCodePort()` returns the actual bound port +// (OS-assigned by default, or `CODE_WEBSOCKET_PORT` if set; either way, +// this is the authoritative answer). +// - Before bind: no live port exists, so we fall back to the static +// prediction from `CODE_WEBSOCKET_PORT` if set, else `undefined` +// (the UI shows "port unknown until bind"). +function getKnownCodePort(): number | undefined { + return getSharedCodePort() ?? resolveCodePortOverride(process.env); +} + // Bind hint for the shared server. Returns the explicit override if // CODE_WEBSOCKET_PORT is set (useful for pinning the port when debugging); // otherwise 0 so the OS picks a free port and the registrar/discovery // channel publishes it. +// +// Note: we only validate the *shape* of the env var here (numeric, >= 0). +// If the caller asks for a specific port and the OS can't bind it +// (EADDRINUSE), `CodeAgentWebSocketServer.start()` rejects with that error +// and the schema-enable fails loudly — we deliberately do NOT silently +// fall back to an OS-assigned port, since the user explicitly asked for +// a specific one. function getCodeBindPort(): number { const raw = process.env["CODE_WEBSOCKET_PORT"]; if (raw === undefined) return 0; const n = parseInt(raw, 10); if (!Number.isFinite(n) || n < 0) { debug( - `Ignoring invalid CODE_WEBSOCKET_PORT=${raw}; falling back to OS-assigned port`, + `Ignoring malformed CODE_WEBSOCKET_PORT=${raw}; using OS-assigned port instead`, ); return 0; } From a0c62b8a55e3aa7a8d54b46353c1653575718018 Mon Sep 17 00:00:00 2001 From: Tal Zaccai Date: Wed, 13 May 2026 15:08:20 -0700 Subject: [PATCH 4/5] address Copilot review: avoid hung promise in createWebSocket; real test cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit createWebSocket previously used an `async` Promise executor (`new Promise(async (resolve) => ...`). If `resolveCodeEndpoint()` rejects or `new WebSocket(endpoint)` throws synchronously (e.g. an invalid `CODE_WEBSOCKET_HOST` override), the rejection lands on the executor's implicit promise rather than the outer one — leaving createWebSocket pending forever and stalling the reconnect loop. Refactor to do the awaits at the top level and try/catch around `new WebSocket()`. codeUpdateContext.spec.ts: replace the placeholder afterEach with a real teardown that tracks every StubContext newSession() hands out and force-disables any still-enabled session. Asserts the shared server is down after cleanup so a leak surfaces loudly instead of contaminating later tests. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../code/test/codeUpdateContext.spec.ts | 39 +++++++++++++++---- ts/packages/coda/src/webSocket.ts | 37 +++++++++++------- 2 files changed, 55 insertions(+), 21 deletions(-) diff --git a/ts/packages/agents/code/test/codeUpdateContext.spec.ts b/ts/packages/agents/code/test/codeUpdateContext.spec.ts index c4f56f50a8..56d0e69f79 100644 --- a/ts/packages/agents/code/test/codeUpdateContext.spec.ts +++ b/ts/packages/agents/code/test/codeUpdateContext.spec.ts @@ -63,19 +63,44 @@ function makeStubContext(agentContext: any): StubContext { describe("code agent shared server + per-session registration", () => { const agent = instantiate(); + // Track all sessions created during a test so afterEach can defensively + // tear down anything a test left enabled (e.g., on assertion failure + // before the test's own try/finally cleanup runs). Without this, a + // surviving shared server contaminates later tests. + const liveSessions = new Set(); async function newSession(): Promise { const agentContext = await agent.initializeAgentContext!(); - return makeStubContext(agentContext); + const ctx = makeStubContext(agentContext); + liveSessions.add(ctx); + return ctx; } afterEach(async () => { - // Defensive: if a test left the shared server up, calling - // updateAgentContext(false, ...) on a synthetic session won't - // close it (refcount stays positive). Reach into the module's - // close path by exhausting any sessions via the public API — - // but since we control all stubs in each test, the per-test - // cleanup handles it. Leaving this hook as a placeholder. + // Disable any session still enabled from the test body, then drop + // it from the live set. updateAgentContext(false, ...) is idempotent + // and refcount-safe, so it's fine to call on sessions whose + // try/finally already ran. + for (const ctx of liveSessions) { + try { + await agent.updateAgentContext!( + false, + ctx.sessionContext, + "code", + ); + } catch { + // Best-effort cleanup; swallow so other sessions still tear down. + } + } + liveSessions.clear(); + // Sanity: the shared server should now be down. If a test left it up + // for a reason we don't model, surface it loudly so we don't silently + // leak across tests. + if (getSharedCodePort() !== undefined) { + throw new Error( + "shared WebSocket server still bound after afterEach cleanup", + ); + } }); test("enable on a fresh session binds the shared server and registers under (code, default)", async () => { diff --git a/ts/packages/coda/src/webSocket.ts b/ts/packages/coda/src/webSocket.ts index 2b0894b897..ba8e488edf 100644 --- a/ts/packages/coda/src/webSocket.ts +++ b/ts/packages/coda/src/webSocket.ts @@ -53,30 +53,39 @@ export async function createWebSocket( channel: string, role: string, clientId?: string, -) { - return new Promise(async (resolve) => { +): Promise { + let endpoint: string; + try { const base = await resolveCodeEndpoint(); - if (!base) { - resolve(undefined); - return; - } - let endpoint = `${base}?channel=${channel}&role=${role}`; + if (!base) return undefined; + endpoint = `${base}?channel=${channel}&role=${role}`; if (clientId) { endpoint += `&clientId=${clientId}`; } - - const webSocket = new WebSocket(endpoint); - - webSocket.onopen = (event: object) => { + } catch (error) { + console.error("Error resolving code agent endpoint:", error); + return undefined; + } + return new Promise((resolve) => { + let webSocket: WebSocket; + try { + webSocket = new WebSocket(endpoint); + } catch (error) { + // e.g. invalid URL from CODE_WEBSOCKET_HOST override. + console.error("Error constructing WebSocket:", error); + resolve(undefined); + return; + } + webSocket.onopen = () => { console.log("websocket open"); resolve(webSocket); }; - webSocket.onmessage = (event: object) => {}; - webSocket.onclose = (event: object) => { + webSocket.onmessage = () => {}; + webSocket.onclose = () => { console.log("websocket connection closed"); resolve(undefined); }; - webSocket.onerror = (event: object) => { + webSocket.onerror = () => { console.error("websocket error"); resolve(undefined); }; From a20e81357e86522ca097c4d574fe72a3213960fb Mon Sep 17 00:00:00 2001 From: Tal Zaccai Date: Thu, 14 May 2026 13:57:26 -0700 Subject: [PATCH 5/5] address review: log when CODE_WEBSOCKET_PORT override is active or ignored Per @robgruen review feedback on resolveCodePortOverride: surface a debug log when the env-var override is in effect (so a user wondering why the port isn't OS-assigned can confirm), and another when an invalid value is ignored (so a typo doesn't silently fall back to OS-assigned without any diagnostic). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ts/packages/agents/code/src/readiness.ts | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/ts/packages/agents/code/src/readiness.ts b/ts/packages/agents/code/src/readiness.ts index ad57cb676f..2395c634a1 100644 --- a/ts/packages/agents/code/src/readiness.ts +++ b/ts/packages/agents/code/src/readiness.ts @@ -9,6 +9,7 @@ // server — not that we can dial out to a port. import { spawn } from "child_process"; +import registerDebug from "debug"; import { ActionContext, ActionResult, @@ -21,6 +22,8 @@ import { createYesNoChoiceResult, } from "@typeagent/agent-sdk/helpers/action"; +const debug = registerDebug("typeagent:code"); + // HH:MM timestamp prefix for status updates — same convention used by // screencapture / desktop / calendar so progress reads consistently. function ts(): string { @@ -120,7 +123,16 @@ export function resolveCodePortOverride( const raw = env.CODE_WEBSOCKET_PORT; if (raw === undefined) return undefined; const n = parseInt(raw, 10); - if (Number.isFinite(n) && n > 0) return n; + if (Number.isFinite(n) && n > 0) { + debug( + `CODE_WEBSOCKET_PORT override active: using port ${n} (set in environment)`, + ); + return n; + } + debug( + `CODE_WEBSOCKET_PORT override ignored: invalid value %o; falling back to OS-assigned port`, + raw, + ); return undefined; }