From 5198a9fc1102d72bed354754c93f300216045e98 Mon Sep 17 00:00:00 2001 From: MK Date: Mon, 18 May 2026 16:44:51 -0400 Subject: [PATCH] fix(windows): shared process.IsAlive for forge-cli and forge-ui (closes #59) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit forge-ui's pidAlive used os.Process.Signal(syscall.Signal(0)) — the standard Unix idiom for "is this PID alive?". On Windows, Go's stdlib only translates os.Interrupt and os.Kill to platform calls; any other signal returns "operating system does not support signal". So pidAlive returned false on Windows regardless of whether the PID was alive, breaking `forge ui` startup detection: - waitForPort (forge-ui/process.go) fast-fails at the pidAlive check instead of looping until the TCP port responds. The UI shows "agent failed to start" and surfaces the daemon's normal startup banner (which by then is in serve.log) as the supposed error. - Discovery (forge-ui/discovery.go:133, now 134) thinks the daemon is dead and deletes .forge/serve.json — actively orphaning a still-running daemon from the dashboard on every page load. forge-cli already knew about this — serve_windows.go had a working OpenProcess-based isProcessAlive. The duplication is what caused the forge-ui side to miss it. Option A from the issue: extract a single platform-split helper in forge-core/util/process and delete the duplicates. - forge-core/util/process/process_{unix,windows}.go — IsAlive(pid). Unix: FindProcess + Signal(0). Windows: OpenProcess with PROCESS_QUERY_LIMITED_INFORMATION + CloseHandle. - forge-core/util/process/process_test.go — cross-platform tests: IsAlive(self) is true; IsAlive(spawned-then-waited subprocess pid) is false. The subprocess approach is deterministic on both Unix and Windows; picking arbitrary high PIDs is not. - forge-cli/cmd/serve_{unix,windows}.go — delete isProcessAlive, keep only the daemon-attr and signal helpers. - forge-cli/cmd/serve.go — switch to process.IsAlive (2 call sites). - forge-ui/process.go — delete pidAlive; switch waitForPort to process.IsAlive. Drop the now-unused syscall import. - forge-ui/discovery.go — switch to process.IsAlive. There is now exactly one implementation of "is this PID alive" in the tree, build-tag-split, consumed by both forge-cli and forge-ui. Verified: gofmt clean; golangci-lint 0 issues across all four modules; GOOS=windows go build succeeds for forge-core, forge-cli, forge-ui; native tests pass. --- forge-cli/cmd/serve.go | 5 ++- forge-cli/cmd/serve_unix.go | 8 ---- forge-cli/cmd/serve_windows.go | 10 ----- forge-core/util/process/process_test.go | 45 ++++++++++++++++++++++ forge-core/util/process/process_unix.go | 30 +++++++++++++++ forge-core/util/process/process_windows.go | 39 +++++++++++++++++++ forge-ui/discovery.go | 3 +- forge-ui/process.go | 14 ++----- 8 files changed, 122 insertions(+), 32 deletions(-) create mode 100644 forge-core/util/process/process_test.go create mode 100644 forge-core/util/process/process_unix.go create mode 100644 forge-core/util/process/process_windows.go diff --git a/forge-cli/cmd/serve.go b/forge-cli/cmd/serve.go index 036a876..1cdecd7 100644 --- a/forge-cli/cmd/serve.go +++ b/forge-cli/cmd/serve.go @@ -12,6 +12,7 @@ import ( "strings" "time" + "github.com/initializ/forge/forge-core/util/process" "github.com/spf13/cobra" ) @@ -141,7 +142,7 @@ func readDaemonState(path string) (daemonState, bool) { return daemonState{}, false } - if state.PID <= 0 || !isProcessAlive(state.PID) { + if state.PID <= 0 || !process.IsAlive(state.PID) { return daemonState{}, false } @@ -301,7 +302,7 @@ func serveStopRun(cmd *cobra.Command, args []string) error { // Poll for exit (up to 10 seconds) deadline := time.Now().Add(10 * time.Second) for time.Now().Before(deadline) { - if !isProcessAlive(state.PID) { + if !process.IsAlive(state.PID) { os.Remove(statePath) //nolint:errcheck fmt.Fprintln(os.Stderr, "Daemon stopped.") return nil diff --git a/forge-cli/cmd/serve_unix.go b/forge-cli/cmd/serve_unix.go index 4a92ec0..bbc314a 100644 --- a/forge-cli/cmd/serve_unix.go +++ b/forge-cli/cmd/serve_unix.go @@ -11,14 +11,6 @@ func daemonSysProcAttr() *syscall.SysProcAttr { return &syscall.SysProcAttr{Setsid: true} } -func isProcessAlive(pid int) bool { - proc, err := os.FindProcess(pid) - if err != nil { - return false - } - return proc.Signal(syscall.Signal(0)) == nil -} - func sendTermSignal(proc *os.Process) error { return proc.Signal(syscall.SIGTERM) } diff --git a/forge-cli/cmd/serve_windows.go b/forge-cli/cmd/serve_windows.go index 31ae852..e7c24e1 100644 --- a/forge-cli/cmd/serve_windows.go +++ b/forge-cli/cmd/serve_windows.go @@ -11,16 +11,6 @@ func daemonSysProcAttr() *syscall.SysProcAttr { return &syscall.SysProcAttr{CreationFlags: 0x00000008} // DETACHED_PROCESS } -func isProcessAlive(pid int) bool { - const processQueryLimitedInfo = 0x1000 - h, err := syscall.OpenProcess(processQueryLimitedInfo, false, uint32(pid)) - if err != nil { - return false - } - _ = syscall.CloseHandle(h) - return true -} - func sendTermSignal(proc *os.Process) error { // Windows has no SIGTERM; terminate the process directly. return proc.Kill() diff --git a/forge-core/util/process/process_test.go b/forge-core/util/process/process_test.go new file mode 100644 index 0000000..488668d --- /dev/null +++ b/forge-core/util/process/process_test.go @@ -0,0 +1,45 @@ +package process + +import ( + "os" + "os/exec" + "runtime" + "testing" + "time" +) + +func TestIsAlive_Self(t *testing.T) { + if !IsAlive(os.Getpid()) { + t.Errorf("IsAlive(self=%d) = false, want true", os.Getpid()) + } +} + +// TestIsAlive_ChildAfterExit spawns a short-lived subprocess, waits for it +// to exit cleanly, and asserts IsAlive returns false for its PID. This is +// the deterministic way to exercise the "process is gone" branch on both +// platforms — picking a "high PID that probably isn't real" is unreliable. +func TestIsAlive_ChildAfterExit(t *testing.T) { + var cmd *exec.Cmd + if runtime.GOOS == "windows" { + cmd = exec.Command("cmd", "/c", "exit", "0") + } else { + cmd = exec.Command("true") + } + if err := cmd.Start(); err != nil { + t.Fatalf("starting subprocess: %v", err) + } + pid := cmd.Process.Pid + if err := cmd.Wait(); err != nil { + t.Fatalf("waiting for subprocess: %v", err) + } + // Give the OS a beat to actually reap the PID. On most systems this + // is instant, but Windows handle cleanup can briefly lag. + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + if !IsAlive(pid) { + return + } + time.Sleep(50 * time.Millisecond) + } + t.Errorf("IsAlive(pid=%d) = true after Wait() returned, want false", pid) +} diff --git a/forge-core/util/process/process_unix.go b/forge-core/util/process/process_unix.go new file mode 100644 index 0000000..3de120a --- /dev/null +++ b/forge-core/util/process/process_unix.go @@ -0,0 +1,30 @@ +//go:build !windows + +// Package process provides small, platform-aware utilities for interacting +// with OS processes by PID. Today it ships a single function — IsAlive — +// because the same Unix idiom (Signal(0)) silently fails on Windows and was +// duplicated across forge-cli and forge-ui with the Windows variant missing. +// See issue #59. +package process + +import ( + "os" + "syscall" +) + +// IsAlive reports whether a process with the given PID is currently running. +// +// On Unix, this calls FindProcess and sends a null signal (signal 0), which +// the kernel treats as a permission/existence check without delivering an +// actual signal. The OS returns success when the process exists, even if the +// caller lacks permission to signal it. +// +// Any error from either step is treated as "not alive" so callers can use a +// simple bool branch. +func IsAlive(pid int) bool { + proc, err := os.FindProcess(pid) + if err != nil { + return false + } + return proc.Signal(syscall.Signal(0)) == nil +} diff --git a/forge-core/util/process/process_windows.go b/forge-core/util/process/process_windows.go new file mode 100644 index 0000000..91c575f --- /dev/null +++ b/forge-core/util/process/process_windows.go @@ -0,0 +1,39 @@ +//go:build windows + +// Package process provides small, platform-aware utilities for interacting +// with OS processes by PID. See process_unix.go for the package overview. +package process + +import "syscall" + +// processQueryLimitedInfo is Windows' lightweight process access right +// (no PROCESS_VM_READ, etc.) — enough to call GetProcessTimes / +// QueryFullProcessImageName / etc. without elevated privileges. Granting +// PROCESS_QUERY_INFORMATION instead would also work but requires more +// rights than this check needs. +const processQueryLimitedInfo = 0x1000 + +// IsAlive reports whether a process with the given PID is currently running. +// +// On Windows, the Unix idiom os.Process.Signal(syscall.Signal(0)) does not +// work — Go's stdlib only knows how to translate os.Interrupt and os.Kill +// for Windows processes, so any other signal returns +// "operating system does not support signal". This always-error response +// makes Signal(0) useless as a liveness probe on Windows. +// +// Instead, open the process handle with PROCESS_QUERY_LIMITED_INFORMATION +// rights. OpenProcess fails only when the PID doesn't exist or the caller +// lacks rights even for the limited-info subset; both cases reasonably map +// to "not alive" for our use case (the forge daemon is the caller's child, +// so it always has rights to its own PID). +// +// The handle is closed immediately — we only care that OpenProcess +// succeeded, not what the handle exposes. +func IsAlive(pid int) bool { + h, err := syscall.OpenProcess(processQueryLimitedInfo, false, uint32(pid)) + if err != nil { + return false + } + _ = syscall.CloseHandle(h) + return true +} diff --git a/forge-ui/discovery.go b/forge-ui/discovery.go index c5b608b..20b7753 100644 --- a/forge-ui/discovery.go +++ b/forge-ui/discovery.go @@ -10,6 +10,7 @@ import ( "time" "github.com/initializ/forge/forge-core/types" + "github.com/initializ/forge/forge-core/util/process" ) // externalDaemonState mirrors the daemonState written by `forge serve start`. @@ -130,7 +131,7 @@ func detectExternalAgent(dir string) (int, bool) { } // Check PID liveness first — if the process is dead, serve.json is stale. - if state.PID > 0 && !pidAlive(state.PID) { + if state.PID > 0 && !process.IsAlive(state.PID) { _ = os.Remove(statePath) return 0, false } diff --git a/forge-ui/process.go b/forge-ui/process.go index 7e8be88..d828d9e 100644 --- a/forge-ui/process.go +++ b/forge-ui/process.go @@ -11,8 +11,9 @@ import ( "strconv" "strings" "sync" - "syscall" "time" + + "github.com/initializ/forge/forge-core/util/process" ) // PortAllocator manages port assignment for agent processes. @@ -171,7 +172,7 @@ func (pm *ProcessManager) waitForPort(agentDir string, port int) bool { for time.Now().Before(deadline) { // Fast-fail: if the child process already exited, don't keep polling. - if pid > 0 && !pidAlive(pid) { + if pid > 0 && !process.IsAlive(pid) { return false } @@ -201,15 +202,6 @@ func (pm *ProcessManager) readServePID(agentDir string) int { return state.PID } -// pidAlive checks if a process with the given PID is still running. -func pidAlive(pid int) bool { - proc, err := os.FindProcess(pid) - if err != nil { - return false - } - return proc.Signal(syscall.Signal(0)) == nil -} - // readServeLogs reads .forge/serve.log and extracts the error message. // It looks for cobra "Error:" lines first, then falls back to the last // few non-empty lines. This avoids returning cobra usage/help text that