diff --git a/forge-cli/cmd/serve.go b/forge-cli/cmd/serve.go index 036a876..1cdecd7 100644 --- a/forge-cli/cmd/serve.go +++ b/forge-cli/cmd/serve.go @@ -12,6 +12,7 @@ import ( "strings" "time" + "github.com/initializ/forge/forge-core/util/process" "github.com/spf13/cobra" ) @@ -141,7 +142,7 @@ func readDaemonState(path string) (daemonState, bool) { return daemonState{}, false } - if state.PID <= 0 || !isProcessAlive(state.PID) { + if state.PID <= 0 || !process.IsAlive(state.PID) { return daemonState{}, false } @@ -301,7 +302,7 @@ func serveStopRun(cmd *cobra.Command, args []string) error { // Poll for exit (up to 10 seconds) deadline := time.Now().Add(10 * time.Second) for time.Now().Before(deadline) { - if !isProcessAlive(state.PID) { + if !process.IsAlive(state.PID) { os.Remove(statePath) //nolint:errcheck fmt.Fprintln(os.Stderr, "Daemon stopped.") return nil diff --git a/forge-cli/cmd/serve_unix.go b/forge-cli/cmd/serve_unix.go index 4a92ec0..bbc314a 100644 --- a/forge-cli/cmd/serve_unix.go +++ b/forge-cli/cmd/serve_unix.go @@ -11,14 +11,6 @@ func daemonSysProcAttr() *syscall.SysProcAttr { return &syscall.SysProcAttr{Setsid: true} } -func isProcessAlive(pid int) bool { - proc, err := os.FindProcess(pid) - if err != nil { - return false - } - return proc.Signal(syscall.Signal(0)) == nil -} - func sendTermSignal(proc *os.Process) error { return proc.Signal(syscall.SIGTERM) } diff --git a/forge-cli/cmd/serve_windows.go b/forge-cli/cmd/serve_windows.go index 31ae852..e7c24e1 100644 --- a/forge-cli/cmd/serve_windows.go +++ b/forge-cli/cmd/serve_windows.go @@ -11,16 +11,6 @@ func daemonSysProcAttr() *syscall.SysProcAttr { return &syscall.SysProcAttr{CreationFlags: 0x00000008} // DETACHED_PROCESS } -func isProcessAlive(pid int) bool { - const processQueryLimitedInfo = 0x1000 - h, err := syscall.OpenProcess(processQueryLimitedInfo, false, uint32(pid)) - if err != nil { - return false - } - _ = syscall.CloseHandle(h) - return true -} - func sendTermSignal(proc *os.Process) error { // Windows has no SIGTERM; terminate the process directly. return proc.Kill() diff --git a/forge-core/util/process/process_test.go b/forge-core/util/process/process_test.go new file mode 100644 index 0000000..488668d --- /dev/null +++ b/forge-core/util/process/process_test.go @@ -0,0 +1,45 @@ +package process + +import ( + "os" + "os/exec" + "runtime" + "testing" + "time" +) + +func TestIsAlive_Self(t *testing.T) { + if !IsAlive(os.Getpid()) { + t.Errorf("IsAlive(self=%d) = false, want true", os.Getpid()) + } +} + +// TestIsAlive_ChildAfterExit spawns a short-lived subprocess, waits for it +// to exit cleanly, and asserts IsAlive returns false for its PID. This is +// the deterministic way to exercise the "process is gone" branch on both +// platforms — picking a "high PID that probably isn't real" is unreliable. +func TestIsAlive_ChildAfterExit(t *testing.T) { + var cmd *exec.Cmd + if runtime.GOOS == "windows" { + cmd = exec.Command("cmd", "/c", "exit", "0") + } else { + cmd = exec.Command("true") + } + if err := cmd.Start(); err != nil { + t.Fatalf("starting subprocess: %v", err) + } + pid := cmd.Process.Pid + if err := cmd.Wait(); err != nil { + t.Fatalf("waiting for subprocess: %v", err) + } + // Give the OS a beat to actually reap the PID. On most systems this + // is instant, but Windows handle cleanup can briefly lag. + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + if !IsAlive(pid) { + return + } + time.Sleep(50 * time.Millisecond) + } + t.Errorf("IsAlive(pid=%d) = true after Wait() returned, want false", pid) +} diff --git a/forge-core/util/process/process_unix.go b/forge-core/util/process/process_unix.go new file mode 100644 index 0000000..3de120a --- /dev/null +++ b/forge-core/util/process/process_unix.go @@ -0,0 +1,30 @@ +//go:build !windows + +// Package process provides small, platform-aware utilities for interacting +// with OS processes by PID. Today it ships a single function — IsAlive — +// because the same Unix idiom (Signal(0)) silently fails on Windows and was +// duplicated across forge-cli and forge-ui with the Windows variant missing. +// See issue #59. +package process + +import ( + "os" + "syscall" +) + +// IsAlive reports whether a process with the given PID is currently running. +// +// On Unix, this calls FindProcess and sends a null signal (signal 0), which +// the kernel treats as a permission/existence check without delivering an +// actual signal. The OS returns success when the process exists, even if the +// caller lacks permission to signal it. +// +// Any error from either step is treated as "not alive" so callers can use a +// simple bool branch. +func IsAlive(pid int) bool { + proc, err := os.FindProcess(pid) + if err != nil { + return false + } + return proc.Signal(syscall.Signal(0)) == nil +} diff --git a/forge-core/util/process/process_windows.go b/forge-core/util/process/process_windows.go new file mode 100644 index 0000000..91c575f --- /dev/null +++ b/forge-core/util/process/process_windows.go @@ -0,0 +1,39 @@ +//go:build windows + +// Package process provides small, platform-aware utilities for interacting +// with OS processes by PID. See process_unix.go for the package overview. +package process + +import "syscall" + +// processQueryLimitedInfo is Windows' lightweight process access right +// (no PROCESS_VM_READ, etc.) — enough to call GetProcessTimes / +// QueryFullProcessImageName / etc. without elevated privileges. Granting +// PROCESS_QUERY_INFORMATION instead would also work but requires more +// rights than this check needs. +const processQueryLimitedInfo = 0x1000 + +// IsAlive reports whether a process with the given PID is currently running. +// +// On Windows, the Unix idiom os.Process.Signal(syscall.Signal(0)) does not +// work — Go's stdlib only knows how to translate os.Interrupt and os.Kill +// for Windows processes, so any other signal returns +// "operating system does not support signal". This always-error response +// makes Signal(0) useless as a liveness probe on Windows. +// +// Instead, open the process handle with PROCESS_QUERY_LIMITED_INFORMATION +// rights. OpenProcess fails only when the PID doesn't exist or the caller +// lacks rights even for the limited-info subset; both cases reasonably map +// to "not alive" for our use case (the forge daemon is the caller's child, +// so it always has rights to its own PID). +// +// The handle is closed immediately — we only care that OpenProcess +// succeeded, not what the handle exposes. +func IsAlive(pid int) bool { + h, err := syscall.OpenProcess(processQueryLimitedInfo, false, uint32(pid)) + if err != nil { + return false + } + _ = syscall.CloseHandle(h) + return true +} diff --git a/forge-ui/discovery.go b/forge-ui/discovery.go index c5b608b..20b7753 100644 --- a/forge-ui/discovery.go +++ b/forge-ui/discovery.go @@ -10,6 +10,7 @@ import ( "time" "github.com/initializ/forge/forge-core/types" + "github.com/initializ/forge/forge-core/util/process" ) // externalDaemonState mirrors the daemonState written by `forge serve start`. @@ -130,7 +131,7 @@ func detectExternalAgent(dir string) (int, bool) { } // Check PID liveness first — if the process is dead, serve.json is stale. - if state.PID > 0 && !pidAlive(state.PID) { + if state.PID > 0 && !process.IsAlive(state.PID) { _ = os.Remove(statePath) return 0, false } diff --git a/forge-ui/process.go b/forge-ui/process.go index 7e8be88..d828d9e 100644 --- a/forge-ui/process.go +++ b/forge-ui/process.go @@ -11,8 +11,9 @@ import ( "strconv" "strings" "sync" - "syscall" "time" + + "github.com/initializ/forge/forge-core/util/process" ) // PortAllocator manages port assignment for agent processes. @@ -171,7 +172,7 @@ func (pm *ProcessManager) waitForPort(agentDir string, port int) bool { for time.Now().Before(deadline) { // Fast-fail: if the child process already exited, don't keep polling. - if pid > 0 && !pidAlive(pid) { + if pid > 0 && !process.IsAlive(pid) { return false } @@ -201,15 +202,6 @@ func (pm *ProcessManager) readServePID(agentDir string) int { return state.PID } -// pidAlive checks if a process with the given PID is still running. -func pidAlive(pid int) bool { - proc, err := os.FindProcess(pid) - if err != nil { - return false - } - return proc.Signal(syscall.Signal(0)) == nil -} - // readServeLogs reads .forge/serve.log and extracts the error message. // It looks for cobra "Error:" lines first, then falls back to the last // few non-empty lines. This avoids returning cobra usage/help text that