From da97dca6d5266537708add85fd07677d2fb7f878 Mon Sep 17 00:00:00 2001 From: MK Date: Mon, 18 May 2026 13:15:51 -0400 Subject: [PATCH 1/2] fix(secrets): skip unloadable encrypted-file providers (closes #52) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OverlaySecretsToEnv and Runner.buildSecretProvider previously appended $HOME/.forge/secrets.enc to the provider chain unconditionally. When that global file used a different passphrase than the project's local file, ChainProvider.Get and ChainProvider.List propagated the decrypt error and poisoned the entire chain — silently hiding every non-builtin key the local file declared (the failure mode #51 exposed). Add viableEncryptedFileProviders(workDir, passCb, warnFn) which builds the candidate list of agent-local and global encrypted-file providers and filters out the unviable ones: - Missing file: silently skipped (common case — user never ran `forge secret set --global`). - Decryption fails (wrong passphrase, corruption): warning emitted via warnFn, provider dropped from the chain. - Decryption succeeds: provider admitted. List() during validation populates EncryptedFileProvider.cache so the subsequent secretOverlayKeys + Get path reuses the work with no extra Argon2id. ChainProvider semantics are unchanged — it stays strict on non-NotFound errors. The fix is at the chain-construction site: we don't admit providers we already know will fail. Both OverlaySecretsToEnv (pre-runner, no structured logger available) and Runner.buildSecretProvider (uses r.logger.Warn) now call the shared helper. stderrWarn provides the pre-runner sink with a "forge: ..." prefix matching other early-startup messages. Tests: - TestOverlaySecretsToEnv_StaleGlobalFileDoesNotPoisonChain: hermetic end-to-end with a temp HOME containing a deliberately-bad global file; asserts the local file's custom key still overlays. - TestViableEncryptedFileProviders_Categorization: covers admit/drop+warn/silent-skip outcomes. - TestViableEncryptedFileProviders_MissingGlobalIsSilent: missing global must not produce warning noise. - TestOverlaySecretsToEnv_CustomSkillKey (the #48 regression) no longer needs t.Setenv("HOME", t.TempDir()) — the fix handles real-world stale globals automatically. Confirmed against the actual machine's ~/.forge/secrets.enc (mismatched passphrase) where the warning fires and the test still passes. --- forge-cli/runtime/runner.go | 86 +++++++++++---- forge-cli/runtime/runner_secrets_test.go | 133 ++++++++++++++++++++++- 2 files changed, 194 insertions(+), 25 deletions(-) diff --git a/forge-cli/runtime/runner.go b/forge-cli/runtime/runner.go index 3bd035d..de974df 100644 --- a/forge-cli/runtime/runner.go +++ b/forge-cli/runtime/runner.go @@ -2480,16 +2480,7 @@ func (r *Runner) buildSecretProvider() secrets.Provider { case "env": providers = append(providers, secrets.NewEnvProvider("")) case "encrypted-file": - // Agent-local secrets file (in agent workdir) - localPath := filepath.Join(r.cfg.WorkDir, ".forge", "secrets.enc") - providers = append(providers, secrets.NewEncryptedFileProvider(localPath, passCb)) - - // Global fallback secrets file - home, err := os.UserHomeDir() - if err == nil { - globalPath := filepath.Join(home, ".forge", "secrets.enc") - providers = append(providers, secrets.NewEncryptedFileProvider(globalPath, passCb)) - } + providers = append(providers, viableEncryptedFileProviders(r.cfg.WorkDir, passCb, r.logger.Warn)...) default: r.logger.Warn("unknown secret provider, skipping", map[string]any{"provider": name}) } @@ -2504,10 +2495,60 @@ func (r *Runner) buildSecretProvider() secrets.Provider { return secrets.NewChainProvider(providers...) } +// viableEncryptedFileProviders returns the agent-local and global +// encrypted-file providers that pass an eager-load check. Files that don't +// exist are silently skipped (the common case: the operator never ran +// `forge secret set --global`). Files that fail to decrypt (wrong passphrase, +// corruption) emit a warning via warnFn and are dropped from the chain — so a +// stale global file with a different passphrase cannot poison subsequent +// ChainProvider.Get/List calls once admitted to the chain. +// +// The returned providers retain their decrypted cache (EncryptedFileProvider +// flags `loaded = true` after a successful List()), so subsequent reads — by +// secretOverlayKeys, by Get on individual keys — reuse the work and don't +// trigger another Argon2id derivation. +// +// warnFn may be nil; in that case decryption failures are silently skipped. +func viableEncryptedFileProviders(workDir string, passCb func() (string, error), warnFn func(msg string, fields map[string]any)) []secrets.Provider { + candidates := []struct{ path, label string }{ + {filepath.Join(workDir, ".forge", "secrets.enc"), "agent-local"}, + } + if home, err := os.UserHomeDir(); err == nil { + candidates = append(candidates, struct{ path, label string }{filepath.Join(home, ".forge", "secrets.enc"), "global"}) + } + + var viable []secrets.Provider + for _, c := range candidates { + if _, err := os.Stat(c.path); os.IsNotExist(err) { + continue + } + provider := secrets.NewEncryptedFileProvider(c.path, passCb) + // Eagerly validate the file can be decrypted. List() runs + // ensureLoaded which performs the decrypt and caches the cleartext + // for later calls. + if _, err := provider.List(); err != nil { + if warnFn != nil { + warnFn("skipping secrets provider that failed to load", map[string]any{ + "path": c.path, + "label": c.label, + "error": err.Error(), + }) + } + continue + } + viable = append(viable, provider) + } + return viable +} + // OverlaySecretsToEnv loads secrets from the config's provider chain and sets // them in the OS environment so that channel adapters (which use os.Getenv) can // access encrypted secrets. Only keys not already set in the env are written. // workDir is the agent directory used to locate agent-local secrets. +// +// Runs before the Runner exists (called from cmd/common.go), so it doesn't +// have access to the structured logger — warnings about unloadable secret +// files go to stderr in the same style as other early-startup messages. func OverlaySecretsToEnv(cfg *types.ForgeConfig, workDir string) { providerNames := cfg.Secrets.Providers if len(providerNames) == 0 { @@ -2520,16 +2561,7 @@ func OverlaySecretsToEnv(cfg *types.ForgeConfig, workDir string) { for _, name := range providerNames { switch name { case "encrypted-file": - // Agent-local secrets file - localPath := filepath.Join(workDir, ".forge", "secrets.enc") - chain = append(chain, secrets.NewEncryptedFileProvider(localPath, passCb)) - - // Global fallback secrets file - home, err := os.UserHomeDir() - if err == nil { - globalPath := filepath.Join(home, ".forge", "secrets.enc") - chain = append(chain, secrets.NewEncryptedFileProvider(globalPath, passCb)) - } + chain = append(chain, viableEncryptedFileProviders(workDir, passCb, stderrWarn)...) case "env": // env provider uses os.Getenv — already available, skip } @@ -2558,6 +2590,20 @@ func OverlaySecretsToEnv(cfg *types.ForgeConfig, workDir string) { } } +// stderrWarn is a warning sink for code paths that run before the structured +// logger is available (e.g. OverlaySecretsToEnv). +func stderrWarn(msg string, fields map[string]any) { + var parts []string + for k, v := range fields { + parts = append(parts, fmt.Sprintf("%s=%v", k, v)) + } + fmt.Fprintf(os.Stderr, " forge: %s", msg) + if len(parts) > 0 { + fmt.Fprintf(os.Stderr, " (%s)", strings.Join(parts, ", ")) + } + fmt.Fprintln(os.Stderr) +} + // writeJSON writes a JSON response with the given status code. func writeJSON(w http.ResponseWriter, status int, v any) { w.Header().Set("Content-Type", "application/json") diff --git a/forge-cli/runtime/runner_secrets_test.go b/forge-cli/runtime/runner_secrets_test.go index 430d5c4..d0df221 100644 --- a/forge-cli/runtime/runner_secrets_test.go +++ b/forge-cli/runtime/runner_secrets_test.go @@ -140,11 +140,10 @@ func TestOverlaySecretsToEnv_CustomSkillKey(t *testing.T) { const builtinVal = "tavily-secret-value" t.Setenv("FORGE_PASSPHRASE", passphrase) - // Redirect HOME so the global ~/.forge/secrets.enc fallback resolves to an - // empty path under the temp dir, isolating the test from the developer's - // real home directory. - t.Setenv("HOME", t.TempDir()) - // Ensure the target env vars are clear before the test runs. + // HOME isolation is no longer required: viableEncryptedFileProviders + // (issue #52) eagerly validates each candidate file and silently skips + // any global ~/.forge/secrets.enc that fails to decrypt with the test + // passphrase. The test now exercises the real chain-build path. t.Setenv(customKey, "") t.Setenv(builtinKey, "") @@ -177,3 +176,127 @@ func TestOverlaySecretsToEnv_CustomSkillKey(t *testing.T) { t.Errorf("custom skill key %q in OS env = %q, want %q", customKey, got, customVal) } } + +// TestOverlaySecretsToEnv_StaleGlobalFileDoesNotPoisonChain is the regression +// for issue #52: a global ~/.forge/secrets.enc encrypted with a different +// passphrase than the project's local file must not block local keys from +// being overlaid. Before the fix, ChainProvider.List() (and Get for keys not +// in the local file) would propagate the decryption error, hiding every +// non-builtin key the local file declared. +func TestOverlaySecretsToEnv_StaleGlobalFileDoesNotPoisonChain(t *testing.T) { + workDir := t.TempDir() + fakeHome := t.TempDir() + + const localPass = "local-project-passphrase" + const globalPass = "old-unrelated-passphrase" // intentionally different + const customKey = "PROJECT_CUSTOM_KEY" + const customVal = "from-local-encrypted-file" + + t.Setenv("FORGE_PASSPHRASE", localPass) + t.Setenv("HOME", fakeHome) + t.Setenv(customKey, "") + + // Seed the LOCAL encrypted file with the project's passphrase. + localPath := filepath.Join(workDir, ".forge", "secrets.enc") + localProvider := secrets.NewEncryptedFileProvider(localPath, func() (string, error) { + return localPass, nil + }) + if err := localProvider.SetBatch(map[string]string{customKey: customVal}); err != nil { + t.Fatalf("seeding local secrets: %v", err) + } + + // Seed the GLOBAL encrypted file at $HOME/.forge/secrets.enc with a + // different passphrase. The runtime will pick this file up via the + // global fallback path; pre-#52 it would poison the chain. + globalPath := filepath.Join(fakeHome, ".forge", "secrets.enc") + globalProvider := secrets.NewEncryptedFileProvider(globalPath, func() (string, error) { + return globalPass, nil + }) + if err := globalProvider.SetBatch(map[string]string{"UNRELATED": "value"}); err != nil { + t.Fatalf("seeding global secrets: %v", err) + } + + cfg := &types.ForgeConfig{ + AgentID: "stale-global-test", + Secrets: types.SecretsConfig{Providers: []string{"encrypted-file"}}, + } + + OverlaySecretsToEnv(cfg, workDir) + + // The local file's custom key must reach the OS env even though the + // global file failed to decrypt with the project passphrase. + if got := os.Getenv(customKey); got != customVal { + t.Errorf("custom key %q in OS env = %q, want %q (stale global file should be skipped, not poison the chain)", customKey, got, customVal) + } +} + +// TestViableEncryptedFileProviders_Categorization verifies the three outcomes +// for each candidate path: missing file silently skipped, decryptable file +// admitted to the chain, undecryptable file dropped with a warning. +func TestViableEncryptedFileProviders_Categorization(t *testing.T) { + workDir := t.TempDir() + fakeHome := t.TempDir() + t.Setenv("HOME", fakeHome) + + const projectPass = "p1" + const globalPass = "p2" + + // Local: decryptable. + localPath := filepath.Join(workDir, ".forge", "secrets.enc") + if err := secrets.NewEncryptedFileProvider(localPath, func() (string, error) { + return projectPass, nil + }).SetBatch(map[string]string{"LOCAL_KEY": "v"}); err != nil { + t.Fatalf("seeding local: %v", err) + } + // Global: undecryptable with the project passphrase. + globalPath := filepath.Join(fakeHome, ".forge", "secrets.enc") + if err := secrets.NewEncryptedFileProvider(globalPath, func() (string, error) { + return globalPass, nil + }).SetBatch(map[string]string{"OTHER_KEY": "v"}); err != nil { + t.Fatalf("seeding global: %v", err) + } + + var warnings []string + got := viableEncryptedFileProviders(workDir, func() (string, error) { + return projectPass, nil + }, func(msg string, fields map[string]any) { + warnings = append(warnings, fields["label"].(string)) + }) + + if len(got) != 1 { + t.Fatalf("got %d viable providers, want 1 (local only)", len(got)) + } + if len(warnings) != 1 || warnings[0] != "global" { + t.Errorf("warnings = %v, want one warning for label=global", warnings) + } +} + +// TestViableEncryptedFileProviders_MissingGlobalIsSilent verifies the +// common-case path: no global file at all, no warning, just the local. +func TestViableEncryptedFileProviders_MissingGlobalIsSilent(t *testing.T) { + workDir := t.TempDir() + fakeHome := t.TempDir() // no secrets.enc inside + t.Setenv("HOME", fakeHome) + + const projectPass = "p" + localPath := filepath.Join(workDir, ".forge", "secrets.enc") + if err := secrets.NewEncryptedFileProvider(localPath, func() (string, error) { + return projectPass, nil + }).SetBatch(map[string]string{"K": "v"}); err != nil { + t.Fatalf("seeding local: %v", err) + } + + var warnings []string + got := viableEncryptedFileProviders(workDir, func() (string, error) { + return projectPass, nil + }, func(msg string, fields map[string]any) { + warnings = append(warnings, fields["label"].(string)) + }) + + if len(got) != 1 { + t.Errorf("got %d providers, want 1", len(got)) + } + if len(warnings) != 0 { + t.Errorf("warnings = %v, want none (missing global should be silent)", warnings) + } +} From f031a85bdf61d177d1d53ff121aad42d8261bbfc Mon Sep 17 00:00:00 2001 From: MK Date: Mon, 18 May 2026 13:21:06 -0400 Subject: [PATCH 2/2] docs: provider chain validation (#52) Add a Provider Chain Validation subsection to docs/security/secret-management.md documenting the eager-load check introduced by #52: - Missing file: silently skipped (common case) - Decryptable: admitted; cache populated for reuse - Undecryptable: dropped with warning, naming the file path Cross-reference from docs/core-concepts/runtime-engine.md so the runtime engine's Secret Overlay section points at the chain-build validation step. --- docs/core-concepts/runtime-engine.md | 4 +++- docs/security/secret-management.md | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/docs/core-concepts/runtime-engine.md b/docs/core-concepts/runtime-engine.md index 75119ad..63d82f3 100644 --- a/docs/core-concepts/runtime-engine.md +++ b/docs/core-concepts/runtime-engine.md @@ -88,7 +88,9 @@ Skill scripts (e.g., `code-review-diff.sh`) detect `OPENAI_BASE_URL` and automat ### Secret Overlay and Reuse Detection -At startup, the runtime overlays secret values from the configured provider chain into the process environment so downstream skill scripts and `cli_execute` invocations can read them via `os.Getenv`. The overlay set is the union of forge's builtin LLM/search/channel keys and whatever the provider enumerates via `List()` — so a skill declaring a custom env var name (e.g. `ACME_API_TOKEN`) flows through without any code change. See [Secret Management](../security/secret-management.md#skill-declared-secrets). +At startup, the runtime overlays secret values from the configured provider chain into the process environment so downstream skill scripts and `cli_execute` invocations can read them via `os.Getenv`. The overlay set is the union of forge's builtin LLM/search/channel keys and whatever the provider enumerates via `List()` — so a skill declaring a custom env var name (e.g. `ACME_API_TOKEN`) flows through without any code change. See [Secret Management — Skill-Declared Secrets](../security/secret-management.md#skill-declared-secrets). + +Before chain assembly, each encrypted-file candidate is eagerly validated. Files that don't exist are silently skipped; files that fail to decrypt (wrong passphrase, corruption) are dropped from the chain with a warning that names the file path. This prevents a stale global `~/.forge/secrets.enc` from poisoning `ChainProvider.Get` / `List` and hiding the agent-local file's keys. See [Provider Chain Validation](../security/secret-management.md#provider-chain-validation). Once overlaid, the runtime also validates that secret values are not reused across different purpose categories. Sharing the same token between unrelated services (e.g., using an OpenAI API key as a Telegram bot token) is blocked with an error. Categories: `llm` (OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY, etc.), `search` (TAVILY_API_KEY, PERPLEXITY_API_KEY), `telegram` (TELEGRAM_BOT_TOKEN), `slack` (SLACK_APP_TOKEN, SLACK_BOT_TOKEN). Cross-category reuse detection is scoped to these builtin categories — custom skill-declared keys have no defined category and are not part of the check. Same-category reuse (e.g., two LLM keys with the same value) is allowed. diff --git a/docs/security/secret-management.md b/docs/security/secret-management.md index b436de7..75c77b6 100644 --- a/docs/security/secret-management.md +++ b/docs/security/secret-management.md @@ -44,6 +44,20 @@ forge secret set OPENAI_API_KEY sk-agent2-key --local At runtime, secrets are resolved in order: **agent-local** -> **global** -> **environment variables**. This lets you override global defaults per agent. +## Provider Chain Validation + +When the chain is built, each candidate encrypted-file provider is eagerly validated before being admitted: + +| Candidate state | Result | Operator-visible signal | +|---|---|---| +| File absent (e.g. you never ran `forge secret set --global`) | Silently skipped | None | +| File present and decrypts with the active passphrase | Admitted; cache populated so subsequent reads reuse the cleartext | Normal operation | +| File present but decryption fails (wrong passphrase, corruption) | **Dropped from the chain** with a warning | `forge: skipping secrets provider that failed to load (path=..., error=...)` | + +The drop-with-warning behavior prevents a stale `~/.forge/secrets.enc` — one encrypted with a passphrase you've since forgotten or from an unrelated project — from poisoning the chain and hiding the keys your agent-local file declares. The local file's keys still flow through to the agent. The warning tells you exactly which file to delete or re-encrypt. + +This validation runs once per `forge run`, in both `OverlaySecretsToEnv` (pre-runner startup) and `Runner.buildSecretProvider` (in-runner). + ## Skill-Declared Secrets Skills declare env var requirements in `SKILL.md` (`metadata.forge.requires.env`). At startup the runtime overlays each declared key from the configured secret provider chain into the process environment, so the skill's script or `cli_execute` invocation finds it via `os.Getenv`.