From 41e17d72265d046e397e0449e3436f3650a68df8 Mon Sep 17 00:00:00 2001 From: Swarit Pandey Date: Wed, 6 May 2026 18:56:27 +0530 Subject: [PATCH 1/6] feat(npmrc): surface inventory of every .npmrc on the host Adds a read-only audit that enumerates each .npmrc across all four npm config scopes (built-in, global, user, project), parses every key/value (redacting auth values to ***last4, preserving ${VAR} env-refs), and captures the merged effective view npm itself would resolve via `npm config ls -l --json` plus source attribution from `npm config ls -l`. Surfaces in: - standard scan: compact summary section in --pretty + JSON via ScanResult.NPMRCAudit / Payload.NPMRCAudit - new --npmrc flag: focused, verbose pretty view (or JSON via --npmrc --json) for deep inspection without running the rest of the scan; ~1s runtime - HTML / enterprise telemetry payloads pick up the audit object automatically through the model wiring Drift detection (snapshot/diff across runs), per-project effective overrides, and severity scoring are intentionally out of scope here. The model carries SHA256 / ValueSHA256 fingerprints today specifically so a future drift layer can land cheaply on top of the surface inventory. See .plans/0005-npmrc-audit.md for extension notes. --- cmd/stepsecurity-dev-machine-guard/main.go | 59 +++ internal/cli/cli.go | 4 + internal/detector/npmrc.go | 449 +++++++++++++++++++++ internal/detector/npmrc_parse.go | 200 +++++++++ internal/detector/npmrc_parse_test.go | 250 ++++++++++++ internal/detector/npmrc_stat_unix.go | 39 ++ internal/detector/npmrc_stat_windows.go | 10 + internal/detector/npmrc_test.go | 356 ++++++++++++++++ internal/model/model.go | 77 ++++ internal/output/npmrc_verbose.go | 306 ++++++++++++++ internal/output/npmrc_verbose_test.go | 169 ++++++++ internal/output/pretty.go | 25 ++ internal/scan/scanner.go | 10 + internal/telemetry/telemetry.go | 13 + 14 files changed, 1967 insertions(+) create mode 100644 internal/detector/npmrc.go create mode 100644 internal/detector/npmrc_parse.go create mode 100644 internal/detector/npmrc_parse_test.go create mode 100644 internal/detector/npmrc_stat_unix.go create mode 100644 internal/detector/npmrc_stat_windows.go create mode 100644 internal/detector/npmrc_test.go create mode 100644 internal/output/npmrc_verbose.go create mode 100644 internal/output/npmrc_verbose_test.go diff --git a/cmd/stepsecurity-dev-machine-guard/main.go b/cmd/stepsecurity-dev-machine-guard/main.go index 3a6742f..43d7b6d 100644 --- a/cmd/stepsecurity-dev-machine-guard/main.go +++ b/cmd/stepsecurity-dev-machine-guard/main.go @@ -1,15 +1,21 @@ package main import ( + "context" + "encoding/json" "fmt" + "io" "os" "runtime" "github.com/step-security/dev-machine-guard/internal/buildinfo" "github.com/step-security/dev-machine-guard/internal/cli" "github.com/step-security/dev-machine-guard/internal/config" + "github.com/step-security/dev-machine-guard/internal/detector" + "github.com/step-security/dev-machine-guard/internal/device" "github.com/step-security/dev-machine-guard/internal/executor" "github.com/step-security/dev-machine-guard/internal/launchd" + "github.com/step-security/dev-machine-guard/internal/output" "github.com/step-security/dev-machine-guard/internal/progress" "github.com/step-security/dev-machine-guard/internal/scan" "github.com/step-security/dev-machine-guard/internal/schtasks" @@ -160,6 +166,15 @@ func main() { } default: + // --npmrc: focused, verbose pretty audit of npm config only. + // Bypasses all other detectors for a fast (~1s) deep dive. + if cfg.NPMRCOnly { + if err := runNPMRCOnly(exec, cfg); err != nil { + log.Error("%v", err) + os.Exit(1) + } + return + } // Community mode or auto-detect enterprise switch { case cfg.OutputFormatSet || cfg.HTMLOutputFile != "": @@ -184,3 +199,47 @@ func main() { } } } + +// runNPMRCOnly executes only the npmrc detector and renders the verbose +// pretty view (or JSON when --json is also passed). Skips IDE / AI / Brew / +// Python / Node / pip detection so the run is fast and the output is +// exclusively about npm configuration. +func runNPMRCOnly(exec executor.Executor, cfg *cli.Config) error { + ctx := context.Background() + dev := device.Gather(ctx, exec) + loggedInUser, _ := exec.LoggedInUser() + + searchDirs := resolveScanSearchDirs(exec, cfg.SearchDirs) + audit := detector.NewNPMRCDetector(exec).Detect(ctx, searchDirs, loggedInUser) + + if cfg.OutputFormat == "json" { + return scanJSONEncoder(os.Stdout).Encode(audit) + } + output.PrettyNPMRC(os.Stdout, &audit, dev, cfg.ColorMode) + return nil +} + +// resolveScanSearchDirs expands `$HOME` to the logged-in user's home dir +// and leaves other entries unchanged. Mirrors the helper inside scan.Run +// so --npmrc walks the same project tree the full scan would. +func resolveScanSearchDirs(exec executor.Executor, dirs []string) []string { + resolved := make([]string, 0, len(dirs)) + for _, d := range dirs { + if d == "$HOME" { + if u, err := exec.LoggedInUser(); err == nil { + d = u.HomeDir + } + } + resolved = append(resolved, d) + } + return resolved +} + +// scanJSONEncoder returns a 2-space-indented JSON encoder that doesn't +// HTML-escape — same conventions as the standard scan output. +func scanJSONEncoder(w io.Writer) *json.Encoder { + enc := json.NewEncoder(w) + enc.SetIndent("", " ") + enc.SetEscapeHTML(false) + return enc +} diff --git a/internal/cli/cli.go b/internal/cli/cli.go index 04e1515..be9e904 100644 --- a/internal/cli/cli.go +++ b/internal/cli/cli.go @@ -22,6 +22,7 @@ type Config struct { EnableBrewScan *bool // nil=auto, true/false=explicit EnablePythonScan *bool // nil=auto, true/false=explicit IncludeBundledPlugins bool // --include-bundled-plugins: include bundled/platform plugins in output + NPMRCOnly bool // --npmrc: run only the npmrc audit and render verbose pretty output SearchDirs []string // defaults to ["$HOME"] } @@ -87,6 +88,8 @@ func Parse(args []string) (*Config, error) { cfg.EnablePythonScan = &v case arg == "--include-bundled-plugins": cfg.IncludeBundledPlugins = true + case arg == "--npmrc": + cfg.NPMRCOnly = true case strings.HasPrefix(arg, "--color="): mode := strings.TrimPrefix(arg, "--color=") if mode != "auto" && mode != "always" && mode != "never" { @@ -163,6 +166,7 @@ Options: --enable-python-scan Enable Python package scanning --disable-python-scan Disable Python package scanning --include-bundled-plugins Include bundled/platform plugins in output (Windows) + --npmrc Run ONLY the npm config audit (verbose pretty view; --json supported) --log-level=LEVEL Log level: error | warn | info | debug (default: info) --verbose Shortcut for --log-level=debug --color=WHEN Color mode: auto | always | never (default: auto) diff --git a/internal/detector/npmrc.go b/internal/detector/npmrc.go new file mode 100644 index 0000000..599dbe4 --- /dev/null +++ b/internal/detector/npmrc.go @@ -0,0 +1,449 @@ +package detector + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io/fs" + "os" + "os/user" + "path/filepath" + "regexp" + "strings" + "time" + + "github.com/step-security/dev-machine-guard/internal/executor" + "github.com/step-security/dev-machine-guard/internal/model" +) + +// maxNPMRCFiles caps the number of .npmrc files we report. Even on big +// monorepos this should be ample; the cap exists only to prevent a +// pathological case (someone committed `.npmrc` into thousands of subdirs) +// from blowing up the JSON payload. +const maxNPMRCFiles = 1000 + +// npmEnvVars is the set of environment variables we always record on the +// audit, regardless of whether they are set. Recording an unset var keeps +// the audit shape stable across hosts and is the natural extension point +// when a future change-tracking layer wants to detect transitions. +var npmEnvVars = []string{ + "NPM_TOKEN", + "NPM_CONFIG_USERCONFIG", + "NPM_CONFIG_GLOBALCONFIG", + "NPM_CONFIG_REGISTRY", + "npm_config_registry", + "npm_config__authToken", + "npm_config__auth", + "NODE_OPTIONS", + "NODE_TLS_REJECT_UNAUTHORIZED", +} + +// secretEnvNamePattern matches env var names that should be redacted on output. +// The npm config layer accepts both `npm_config_*` (lowercase) and +// `NPM_CONFIG_*` (uppercase) — and any *_TOKEN / *_PASSWORD / *_KEY value +// is treated as a secret regardless of source. +var secretEnvNamePattern = regexp.MustCompile(`(?i)(token|password|secret|_auth|key)`) + +// NPMRCDetector audits npm configuration: discovers all .npmrc files, parses +// them, captures the merged effective view, and surfaces relevant env vars. +// +// The detector intentionally keeps file metadata collection (owner, mode, +// hashes) and git-tracking checks pluggable so unit tests don't need real +// syscalls or a git binary. +type NPMRCDetector struct { + exec executor.Executor + + // ownerLookup returns owner info for a path. Defaults to the real + // platform-specific impl in npmrc_stat_*.go; tests can override. + ownerLookup func(path string) ownerInfo + // gitTracked returns whether the file is tracked by git. Defaults to + // shelling out via the executor; tests can override to a stub. + gitTracked func(ctx context.Context, path string) bool + // inGitRepo walks parent dirs looking for .git. Defaults to a + // filesystem walk; tests can override. + inGitRepo func(path string) bool +} + +type ownerInfo struct { + UID int + GID int + OwnerName string + GroupName string + OK bool +} + +// NewNPMRCDetector returns a detector with default platform-specific +// metadata helpers wired in. +func NewNPMRCDetector(exec executor.Executor) *NPMRCDetector { + d := &NPMRCDetector{exec: exec} + d.ownerLookup = func(p string) ownerInfo { return statOwner(p) } + d.gitTracked = d.defaultGitTracked + d.inGitRepo = defaultInGitRepo + return d +} + +// Detect runs the full audit. searchDirs are the dirs to walk for project- +// level .npmrc files (typically the user's $HOME plus any extra dirs +// configured by the operator). loggedInUser is the username whose ~/.npmrc +// we resolve for the user-scope file. +func (d *NPMRCDetector) Detect(ctx context.Context, searchDirs []string, loggedInUser *user.User) model.NPMRCAudit { + audit := model.NPMRCAudit{ + Files: []model.NPMRCFile{}, + Env: d.collectEnv(), + } + + npmPath, npmErr := d.exec.LookPath("npm") + if npmErr == nil { + audit.Available = true + audit.NPMPath = npmPath + audit.NPMVersion = d.npmVersion(ctx) + } + + // Resolve the four scopes. Each step is independent: if one fails (e.g. + // `npm config get globalconfig` returns nothing), the rest still run. + files := make([]model.NPMRCFile, 0, 8) + seen := make(map[string]bool) + add := func(scope, path string) { + if path == "" { + return + } + abs, err := filepath.Abs(path) + if err == nil { + path = abs + } + if seen[path] { + return + } + seen[path] = true + files = append(files, d.collectFile(ctx, path, scope)) + } + + add("builtin", d.npmConfigGet(ctx, "builtinconfig")) + + if v := d.exec.Getenv("NPM_CONFIG_GLOBALCONFIG"); v != "" { + add("global", v) + } else { + add("global", d.npmConfigGet(ctx, "globalconfig")) + } + + if v := d.exec.Getenv("NPM_CONFIG_USERCONFIG"); v != "" { + add("user", v) + } else if loggedInUser != nil && loggedInUser.HomeDir != "" { + add("user", filepath.Join(loggedInUser.HomeDir, ".npmrc")) + } + + for _, dir := range searchDirs { + for _, p := range d.findProjectNPMRCs(dir) { + if len(files) >= maxNPMRCFiles { + break + } + add("project", p) + } + } + + audit.Files = files + if eff := d.captureEffective(ctx); eff != nil { + audit.Effective = eff + } + + // Drift / change-tracking detection is intentionally out of scope here. + // The npmrc audit currently surfaces inventory + parsed contents + + // merged-effective view + relevant env vars. Diffing the audit against + // a previous snapshot, attributing writers, and surfacing per-project + // effective overrides ("if a developer cd's into this cloned repo and + // runs npm install, what flips?") are documented as future extensions + // in .plans/0005-npmrc-audit.md and were deliberately removed when + // the requirements narrowed to surfacing only. + return audit +} + +// findProjectNPMRCs walks dir looking for .npmrc files, applying the same +// directory-skip rules as the node project scanner plus a small set of +// well-known cache locations (Go module cache, vendor dirs) — random .npmrc +// files inside cached/vendored dependencies aren't config the user authored +// and would only add noise to the audit. Returns absolute paths. +func (d *NPMRCDetector) findProjectNPMRCs(dir string) []string { + if dir == "" { + return nil + } + var results []string + _ = filepath.WalkDir(dir, func(path string, entry fs.DirEntry, err error) error { + if err != nil { + return nil + } + if entry.IsDir() { + if shouldSkipNPMRCDir(path, entry.Name(), dir) { + return filepath.SkipDir + } + return nil + } + if entry.Name() == ".npmrc" { + if abs, err := filepath.Abs(path); err == nil { + results = append(results, abs) + } else { + results = append(results, path) + } + } + return nil + }) + return results +} + +// shouldSkipNPMRCDir returns true when the directory should be skipped during +// project-level .npmrc discovery. Mirrors nodescan.go's exclusions and adds +// well-known dependency-cache locations (Go module cache, vendor dirs, +// language-specific caches under $HOME). +func shouldSkipNPMRCDir(path, name, root string) bool { + switch name { + case "node_modules", ".git", ".cache", "vendor": + return true + } + if strings.HasPrefix(name, ".") && path != root { + return true + } + // Path-based skips for caches whose dir names alone aren't distinctive. + slashed := filepath.ToSlash(path) + if strings.HasSuffix(slashed, "/pkg/mod") || strings.Contains(slashed, "/pkg/mod/") { + return true + } + if strings.Contains(slashed, "/Library/Caches/") { + return true + } + return false +} + +// collectFile gathers everything we know about one .npmrc path. Always +// returns a record — non-existent files are surfaced with Exists=false so +// the caller can see "we looked here, nothing was there." +func (d *NPMRCDetector) collectFile(ctx context.Context, path, scope string) model.NPMRCFile { + f := model.NPMRCFile{ + Path: path, + Scope: scope, + } + + // Lstat first so a symlink doesn't get followed silently. + linfo, err := os.Lstat(path) + if err != nil { + // Distinguish "not found" from "not readable" so the user can act. + if os.IsNotExist(err) { + f.Exists = false + return f + } + f.Exists = true + f.ParseError = "lstat: " + err.Error() + return f + } + f.Exists = true + + if linfo.Mode()&os.ModeSymlink != 0 { + if target, err := os.Readlink(path); err == nil { + f.SymlinkTo = target + } + } + + // Stat (follows symlinks) for size/mtime/mode. + info, err := os.Stat(path) + if err != nil { + f.Readable = false + f.ParseError = "stat: " + err.Error() + return f + } + f.SizeBytes = info.Size() + f.ModTimeUnix = info.ModTime().Unix() + f.Mode = fmt.Sprintf("%#o", info.Mode().Perm()) + + if info.IsDir() { + f.ParseError = "path is a directory" + return f + } + + if d.ownerLookup != nil { + if oi := d.ownerLookup(path); oi.OK { + f.OwnerUID = oi.UID + f.GroupGID = oi.GID + f.OwnerName = oi.OwnerName + f.GroupName = oi.GroupName + } + } + + data, err := os.ReadFile(path) + if err != nil { + f.Readable = false + f.ParseError = "read: " + err.Error() + return f + } + f.Readable = true + + sum := sha256.Sum256(data) + f.SHA256 = hex.EncodeToString(sum[:]) + + f.Entries = parseNPMRC(data) + + if d.inGitRepo != nil && d.inGitRepo(path) { + f.InGitRepo = true + if d.gitTracked != nil && d.gitTracked(ctx, path) { + f.GitTracked = true + } + } + + return f +} + +// captureEffective runs `npm config ls -l --json` and `npm config ls -l` for +// source attribution. Returns nil when npm is unavailable. +func (d *NPMRCDetector) captureEffective(ctx context.Context) *model.NPMRCEffective { + if _, err := d.exec.LookPath("npm"); err != nil { + return nil + } + eff := &model.NPMRCEffective{ + SourceByKey: map[string]string{}, + Config: map[string]any{}, + } + + stdoutJSON, _, exit, _ := d.exec.RunWithTimeout(ctx, 15*time.Second, "npm", "config", "ls", "-l", "--json") + if exit == 0 && strings.TrimSpace(stdoutJSON) != "" { + var parsed map[string]any + if err := json.Unmarshal([]byte(stdoutJSON), &parsed); err != nil { + eff.Error = "json decode: " + err.Error() + } else { + eff.Config = parsed + } + } else if eff.Error == "" && exit != 0 { + eff.Error = fmt.Sprintf("npm config ls -l --json exited with %d", exit) + } + + stdoutText, _, exitText, _ := d.exec.RunWithTimeout(ctx, 15*time.Second, "npm", "config", "ls", "-l") + if exitText == 0 && stdoutText != "" { + eff.SourceByKey = parseSourceAttribution(stdoutText) + } + + return eff +} + +// parseSourceAttribution scans the textual output of `npm config ls -l`, +// which groups keys under `; "" config from ""` headers. +// +// ; "user" config from "/Users/me/.npmrc" +// registry = "https://registry.npmjs.org/" +// ; "default" values +// access = null +// +// We map each non-comment, non-section key to the most recent header seen. +func parseSourceAttribution(text string) map[string]string { + out := map[string]string{} + headerRE := regexp.MustCompile(`^;\s*"([^"]+)"\s*(?:config from\s*"([^"]+)")?`) + currentSource := "default" + for _, line := range strings.Split(text, "\n") { + raw := strings.TrimRight(line, "\r") + trimmed := strings.TrimSpace(raw) + if trimmed == "" { + continue + } + if strings.HasPrefix(trimmed, ";") { + if m := headerRE.FindStringSubmatch(trimmed); m != nil { + if m[2] != "" { + currentSource = m[2] // path is more specific than label + } else { + currentSource = m[1] + } + } + continue + } + // `key = value` or `@scope:registry = value`. + if eq := strings.IndexByte(trimmed, '='); eq > 0 { + key := strings.TrimSpace(trimmed[:eq]) + if key != "" { + out[key] = currentSource + } + } + } + return out +} + +// npmVersion returns the npm CLI's version string, "unknown" on failure. +func (d *NPMRCDetector) npmVersion(ctx context.Context) string { + stdout, _, exit, _ := d.exec.RunWithTimeout(ctx, 5*time.Second, "npm", "--version") + if exit != 0 { + return "unknown" + } + v := strings.TrimSpace(stdout) + if v == "" { + return "unknown" + } + return v +} + +// npmConfigGet runs `npm config get ` and returns the trimmed value, or +// empty if the call failed or the value is "undefined" (npm's literal output +// for an unset key). +func (d *NPMRCDetector) npmConfigGet(ctx context.Context, key string) string { + stdout, _, exit, _ := d.exec.RunWithTimeout(ctx, 5*time.Second, "npm", "config", "get", key) + if exit != 0 { + return "" + } + v := strings.TrimSpace(stdout) + if v == "undefined" || v == "null" { + return "" + } + return v +} + +// collectEnv builds a snapshot of the npm-relevant environment. Sensitive +// values are redacted; the SHA-256 lets the change-tracking layer notice +// rotation without ever surfacing the secret. +func (d *NPMRCDetector) collectEnv() []model.NPMRCEnvVar { + out := make([]model.NPMRCEnvVar, 0, len(npmEnvVars)) + for _, name := range npmEnvVars { + v := d.exec.Getenv(name) + ev := model.NPMRCEnvVar{Name: name, Set: v != ""} + if v != "" { + ev.ValueSHA256 = sha256Hex(v) + if secretEnvNamePattern.MatchString(name) { + ev.DisplayValue = redactSecret(v) + } else { + ev.DisplayValue = v + } + } + out = append(out, ev) + } + return out +} + +// defaultGitTracked shells out to git to check if a file is tracked. +// Returns false on any error (git not installed, not in a repo, untracked). +func (d *NPMRCDetector) defaultGitTracked(ctx context.Context, path string) bool { + dir := filepath.Dir(path) + base := filepath.Base(path) + _, _, exit, err := d.exec.RunWithTimeout(ctx, 5*time.Second, "git", "-C", dir, "ls-files", "--error-unmatch", base) + return err == nil && exit == 0 +} + +// defaultInGitRepo walks parent directories looking for a .git entry. +// Stops at the filesystem root. +func defaultInGitRepo(path string) bool { + dir := filepath.Dir(path) + for { + gitPath := filepath.Join(dir, ".git") + if info, err := os.Stat(gitPath); err == nil { + // .git can be a directory (regular repo) or a file (worktree). + _ = info + return true + } + parent := filepath.Dir(dir) + if parent == dir { + return false + } + dir = parent + } +} + +// sha256Hex returns the hex SHA-256 of a string. +func sha256Hex(s string) string { + if s == "" { + return "" + } + sum := sha256.Sum256([]byte(s)) + return hex.EncodeToString(sum[:]) +} diff --git a/internal/detector/npmrc_parse.go b/internal/detector/npmrc_parse.go new file mode 100644 index 0000000..eab4077 --- /dev/null +++ b/internal/detector/npmrc_parse.go @@ -0,0 +1,200 @@ +package detector + +import ( + "bufio" + "bytes" + "crypto/sha256" + "encoding/hex" + "regexp" + "strings" + + "github.com/step-security/dev-machine-guard/internal/model" +) + +// parseNPMRC parses the contents of a .npmrc file into a slice of NPMRCEntry. +// It is intentionally tolerant: malformed lines are skipped without aborting +// the whole parse, so a single garbage line doesn't hide useful entries. +// +// Behavior matches the npm/ini parser's surface in the ways that matter for +// audit: +// - `;` and `#` start a comment when they are the first non-whitespace char +// on a line (inline `key=value ; comment` is NOT treated as a comment; +// npm/ini retains it as part of the value). +// - `key[]=value` denotes an array entry. +// - URI-prefixed keys (`//host/path/:_authToken`) are valid keys. +// - Surrounding double quotes on a value are unwrapped, but the fact that +// the value was quoted is preserved on the entry. +// - `${VAR}` references are NEVER expanded — preserving the literal form is +// load-bearing for the audit (it's how we tell hardcoded secrets apart +// from env-referenced ones). +// - Section headers `[section]` are accepted (rare in npmrc) but ignored; +// keys are still emitted at the top level. +// +// Returns the parsed entries. Caller decides how to attach them to a file. +func parseNPMRC(data []byte) []model.NPMRCEntry { + var entries []model.NPMRCEntry + + // Strip UTF-8 BOM if present so the first line parses correctly. + data = bytes.TrimPrefix(data, []byte{0xEF, 0xBB, 0xBF}) + + scanner := bufio.NewScanner(bytes.NewReader(data)) + // Allow large lines (some users base64-pin a CA into `cafile`). + scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) + + lineNum := 0 + for scanner.Scan() { + lineNum++ + raw := scanner.Text() + trimmed := strings.TrimSpace(raw) + if trimmed == "" { + continue + } + // Comment lines. + if trimmed[0] == ';' || trimmed[0] == '#' { + continue + } + // Section header — accept and ignore. + if strings.HasPrefix(trimmed, "[") && strings.HasSuffix(trimmed, "]") { + continue + } + // Find the first '=' — npm/ini uses the first one as the separator. + eq := strings.IndexByte(trimmed, '=') + if eq < 0 { + // No '=' — treat as a key with empty value (matches npm/ini). + key := strings.TrimSpace(trimmed) + if key == "" { + continue + } + entries = append(entries, buildEntry(key, "", false, lineNum)) + continue + } + key := strings.TrimSpace(trimmed[:eq]) + value := strings.TrimSpace(trimmed[eq+1:]) + + if key == "" { + continue + } + + quoted := false + if len(value) >= 2 && value[0] == '"' && value[len(value)-1] == '"' { + value = value[1 : len(value)-1] + quoted = true + } + + entries = append(entries, buildEntry(key, value, quoted, lineNum)) + } + + return entries +} + +// buildEntry classifies a key/value into an NPMRCEntry, populating the +// security-relevant flags (auth, env-ref) and a safe-to-display value. +func buildEntry(key, value string, quoted bool, lineNum int) model.NPMRCEntry { + isArray := false + if strings.HasSuffix(key, "[]") { + isArray = true + key = key[:len(key)-2] + } + + isAuth := isAuthKey(key) + envRefVars, isEnvRef := extractEnvRefs(value) + + display := value + if isAuth && !isEnvRef && value != "" { + display = redactSecret(value) + } + + return model.NPMRCEntry{ + Key: key, + DisplayValue: display, + LineNum: lineNum, + IsArray: isArray, + IsAuth: isAuth, + IsEnvRef: isEnvRef, + EnvRefVars: envRefVars, + ValueSHA256: hashValue(value), + Quoted: quoted, + } +} + +// authKeySuffixes are the trailing key segments that mean "this is a credential." +// We compare against the suffix because npm scopes auth keys with a URI prefix: +// +// //registry.npmjs.org/:_authToken=... +// +// so we have to look at the part after the final `:`. +var authKeySuffixes = []string{ + "_auth", + "_authtoken", + "_password", + "username", + "email", + "cafile", + "certfile", + "keyfile", + // Deprecated but still seen in the wild: + "cert", + "key", +} + +func isAuthKey(key string) bool { + // Compare against the segment after the final `:` (URI-scoped keys) or + // against the full key (non-scoped legacy form). + suffix := key + if idx := strings.LastIndex(key, ":"); idx >= 0 { + suffix = key[idx+1:] + } + suffix = strings.ToLower(suffix) + for _, s := range authKeySuffixes { + if suffix == s { + return true + } + } + return false +} + +// envRefPattern matches ${VAR}, ${VAR:-default}, and ${VAR?error} forms. +// We only care about the VAR name; default/error sub-syntax is captured but +// not used. +var envRefPattern = regexp.MustCompile(`\$\{([A-Za-z_][A-Za-z0-9_]*)(?:[?:][^}]*)?\}`) + +func extractEnvRefs(value string) ([]string, bool) { + matches := envRefPattern.FindAllStringSubmatch(value, -1) + if len(matches) == 0 { + return nil, false + } + seen := make(map[string]struct{}, len(matches)) + out := make([]string, 0, len(matches)) + for _, m := range matches { + name := m[1] + if _, ok := seen[name]; ok { + continue + } + seen[name] = struct{}{} + out = append(out, name) + } + return out, true +} + +// redactSecret returns a safe-to-display form of an auth value. We keep the +// last 4 characters when the secret is long enough to make rotation tracking +// useful; for short secrets we collapse to `***` so we never leak meaningful +// material. The full value is never returned. +func redactSecret(v string) string { + if len(v) <= 8 { + return "***" + } + return "***" + v[len(v)-4:] +} + +// hashValue returns the hex SHA-256 of a value. We hash the raw value (before +// redaction) so two different secrets produce different hashes — that's what +// lets the change-tracking phase notice rotation without ever storing the +// plaintext. +func hashValue(v string) string { + if v == "" { + return "" + } + sum := sha256.Sum256([]byte(v)) + return hex.EncodeToString(sum[:]) +} diff --git a/internal/detector/npmrc_parse_test.go b/internal/detector/npmrc_parse_test.go new file mode 100644 index 0000000..ae3b16a --- /dev/null +++ b/internal/detector/npmrc_parse_test.go @@ -0,0 +1,250 @@ +package detector + +import ( + "strings" + "testing" +) + +func TestParseNPMRC_Basic(t *testing.T) { + input := ` +; this is a comment +# also a comment +registry = https://registry.npmjs.org/ +@mycompany:registry=https://npm.mycompany.com/ +strict-ssl=false +` + entries := parseNPMRC([]byte(input)) + if len(entries) != 3 { + t.Fatalf("expected 3 entries, got %d", len(entries)) + } + + want := map[string]string{ + "registry": "https://registry.npmjs.org/", + "@mycompany:registry": "https://npm.mycompany.com/", + "strict-ssl": "false", + } + for _, e := range entries { + if got := want[e.Key]; got != e.DisplayValue { + t.Errorf("key %q: want %q, got %q", e.Key, got, e.DisplayValue) + } + if e.IsAuth { + t.Errorf("key %q should not be auth", e.Key) + } + } +} + +func TestParseNPMRC_AuthRedaction(t *testing.T) { + input := `//registry.npmjs.org/:_authToken=npm_AbCdEfGhIjKlMnOpQrStUv1234WXYZ +//npm.mycompany.com/:_authToken=short +//registry.yarnpkg.com/:_password=plainpassword123 +` + entries := parseNPMRC([]byte(input)) + if len(entries) != 3 { + t.Fatalf("expected 3 entries, got %d", len(entries)) + } + + for _, e := range entries { + if !e.IsAuth { + t.Errorf("key %q should be auth", e.Key) + } + if strings.Contains(e.DisplayValue, "AbCdEf") || strings.Contains(e.DisplayValue, "plainpassword") { + t.Errorf("key %q: raw secret leaked through DisplayValue=%q", e.Key, e.DisplayValue) + } + if !strings.HasPrefix(e.DisplayValue, "***") { + t.Errorf("key %q: expected redacted prefix, got %q", e.Key, e.DisplayValue) + } + if e.ValueSHA256 == "" { + t.Errorf("key %q: expected ValueSHA256 to be populated", e.Key) + } + } + + // The "short" token should collapse to plain "***" with no last-4 leak. + for _, e := range entries { + if e.Key == "//npm.mycompany.com/:_authToken" && e.DisplayValue != "***" { + t.Errorf("short secret should redact to ***, got %q", e.DisplayValue) + } + } +} + +func TestParseNPMRC_EnvRefPreserved(t *testing.T) { + input := `//registry.npmjs.org/:_authToken=${NPM_TOKEN} +//npm.mycompany.com/:_authToken=${COMPANY_TOKEN:-fallback} +cache = ${HOME}/.npm-packages +` + entries := parseNPMRC([]byte(input)) + if len(entries) != 3 { + t.Fatalf("expected 3 entries, got %d", len(entries)) + } + + for _, e := range entries { + if !e.IsEnvRef { + t.Errorf("key %q: IsEnvRef should be true (value=%q)", e.Key, e.DisplayValue) + } + // For env-ref auth values, we KEEP the literal — that's the whole + // point. Hardcoded vs ${VAR} is the most important distinction in + // the audit. + if !strings.Contains(e.DisplayValue, "${") { + t.Errorf("key %q: env-ref form should be preserved, got %q", e.Key, e.DisplayValue) + } + } + + // Auth + env-ref should still record the var name. + for _, e := range entries { + if e.Key == "//registry.npmjs.org/:_authToken" { + if len(e.EnvRefVars) != 1 || e.EnvRefVars[0] != "NPM_TOKEN" { + t.Errorf("EnvRefVars: want [NPM_TOKEN], got %v", e.EnvRefVars) + } + } + } +} + +func TestParseNPMRC_ArraySyntax(t *testing.T) { + input := `ca[]=cert1 +ca[]=cert2 +` + entries := parseNPMRC([]byte(input)) + if len(entries) != 2 { + t.Fatalf("expected 2 entries, got %d", len(entries)) + } + for _, e := range entries { + if e.Key != "ca" { + t.Errorf("expected key=ca, got %q", e.Key) + } + if !e.IsArray { + t.Errorf("expected IsArray=true") + } + } +} + +func TestParseNPMRC_QuotedValue(t *testing.T) { + input := `node-options = "--max-old-space-size=4096 --require=/tmp/x.js"` + entries := parseNPMRC([]byte(input)) + if len(entries) != 1 { + t.Fatalf("expected 1 entry, got %d", len(entries)) + } + if !entries[0].Quoted { + t.Errorf("expected Quoted=true") + } + if strings.HasPrefix(entries[0].DisplayValue, `"`) || strings.HasSuffix(entries[0].DisplayValue, `"`) { + t.Errorf("quotes should be stripped from DisplayValue, got %q", entries[0].DisplayValue) + } +} + +func TestParseNPMRC_Comments(t *testing.T) { + // Both `;` and `#` at start of line are comments; inline `;` is NOT. + input := `; pure comment +# pure comment +key1 = value1 ; this stays in the value (npm/ini behavior) +# trailing comment line +` + entries := parseNPMRC([]byte(input)) + if len(entries) != 1 { + t.Fatalf("expected 1 entry (only key1), got %d: %+v", len(entries), entries) + } + if !strings.Contains(entries[0].DisplayValue, ";") { + t.Errorf("inline ; should remain in value, got %q", entries[0].DisplayValue) + } +} + +func TestParseNPMRC_BOM(t *testing.T) { + input := "\xEF\xBB\xBFregistry=https://registry.npmjs.org/" + entries := parseNPMRC([]byte(input)) + if len(entries) != 1 || entries[0].Key != "registry" { + t.Fatalf("BOM not stripped; got entries=%+v", entries) + } +} + +func TestParseNPMRC_EmptyAndMalformed(t *testing.T) { + input := ` +=novalue +keyonly +key= + +[section] +key2=value2 +` + entries := parseNPMRC([]byte(input)) + // Expect: keyonly (empty value), key (empty value), key2=value2. + // `=novalue` has empty key so it's skipped. `[section]` is skipped. + keys := make([]string, 0, len(entries)) + for _, e := range entries { + keys = append(keys, e.Key) + } + wantKeys := []string{"keyonly", "key", "key2"} + if len(keys) != len(wantKeys) { + t.Fatalf("want keys %v, got %v", wantKeys, keys) + } + for i, k := range wantKeys { + if keys[i] != k { + t.Errorf("position %d: want %q, got %q", i, k, keys[i]) + } + } +} + +func TestIsAuthKey(t *testing.T) { + cases := map[string]bool{ + "//registry.npmjs.org/:_authToken": true, + "//npm.com/path/:_password": true, + "//registry.npmjs.org/:_AUTHTOKEN": true, // case-insensitive + "_auth": true, // legacy unscoped + "username": true, + "email": true, + "cafile": true, + "cert": true, // deprecated but still flagged + "registry": false, + "@scope:registry": false, + "strict-ssl": false, + "ignore-scripts": false, + } + for k, want := range cases { + if got := isAuthKey(k); got != want { + t.Errorf("isAuthKey(%q) = %v, want %v", k, got, want) + } + } +} + +func TestExtractEnvRefs(t *testing.T) { + cases := []struct { + in string + wantEnv bool + wantVars []string + }{ + {"plain", false, nil}, + {"${VAR}", true, []string{"VAR"}}, + {"${A}/${B}", true, []string{"A", "B"}}, + {"${VAR:-default}", true, []string{"VAR"}}, + {"${VAR?missing}", true, []string{"VAR"}}, + {"${SAME}/${SAME}", true, []string{"SAME"}}, // dedup + {"$VAR", false, nil}, // we only match ${...} + } + for _, c := range cases { + gotVars, gotIs := extractEnvRefs(c.in) + if gotIs != c.wantEnv { + t.Errorf("extractEnvRefs(%q) is=%v want=%v", c.in, gotIs, c.wantEnv) + } + if len(gotVars) != len(c.wantVars) { + t.Errorf("extractEnvRefs(%q) vars=%v want=%v", c.in, gotVars, c.wantVars) + continue + } + for i, v := range c.wantVars { + if gotVars[i] != v { + t.Errorf("extractEnvRefs(%q) var[%d]=%q want %q", c.in, i, gotVars[i], v) + } + } + } +} + +func TestRedactSecret(t *testing.T) { + cases := map[string]string{ + "": "***", // empty stays *** (defensive; redactSecret isn't called for empty in practice) + "abc": "***", + "abcdefgh": "***", // exactly 8 chars: still *** + "abcdefghi": "***fghi", // 9+ chars: ***last4 + "npm_xxxxxxXYZ1234": "***1234", + } + for in, want := range cases { + if got := redactSecret(in); got != want { + t.Errorf("redactSecret(%q) = %q, want %q", in, got, want) + } + } +} diff --git a/internal/detector/npmrc_stat_unix.go b/internal/detector/npmrc_stat_unix.go new file mode 100644 index 0000000..58af9c1 --- /dev/null +++ b/internal/detector/npmrc_stat_unix.go @@ -0,0 +1,39 @@ +//go:build !windows + +package detector + +import ( + "os" + "os/user" + "strconv" + "syscall" +) + +// statOwner returns the owning uid/gid of a path. We deliberately bypass the +// Executor interface here because: +// 1. uid/gid is exposed only via syscall.Stat_t on the Sys() of an os.FileInfo +// and the mock executor's mockFileInfo can't represent that. +// 2. The detector exposes ownerLookup as a hook so tests substitute a stub +// and never reach this function. +func statOwner(path string) ownerInfo { + info, err := os.Stat(path) + if err != nil { + return ownerInfo{} + } + st, ok := info.Sys().(*syscall.Stat_t) + if !ok { + return ownerInfo{} + } + oi := ownerInfo{ + UID: int(st.Uid), + GID: int(st.Gid), + OK: true, + } + if u, err := user.LookupId(strconv.Itoa(oi.UID)); err == nil { + oi.OwnerName = u.Username + } + if g, err := user.LookupGroupId(strconv.Itoa(oi.GID)); err == nil { + oi.GroupName = g.Name + } + return oi +} diff --git a/internal/detector/npmrc_stat_windows.go b/internal/detector/npmrc_stat_windows.go new file mode 100644 index 0000000..b8a08b9 --- /dev/null +++ b/internal/detector/npmrc_stat_windows.go @@ -0,0 +1,10 @@ +//go:build windows + +package detector + +// statOwner is a no-op on Windows: getting a meaningful owner string from a +// SID is non-trivial and not actionable for the audit's first cut. The +// detector handles ownerInfo.OK == false by leaving owner fields empty. +func statOwner(_ string) ownerInfo { + return ownerInfo{} +} diff --git a/internal/detector/npmrc_test.go b/internal/detector/npmrc_test.go new file mode 100644 index 0000000..4b52d6d --- /dev/null +++ b/internal/detector/npmrc_test.go @@ -0,0 +1,356 @@ +package detector + +import ( + "context" + "os" + "os/user" + "path/filepath" + "strings" + "testing" + + "github.com/step-security/dev-machine-guard/internal/executor" +) + +// fixedOwner returns an ownerLookup hook with fixed values, used to keep +// tests deterministic across platforms (no real syscall.Stat involved). +func fixedOwner() func(string) ownerInfo { + return func(_ string) ownerInfo { + return ownerInfo{UID: 1000, GID: 1000, OwnerName: "tester", GroupName: "staff", OK: true} + } +} + +func TestNPMRCDetector_Discovery_AllScopes(t *testing.T) { + tmp := t.TempDir() + + // User config + userPath := filepath.Join(tmp, "home", ".npmrc") + mustWriteFile(t, userPath, "registry=https://registry.npmjs.org/\n//registry.npmjs.org/:_authToken=npm_AbCdEfGhIjKlMnOpQrStUv\n") + + // Global config (e.g. /etc/npmrc) + globalPath := filepath.Join(tmp, "etc", "npmrc") + mustWriteFile(t, globalPath, "strict-ssl=true\n") + + // Builtin config (npm install dir) + builtinPath := filepath.Join(tmp, "lib", "node_modules", "npm", "npmrc") + mustWriteFile(t, builtinPath, "; default builtin config\n") + + // Project-level config inside a search dir + projectDir := filepath.Join(tmp, "code", "myapp") + projectPath := filepath.Join(projectDir, ".npmrc") + mustWriteFile(t, projectPath, "@mycompany:registry=https://npm.mycompany.com/\n//npm.mycompany.com/:_authToken=${COMPANY_TOKEN}\n") + + // Mock npm command outputs + mock := executor.NewMock() + mock.SetPath("npm", filepath.Join(tmp, "bin", "npm")) + mock.SetCommand("11.0.0\n", "", 0, "npm", "--version") + mock.SetCommand(builtinPath+"\n", "", 0, "npm", "config", "get", "builtinconfig") + mock.SetCommand(globalPath+"\n", "", 0, "npm", "config", "get", "globalconfig") + mock.SetCommand(`{"registry":"https://registry.npmjs.org/","strict-ssl":true,"_authToken":"(protected)"}`, "", 0, "npm", "config", "ls", "-l", "--json") + mock.SetCommand(`; "user" config from "`+userPath+`" +registry = "https://registry.npmjs.org/" +; "default" values +strict-ssl = true +`, "", 0, "npm", "config", "ls", "-l") + mock.SetHomeDir(filepath.Join(tmp, "home")) + + d := NewNPMRCDetector(mock) + d.ownerLookup = fixedOwner() + // Disable git checks so tests don't depend on git being installed. + d.gitTracked = func(_ context.Context, _ string) bool { return false } + d.inGitRepo = func(_ string) bool { return false } + + loggedIn := &user.User{Username: "tester", HomeDir: filepath.Join(tmp, "home")} + audit := d.Detect(context.Background(), []string{filepath.Join(tmp, "code")}, loggedIn) + + if !audit.Available { + t.Fatalf("expected npm to be available") + } + if audit.NPMVersion != "11.0.0" { + t.Errorf("npm version = %q, want 11.0.0", audit.NPMVersion) + } + + // We should have discovered all four scopes. + gotScopes := map[string]string{} + for _, f := range audit.Files { + gotScopes[f.Scope] = f.Path + } + for _, want := range []string{"builtin", "global", "user", "project"} { + if _, ok := gotScopes[want]; !ok { + t.Errorf("missing scope %q in output (got: %v)", want, gotScopes) + } + } + + // User file: should have parsed entries with redacted auth. + for _, f := range audit.Files { + if f.Scope != "user" { + continue + } + if !f.Exists || !f.Readable { + t.Errorf("user file should be readable: %+v", f) + } + if f.SHA256 == "" { + t.Errorf("user file should have sha256") + } + if f.OwnerName != "tester" { + t.Errorf("owner name = %q, want tester", f.OwnerName) + } + var sawAuth bool + for _, e := range f.Entries { + if e.IsAuth { + sawAuth = true + if strings.Contains(e.DisplayValue, "AbCdEf") { + t.Errorf("auth value leaked: %q", e.DisplayValue) + } + } + } + if !sawAuth { + t.Errorf("expected to see an auth entry in user file") + } + } + + // Project file: env-ref auth should be preserved. + for _, f := range audit.Files { + if f.Scope != "project" { + continue + } + var sawEnvRef bool + for _, e := range f.Entries { + if e.IsEnvRef { + sawEnvRef = true + if !strings.Contains(e.DisplayValue, "${") { + t.Errorf("env-ref form should be preserved: %q", e.DisplayValue) + } + } + } + if !sawEnvRef { + t.Errorf("expected env-ref entry in project file") + } + } + + // Effective view should populate config + sources. + if audit.Effective == nil { + t.Fatalf("expected effective view") + } + if _, ok := audit.Effective.Config["registry"]; !ok { + t.Errorf("effective config missing registry") + } + if src := audit.Effective.SourceByKey["registry"]; src != userPath { + t.Errorf("expected registry source %q, got %q", userPath, src) + } +} + +func TestNPMRCDetector_NPMNotInstalled(t *testing.T) { + tmp := t.TempDir() + userPath := filepath.Join(tmp, "home", ".npmrc") + mustWriteFile(t, userPath, "registry=https://npm.example.com/\n") + + mock := executor.NewMock() + // No SetPath("npm", ...) -> LookPath fails. + mock.SetHomeDir(filepath.Join(tmp, "home")) + + d := NewNPMRCDetector(mock) + d.ownerLookup = fixedOwner() + d.gitTracked = func(_ context.Context, _ string) bool { return false } + d.inGitRepo = func(_ string) bool { return false } + + loggedIn := &user.User{Username: "tester", HomeDir: filepath.Join(tmp, "home")} + audit := d.Detect(context.Background(), nil, loggedIn) + + if audit.Available { + t.Errorf("npm should not be marked available") + } + if audit.Effective != nil { + t.Errorf("effective view should be nil when npm missing, got %+v", audit.Effective) + } + // User file should still be discovered and parsed. + if len(audit.Files) != 1 || audit.Files[0].Scope != "user" { + t.Fatalf("expected exactly the user file, got %+v", audit.Files) + } + if !audit.Files[0].Readable { + t.Errorf("user file should be readable even with no npm") + } +} + +func TestNPMRCDetector_MissingFiles(t *testing.T) { + tmp := t.TempDir() + mock := executor.NewMock() + mock.SetPath("npm", "/usr/bin/npm") + mock.SetCommand("9.0.0\n", "", 0, "npm", "--version") + mock.SetCommand("/nonexistent/builtin\n", "", 0, "npm", "config", "get", "builtinconfig") + mock.SetCommand("/nonexistent/global\n", "", 0, "npm", "config", "get", "globalconfig") + mock.SetCommand("{}", "", 0, "npm", "config", "ls", "-l", "--json") + mock.SetCommand("", "", 0, "npm", "config", "ls", "-l") + + d := NewNPMRCDetector(mock) + d.ownerLookup = fixedOwner() + d.gitTracked = func(_ context.Context, _ string) bool { return false } + d.inGitRepo = func(_ string) bool { return false } + + loggedIn := &user.User{Username: "tester", HomeDir: filepath.Join(tmp, "nohome")} + audit := d.Detect(context.Background(), nil, loggedIn) + + // Even though no real files exist, the discovery records the absence. + for _, f := range audit.Files { + if f.Exists { + t.Errorf("scope %q at %q should not exist", f.Scope, f.Path) + } + if len(f.Entries) != 0 { + t.Errorf("missing file should have no entries, got %+v", f.Entries) + } + } +} + +func TestNPMRCDetector_EnvVarRedaction(t *testing.T) { + mock := executor.NewMock() + mock.SetEnv("NPM_TOKEN", "npm_LongTokenValueZ1234") + mock.SetEnv("npm_config__authToken", "npm_AnotherSecretValue999") + mock.SetEnv("NPM_CONFIG_REGISTRY", "https://registry.npmjs.org/") + + d := NewNPMRCDetector(mock) + d.ownerLookup = fixedOwner() + d.gitTracked = func(_ context.Context, _ string) bool { return false } + d.inGitRepo = func(_ string) bool { return false } + + envs := d.collectEnv() + + for _, e := range envs { + switch e.Name { + case "NPM_TOKEN": + if !e.Set { + t.Error("NPM_TOKEN should be Set=true") + } + if !strings.HasPrefix(e.DisplayValue, "***") { + t.Errorf("NPM_TOKEN should be redacted, got %q", e.DisplayValue) + } + if strings.Contains(e.DisplayValue, "Long") { + t.Errorf("NPM_TOKEN raw value leaked: %q", e.DisplayValue) + } + if e.ValueSHA256 == "" { + t.Error("NPM_TOKEN should have SHA-256 set") + } + case "npm_config__authToken": + if !strings.HasPrefix(e.DisplayValue, "***") { + t.Errorf("npm_config__authToken should be redacted, got %q", e.DisplayValue) + } + case "NPM_CONFIG_REGISTRY": + // Not a secret; should pass through. + if e.DisplayValue != "https://registry.npmjs.org/" { + t.Errorf("registry env var should not be redacted, got %q", e.DisplayValue) + } + } + } +} + +func TestParseSourceAttribution(t *testing.T) { + in := `; "default" values +access = null +audit = true + +; "user" config from "/Users/me/.npmrc" +registry = "https://registry.npmjs.org/" +//registry.npmjs.org/:_authToken = "(protected)" + +; "project" config from "/Users/me/code/myapp/.npmrc" +@mycompany:registry = "https://npm.mycompany.com/" +strict-ssl = false +` + got := parseSourceAttribution(in) + + cases := map[string]string{ + "access": "default", + "audit": "default", + "registry": "/Users/me/.npmrc", + "//registry.npmjs.org/:_authToken": "/Users/me/.npmrc", + "@mycompany:registry": "/Users/me/code/myapp/.npmrc", + "strict-ssl": "/Users/me/code/myapp/.npmrc", + } + for k, want := range cases { + if got[k] != want { + t.Errorf("source[%q] = %q, want %q", k, got[k], want) + } + } +} + +func TestNPMRCDetector_ProjectWalkSkipsExcludedDirs(t *testing.T) { + tmp := t.TempDir() + + // Should be picked up. + keep := filepath.Join(tmp, "real", ".npmrc") + mustWriteFile(t, keep, "registry=https://kept/\n") + + // Should be skipped (inside node_modules). + mustWriteFile(t, filepath.Join(tmp, "real", "node_modules", "lib", ".npmrc"), "registry=https://skipped/\n") + + // Should be skipped (inside .git). + mustWriteFile(t, filepath.Join(tmp, "real", ".git", ".npmrc"), "registry=https://skipped/\n") + + // Should be skipped (hidden dir). + mustWriteFile(t, filepath.Join(tmp, "real", ".cache", ".npmrc"), "registry=https://skipped/\n") + + mock := executor.NewMock() + d := NewNPMRCDetector(mock) + results := d.findProjectNPMRCs(tmp) + + if len(results) != 1 { + t.Fatalf("expected exactly 1 .npmrc, got %d: %v", len(results), results) + } + if !strings.HasSuffix(results[0], filepath.Join("real", ".npmrc")) { + t.Errorf("wrong file kept: %q", results[0]) + } +} + +func TestNPMRCDetector_RespectsEnvOverridesForUserAndGlobal(t *testing.T) { + tmp := t.TempDir() + + // User config redirected via NPM_CONFIG_USERCONFIG. + overriddenUser := filepath.Join(tmp, "elsewhere", "myrc") + mustWriteFile(t, overriddenUser, "registry=https://overridden/\n") + + // Global config redirected via NPM_CONFIG_GLOBALCONFIG. + overriddenGlobal := filepath.Join(tmp, "elsewhere", "globalrc") + mustWriteFile(t, overriddenGlobal, "audit=false\n") + + mock := executor.NewMock() + mock.SetPath("npm", "/usr/bin/npm") + mock.SetCommand("11.0.0\n", "", 0, "npm", "--version") + mock.SetCommand("/should/not/be/used\n", "", 0, "npm", "config", "get", "builtinconfig") + // NB: globalconfig command should NOT be called when env var is set. + // We still register it so test fails loud if it is. + mock.SetCommand("/should/not/be/used/global\n", "", 0, "npm", "config", "get", "globalconfig") + mock.SetCommand("{}", "", 0, "npm", "config", "ls", "-l", "--json") + mock.SetCommand("", "", 0, "npm", "config", "ls", "-l") + + mock.SetEnv("NPM_CONFIG_USERCONFIG", overriddenUser) + mock.SetEnv("NPM_CONFIG_GLOBALCONFIG", overriddenGlobal) + mock.SetHomeDir(filepath.Join(tmp, "home")) + + d := NewNPMRCDetector(mock) + d.ownerLookup = fixedOwner() + d.gitTracked = func(_ context.Context, _ string) bool { return false } + d.inGitRepo = func(_ string) bool { return false } + + loggedIn := &user.User{Username: "tester", HomeDir: filepath.Join(tmp, "home")} + audit := d.Detect(context.Background(), nil, loggedIn) + + pathsByScope := map[string]string{} + for _, f := range audit.Files { + pathsByScope[f.Scope] = f.Path + } + if got := pathsByScope["user"]; got != overriddenUser { + t.Errorf("user scope path = %q, want %q (NPM_CONFIG_USERCONFIG should win over $HOME/.npmrc)", got, overriddenUser) + } + if got := pathsByScope["global"]; got != overriddenGlobal { + t.Errorf("global scope path = %q, want %q (NPM_CONFIG_GLOBALCONFIG should win)", got, overriddenGlobal) + } +} + +// mustWriteFile creates parent dirs as needed and writes the content. +func mustWriteFile(t *testing.T, path, content string) { + t.Helper() + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + if err := os.WriteFile(path, []byte(content), 0o644); err != nil { + t.Fatalf("write: %v", err) + } +} diff --git a/internal/model/model.go b/internal/model/model.go index adfe5c4..e04680d 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -26,6 +26,7 @@ type ScanResult struct { SnapPackages []SystemPackage `json:"snap_packages"` FlatpakPkgManager *PkgManager `json:"flatpak_package_manager,omitempty"` FlatpakPackages []SystemPackage `json:"flatpak_packages"` + NPMRCAudit *NPMRCAudit `json:"npmrc_audit,omitempty"` Summary Summary `json:"summary"` } @@ -232,3 +233,79 @@ func FilterUserInstalledExtensions(exts []Extension) []Extension { } return filtered } + +// --- npmrc audit ------------------------------------------------------------- +// +// Surface-only inventory of every .npmrc on the host plus the merged +// effective view npm itself would resolve. Drift detection (snapshot/diff +// across runs) and per-project effective overrides are intentionally out +// of scope for this iteration; see .plans/0005-npmrc-audit.md for the +// extension points. + +// NPMRCAudit is the top-level structure produced by the npmrc detector. +type NPMRCAudit struct { + Available bool `json:"npm_available"` + NPMVersion string `json:"npm_version,omitempty"` + NPMPath string `json:"npm_path,omitempty"` + Files []NPMRCFile `json:"files"` + Effective *NPMRCEffective `json:"effective,omitempty"` + Env []NPMRCEnvVar `json:"env"` + DiscoveryError string `json:"discovery_error,omitempty"` +} + +// NPMRCFile is a single .npmrc file. Metadata is best-effort: fields that +// could not be determined (e.g. owner_name on Windows) are omitted. +type NPMRCFile struct { + Path string `json:"path"` + Scope string `json:"scope"` // builtin | global | user | project + Exists bool `json:"exists"` + Readable bool `json:"readable"` + SizeBytes int64 `json:"size_bytes,omitempty"` + ModTimeUnix int64 `json:"mtime_unix,omitempty"` + Mode string `json:"mode,omitempty"` + OwnerUID int `json:"owner_uid,omitempty"` + OwnerName string `json:"owner_name,omitempty"` + GroupGID int `json:"group_gid,omitempty"` + GroupName string `json:"group_name,omitempty"` + SHA256 string `json:"sha256,omitempty"` + SymlinkTo string `json:"symlink_target,omitempty"` + InGitRepo bool `json:"in_git_repo,omitempty"` + GitTracked bool `json:"git_tracked,omitempty"` + Entries []NPMRCEntry `json:"entries,omitempty"` + ParseError string `json:"parse_error,omitempty"` +} + +// NPMRCEntry is one parsed line of a .npmrc file. DisplayValue is always +// safe to print: auth values are redacted to ***last4 (or *** when the +// secret is short). The raw value is never stored — ValueSHA256 is the +// only fingerprint kept. +type NPMRCEntry struct { + Key string `json:"key"` + DisplayValue string `json:"display_value"` + LineNum int `json:"line_num"` + IsArray bool `json:"is_array,omitempty"` + IsAuth bool `json:"is_auth,omitempty"` + IsEnvRef bool `json:"is_env_ref,omitempty"` + EnvRefVars []string `json:"env_ref_vars,omitempty"` + ValueSHA256 string `json:"value_sha256,omitempty"` + Quoted bool `json:"quoted,omitempty"` +} + +// NPMRCEffective mirrors the merged-config view emitted by +// `npm config ls -l --json`. Auth values are returned by npm as +// "(protected)" — that's what we surface. +type NPMRCEffective struct { + SourceByKey map[string]string `json:"source_by_key,omitempty"` + Config map[string]any `json:"config,omitempty"` + Error string `json:"error,omitempty"` +} + +// NPMRCEnvVar is a single npm-relevant process environment variable. +// Set=false records are kept so the audit shape stays stable across hosts. +type NPMRCEnvVar struct { + Name string `json:"name"` + Set bool `json:"set"` + DisplayValue string `json:"display_value,omitempty"` + ValueSHA256 string `json:"value_sha256,omitempty"` +} + diff --git a/internal/output/npmrc_verbose.go b/internal/output/npmrc_verbose.go new file mode 100644 index 0000000..ba857fc --- /dev/null +++ b/internal/output/npmrc_verbose.go @@ -0,0 +1,306 @@ +package output + +import ( + "fmt" + "io" + "sort" + "strings" + "time" + + "github.com/step-security/dev-machine-guard/internal/model" +) + +// securityRelevantKeys are the npm config keys that materially change install +// behavior or trust posture. We highlight them in the verbose view so they +// stand out from the 100+ other keys npm exposes. +var securityRelevantKeys = map[string]string{ + "registry": "where unscoped packages come from", + "strict-ssl": "TLS verification on registry traffic", + "ignore-scripts": "block lifecycle scripts (preinstall/install/postinstall)", + "script-shell": "shell used by lifecycle scripts and `npm run`", + "node-options": "NODE_OPTIONS for spawned scripts (--require= is dangerous)", + "min-release-age": "skip versions newer than N days (defense vs just-published worms)", + "audit": "run npm audit on install", + "audit-level": "severity threshold for audit failures", + "package-lock": "honor lockfile for reproducible installs", + "replace-registry-host": "rewrite registry host in lockfile entries at install", + "ca": "inline-trusted CA cert(s)", + "cafile": "path to CA cert bundle", + "proxy": "outbound proxy", + "https-proxy": "outbound HTTPS proxy", + "prefix": "global install location", + "cache": "fetched-tarball cache directory", + "globalconfig": "path to global .npmrc", + "userconfig": "path to user .npmrc", +} + +// PrettyNPMRC renders an NPMRCAudit as a verbose, terminal-friendly report. +// It's the implementation behind `--npmrc`: focused, deeper than the default +// summary, but still scannable. +// +//nolint:errcheck // terminal output +func PrettyNPMRC(w io.Writer, audit *model.NPMRCAudit, dev model.Device, colorMode string) { + c := setupColors(colorMode) + + hr := strings.Repeat("─", 76) + fmt.Fprintf(w, "%s%s%s\n", c.purple, hr, c.reset) + fmt.Fprintf(w, "%s%s NPM CONFIG AUDIT %s\n", c.purple, c.bold, c.reset) + fmt.Fprintf(w, "%s%s%s\n", c.purple, hr, c.reset) + fmt.Fprintf(w, " host: %s%s%s user: %s%s%s platform: %s\n", + c.bold, dev.Hostname, c.reset, c.bold, dev.UserIdentity, c.reset, dev.Platform) + if audit.Available { + fmt.Fprintf(w, " npm: %s%s%s @ %s\n", c.green, audit.NPMVersion, c.reset, audit.NPMPath) + } else { + fmt.Fprintf(w, " npm: %s(not found in PATH — file-only audit)%s\n", c.dim, c.reset) + } + if audit.DiscoveryError != "" { + fmt.Fprintf(w, " %swarn: %s%s\n", c.dim, audit.DiscoveryError, c.reset) + } + fmt.Fprintln(w) + + // --- discovered files --- + fmt.Fprintf(w, "%s%s┌── DISCOVERED .npmrc FILES (%d) %s\n", + c.purple, c.bold, len(audit.Files), c.reset) + if len(audit.Files) == 0 { + fmt.Fprintf(w, " %sno .npmrc files at any scope%s\n", c.dim, c.reset) + } + // Stable display order: builtin → global → user → project (then by path). + files := append([]model.NPMRCFile(nil), audit.Files...) + sort.SliceStable(files, func(i, j int) bool { + if scopeRank(files[i].Scope) != scopeRank(files[j].Scope) { + return scopeRank(files[i].Scope) < scopeRank(files[j].Scope) + } + return files[i].Path < files[j].Path + }) + for _, f := range files { + printNPMRCFileVerbose(w, c, f) + } + fmt.Fprintln(w) + + // --- effective config --- + if audit.Effective != nil { + printEffectiveVerbose(w, c, audit.Effective) + } + + // --- env vars --- + printEnvVerbose(w, c, audit.Env) +} + +func scopeRank(s string) int { + switch s { + case "builtin": + return 0 + case "global": + return 1 + case "user": + return 2 + case "project": + return 3 + } + return 99 +} + +//nolint:errcheck // terminal output +func printNPMRCFileVerbose(w io.Writer, c *colors, f model.NPMRCFile) { + scopeTag := strings.ToUpper(f.Scope) + fmt.Fprintf(w, "│\n│ %s%s[%s]%s %s\n", c.purple, c.bold, scopeTag, c.reset, f.Path) + + if !f.Exists { + fmt.Fprintf(w, "│ %s(file does not exist — npm would skip this scope)%s\n", c.dim, c.reset) + return + } + + // Metadata line. + mtime := "" + if f.ModTimeUnix > 0 { + mtime = time.Unix(f.ModTimeUnix, 0).Format("2006-01-02 15:04:05") + } + owner := "?" + if f.OwnerName != "" { + owner = fmt.Sprintf("%s:%s", f.OwnerName, f.GroupName) + } + sha := f.SHA256 + if len(sha) > 12 { + sha = sha[:12] + } + fmt.Fprintf(w, "│ %smode=%s size=%db owner=%s mtime=%s sha=%s%s\n", + c.dim, f.Mode, f.SizeBytes, owner, mtime, sha, c.reset) + + // Notable flags. + flags := []string{} + if f.SymlinkTo != "" { + flags = append(flags, fmt.Sprintf("symlink → %s", f.SymlinkTo)) + } + if f.GitTracked { + flags = append(flags, c.bold+"GIT-TRACKED"+c.reset+" (committed — credentials would be exposed wherever the repo is)") + } else if f.InGitRepo { + flags = append(flags, "inside a git repo (untracked)") + } + if len(flags) > 0 { + for _, fl := range flags { + fmt.Fprintf(w, "│ %s· %s%s\n", c.dim, fl, c.reset) + } + } + + if f.ParseError != "" { + fmt.Fprintf(w, "│ %sparse error: %s%s\n", c.dim, f.ParseError, c.reset) + return + } + + if len(f.Entries) == 0 { + fmt.Fprintf(w, "│ %s(empty file)%s\n", c.dim, c.reset) + return + } + + // Entries — each one with line number, classification badge, key, redacted value. + fmt.Fprintf(w, "│ %sentries (%d):%s\n", c.dim, len(f.Entries), c.reset) + for _, e := range f.Entries { + key := e.Key + if e.IsArray { + key += "[]" + } + + // Classification badges + badges := []string{} + switch { + case e.IsAuth && e.IsEnvRef: + badges = append(badges, c.green+"AUTH:env-ref"+c.reset) + case e.IsAuth: + badges = append(badges, c.bold+"AUTH:hardcoded"+c.reset) + case e.IsEnvRef: + badges = append(badges, "env-ref") + } + if _, ok := securityRelevantKeys[e.Key]; ok { + badges = append(badges, c.purple+"sec-relevant"+c.reset) + } + badgeStr := "" + if len(badges) > 0 { + badgeStr = " " + strings.Join(badges, " ") + } + + fmt.Fprintf(w, "│ %s%4d:%s %-42s = %s%s%s%s\n", + c.dim, e.LineNum, c.reset, key, c.dim, e.DisplayValue, c.reset, badgeStr) + if e.IsAuth && e.IsEnvRef && len(e.EnvRefVars) > 0 { + fmt.Fprintf(w, "│ %s resolves from env: %s%s\n", + c.dim, strings.Join(e.EnvRefVars, ", "), c.reset) + } + } + + // Drift detection (per-file modification, "previous scan was N ago", etc.) + // and per-project effective overrides ("running npm in this dir flips X") + // are intentionally out of scope for this iteration. See + // .plans/0005-npmrc-audit.md for the documented extension points. +} + +//nolint:errcheck // terminal output +func printEffectiveVerbose(w io.Writer, c *colors, eff *model.NPMRCEffective) { + fmt.Fprintf(w, "%s%s┌── EFFECTIVE CONFIG (what npm would actually use) %s\n", + c.purple, c.bold, c.reset) + + if eff.Error != "" { + fmt.Fprintf(w, "│ %swarn: %s%s\n│\n", c.dim, eff.Error, c.reset) + } + + // Group keys by source so the layered structure is visible at a glance. + bySource := map[string][]string{} + for k := range eff.Config { + src := eff.SourceByKey[k] + if src == "" { + src = "default" + } + bySource[src] = append(bySource[src], k) + } + for _, ks := range bySource { + sort.Strings(ks) + } + + // Display order: paths/explicit-layer sources first (those are what the + // user changed); "default" last (compiled-in baseline, usually noise). + sources := make([]string, 0, len(bySource)) + for s := range bySource { + sources = append(sources, s) + } + sort.SliceStable(sources, func(i, j int) bool { + // "default" always sorts last. + if sources[i] == "default" { + return false + } + if sources[j] == "default" { + return true + } + return sources[i] < sources[j] + }) + + for _, src := range sources { + keys := bySource[src] + isDefault := src == "default" + + // In default-section, only show keys that are security-relevant — + // printing 100+ default values is noise. + shown := keys + hidden := 0 + if isDefault { + filtered := keys[:0] + for _, k := range keys { + if _, ok := securityRelevantKeys[k]; ok { + filtered = append(filtered, k) + } + } + hidden = len(keys) - len(filtered) + shown = filtered + } + + fmt.Fprintf(w, "│\n│ %sfrom %s%s%s (%d keys)\n", + c.dim, c.bold, src, c.reset, len(keys)) + + for _, k := range shown { + v := formatEffValue(eff.Config[k]) + marker := " " + if _, ok := securityRelevantKeys[k]; ok { + marker = c.purple + "★ " + c.reset + } + fmt.Fprintf(w, "│ %s%-42s = %s%s%s\n", marker, k, c.dim, v, c.reset) + } + if isDefault && hidden > 0 { + fmt.Fprintf(w, "│ %s(+%d default values not shown)%s\n", c.dim, hidden, c.reset) + } + } + fmt.Fprintln(w) +} + +// formatEffValue stringifies an arbitrary JSON value from npm config. +// Strings render bare; everything else uses fmt's default %v. +func formatEffValue(v any) string { + if v == nil { + return "null" + } + if s, ok := v.(string); ok { + return s + } + return fmt.Sprintf("%v", v) +} + +//nolint:errcheck // terminal output +func printEnvVerbose(w io.Writer, c *colors, env []model.NPMRCEnvVar) { + fmt.Fprintf(w, "%s%s┌── npm-RELEVANT ENVIRONMENT VARIABLES %s\n", + c.purple, c.bold, c.reset) + setCount := 0 + for _, e := range env { + if e.Set { + setCount++ + } + } + fmt.Fprintf(w, "│ %s%d set, %d unset (unset names are recorded so Phase B can detect transitions)%s\n", + c.dim, setCount, len(env)-setCount, c.reset) + fmt.Fprintln(w, "│") + for _, e := range env { + state := c.dim + "unset" + c.reset + val := "" + if e.Set { + state = c.green + " set " + c.reset + val = " = " + c.dim + e.DisplayValue + c.reset + } + fmt.Fprintf(w, "│ [%s] %s%s\n", state, e.Name, val) + } + fmt.Fprintln(w) +} diff --git a/internal/output/npmrc_verbose_test.go b/internal/output/npmrc_verbose_test.go new file mode 100644 index 0000000..7b9828c --- /dev/null +++ b/internal/output/npmrc_verbose_test.go @@ -0,0 +1,169 @@ +package output + +import ( + "bytes" + "strings" + "testing" + + "github.com/step-security/dev-machine-guard/internal/model" +) + +// fakeAudit returns a representative NPMRCAudit covering all the verbose +// view's branches: a missing scope, a present file with auth + env-ref + a +// security-relevant key, a git-tracked project file, and an env var that +// must come out redacted. +func fakeAudit() *model.NPMRCAudit { + return &model.NPMRCAudit{ + Available: true, + NPMVersion: "10.9.4", + NPMPath: "/usr/bin/npm", + Files: []model.NPMRCFile{ + { + Path: "/etc/npmrc", + Scope: "global", + Exists: false, + }, + { + Path: "/home/test/.npmrc", + Scope: "user", + Exists: true, + Readable: true, + SizeBytes: 200, + ModTimeUnix: 1730000000, + Mode: "0600", + OwnerName: "test", + GroupName: "test", + SHA256: "deadbeefcafebabe0123456789abcdef", + Entries: []model.NPMRCEntry{ + {Key: "registry", DisplayValue: "https://registry.npmjs.org/", LineNum: 1}, + {Key: "//registry.npmjs.org/:_authToken", DisplayValue: "***1234", LineNum: 2, IsAuth: true}, + {Key: "//npm.mycompany.com/:_authToken", DisplayValue: "${COMPANY_TOKEN}", LineNum: 3, IsAuth: true, IsEnvRef: true, EnvRefVars: []string{"COMPANY_TOKEN"}}, + {Key: "strict-ssl", DisplayValue: "false", LineNum: 4}, + }, + }, + { + Path: "/home/test/proj/.npmrc", + Scope: "project", + Exists: true, + Readable: true, + SizeBytes: 80, + Mode: "0644", + OwnerName: "test", + GroupName: "test", + SHA256: "abc123", + InGitRepo: true, + GitTracked: true, + Entries: []model.NPMRCEntry{ + {Key: "ignore-scripts", DisplayValue: "true", LineNum: 1}, + }, + }, + }, + Effective: &model.NPMRCEffective{ + Config: map[string]any{ + "registry": "https://registry.npmjs.org/", + "strict-ssl": false, + "ignore-scripts": true, + "audit-level": "moderate", + "long": true, // not security-relevant; should hide if from default + }, + SourceByKey: map[string]string{ + "registry": "user", + "strict-ssl": "user", + "ignore-scripts": "user", + "audit-level": "global", + "long": "default", + }, + }, + Env: []model.NPMRCEnvVar{ + {Name: "NPM_TOKEN", Set: true, DisplayValue: "***f00d"}, + {Name: "NPM_CONFIG_USERCONFIG", Set: false}, + }, + } +} + +func TestPrettyNPMRC_RedactsAuthAndShowsEnvRef(t *testing.T) { + var buf bytes.Buffer + PrettyNPMRC(&buf, fakeAudit(), model.Device{Hostname: "h", UserIdentity: "u", Platform: "linux"}, "never") + out := buf.String() + + // Auth-hardcoded should NEVER print the raw token; should print the + // already-redacted DisplayValue plus the AUTH:hardcoded badge. + if !strings.Contains(out, "***1234") { + t.Errorf("expected redacted display ***1234 in output") + } + if !strings.Contains(out, "AUTH:hardcoded") { + t.Errorf("expected AUTH:hardcoded badge") + } + if !strings.Contains(out, "AUTH:env-ref") { + t.Errorf("expected AUTH:env-ref badge for ${VAR} reference") + } + if !strings.Contains(out, "${COMPANY_TOKEN}") { + t.Errorf("env-ref literal should be preserved verbatim") + } + if !strings.Contains(out, "resolves from env: COMPANY_TOKEN") { + t.Errorf("expected env var name in resolves-from line") + } +} + +func TestPrettyNPMRC_HighlightsGitTracked(t *testing.T) { + var buf bytes.Buffer + PrettyNPMRC(&buf, fakeAudit(), model.Device{}, "never") + out := buf.String() + + if !strings.Contains(out, "GIT-TRACKED") { + t.Errorf("git-tracked file should surface a GIT-TRACKED warning, got:\n%s", out) + } +} + +func TestPrettyNPMRC_GroupsEffectiveBySource(t *testing.T) { + var buf bytes.Buffer + PrettyNPMRC(&buf, fakeAudit(), model.Device{}, "never") + out := buf.String() + + for _, want := range []string{"from user", "from global", "from default"} { + if !strings.Contains(out, want) { + t.Errorf("expected effective view to include %q grouping", want) + } + } + + // Security-relevant keys from non-default sources are always shown. + for _, key := range []string{"registry", "strict-ssl", "ignore-scripts", "audit-level"} { + if !strings.Contains(out, key) { + t.Errorf("expected security-relevant key %q in effective view", key) + } + } + + // Default-section non-security keys should be hidden behind the + // "+N default values not shown" line. + if strings.Contains(out, "long ") && !strings.Contains(out, "default values not shown") { + t.Errorf("non-security default key 'long' should not be expanded") + } +} + +func TestPrettyNPMRC_ShowsMissingFiles(t *testing.T) { + var buf bytes.Buffer + PrettyNPMRC(&buf, fakeAudit(), model.Device{}, "never") + out := buf.String() + + if !strings.Contains(out, "/etc/npmrc") { + t.Errorf("missing global file should still be listed") + } + if !strings.Contains(out, "file does not exist") { + t.Errorf("missing files should be marked clearly") + } +} + +func TestPrettyNPMRC_HandlesNoNPM(t *testing.T) { + a := fakeAudit() + a.Available = false + a.NPMVersion = "" + a.NPMPath = "" + + var buf bytes.Buffer + PrettyNPMRC(&buf, a, model.Device{}, "never") + out := buf.String() + + if !strings.Contains(out, "not found in PATH") { + t.Errorf("expected fallback message when npm missing") + } +} diff --git a/internal/output/pretty.go b/internal/output/pretty.go index dd4aee5..a183409 100644 --- a/internal/output/pretty.go +++ b/internal/output/pretty.go @@ -262,9 +262,34 @@ func Pretty(w io.Writer, result *model.ScanResult, colorMode string) error { fmt.Fprintln(w) } + // NPM CONFIG AUDIT (compact summary; deep view via --npmrc) + if result.NPMRCAudit != nil { + printNPMRCAuditSummary(w, c, result.NPMRCAudit) + } + return nil } +//nolint:errcheck // terminal output +func printNPMRCAuditSummary(w io.Writer, c *colors, a *model.NPMRCAudit) { + fmt.Fprintf(w, " %s%sNPM CONFIG AUDIT%s\n", c.purple, c.bold, c.reset) + if a.Available { + fmt.Fprintf(w, " %snpm:%s %s @ %s\n", c.dim, c.reset, a.NPMVersion, a.NPMPath) + } else { + fmt.Fprintf(w, " %snpm:%s not found in PATH (file-only audit)\n", c.dim, c.reset) + } + existing := 0 + for _, f := range a.Files { + if f.Exists { + existing++ + } + } + fmt.Fprintf(w, " %sfiles:%s %d discovered, %d present\n", c.dim, c.reset, len(a.Files), existing) + fmt.Fprintf(w, " %srun --npmrc for the deep view%s\n", c.dim, c.reset) + fmt.Fprintln(w) +} + + //nolint:errcheck // terminal output func printSectionHeader(w io.Writer, c *colors, title string, count int) { padding := 35 - len(title) diff --git a/internal/scan/scanner.go b/internal/scan/scanner.go index 0220169..acc5c40 100644 --- a/internal/scan/scanner.go +++ b/internal/scan/scanner.go @@ -206,6 +206,15 @@ func Run(exec executor.Executor, log *progress.Logger, cfg *cli.Config) error { log.StepSkip("disabled (use --enable-python-scan to enable)") } + // npm config audit — surface-only inventory of every .npmrc on the host + // plus the merged effective view npm itself would resolve. Always on; + // the audit is cheap (a few stat calls and at most two npm invocations). + log.StepStart("Auditing npm configuration") + start = time.Now() + loggedInUser, _ := exec.LoggedInUser() + npmrcAudit := detector.NewNPMRCDetector(exec).Detect(ctx, searchDirs, loggedInUser) + log.StepDone(time.Since(start)) + // Ensure no nil slices (JSON must emit [] not null) if aiTools == nil { aiTools = []model.AITool{} @@ -274,6 +283,7 @@ func Run(exec executor.Executor, log *progress.Logger, cfg *cli.Config) error { SnapPackages: snapPackages, FlatpakPkgManager: flatpakPkgManager, FlatpakPackages: flatpakPackages, + NPMRCAudit: &npmrcAudit, Summary: model.Summary{ AIAgentsAndToolsCount: len(aiTools), IDEInstallationsCount: len(ides), diff --git a/internal/telemetry/telemetry.go b/internal/telemetry/telemetry.go index 82a9e72..b529d37 100644 --- a/internal/telemetry/telemetry.go +++ b/internal/telemetry/telemetry.go @@ -53,6 +53,7 @@ type Payload struct { SystemPackageScans []model.SystemPackageScanResult `json:"system_package_scans"` AIAgents []model.AITool `json:"ai_agents"` MCPConfigs []model.MCPConfigEnterprise `json:"mcp_configs"` + NPMRCAudit *model.NPMRCAudit `json:"npmrc_audit,omitempty"` ExecutionLogs *ExecutionLogs `json:"execution_logs,omitempty"` PerformanceMetrics *PerformanceMetrics `json:"performance_metrics,omitempty"` @@ -508,6 +509,17 @@ func Run(exec executor.Executor, log *progress.Logger, cfg *cli.Config) (err err systemPackageScans = []model.SystemPackageScanResult{} } + // npm config audit — surface-only inventory of every .npmrc on the + // host plus the merged effective view npm itself would resolve. We + // use the user-aware executor so npm resolves through the logged-in + // user's PATH (catches nvm / fnm / brew installs that root's PATH + // wouldn't see). + log.Progress("Auditing npm configuration...") + npmrcLoggedIn, _ := exec.LoggedInUser() + npmrcAudit := detector.NewNPMRCDetector(userExec).Detect(ctx, searchDirs, npmrcLoggedIn) + log.Progress(" npm available: %v, files discovered: %d", npmrcAudit.Available, len(npmrcAudit.Files)) + fmt.Fprintln(os.Stderr) + // Finalize execution logs before building payload execLogsBase64 := capture.Finalize() endTime := time.Now() @@ -540,6 +552,7 @@ func Run(exec executor.Executor, log *progress.Logger, cfg *cli.Config) (err err SystemPackageScans: systemPackageScans, AIAgents: allAI, MCPConfigs: mcpConfigs, + NPMRCAudit: &npmrcAudit, ExecutionLogs: &ExecutionLogs{ OutputBase64: execLogsBase64, From 18e6959c3312e1b61c4b6a7b8c27ea046833d63d Mon Sep 17 00:00:00 2001 From: Swarit Pandey Date: Wed, 6 May 2026 18:57:27 +0530 Subject: [PATCH 2/6] feat(pipconfig): surface inventory + finding catalog for pip configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a read-only audit that mirrors the npmrc one but covers pip: discovery via `pip config debug` (with OS-specific path-enumeration fallback when pip isn't installed), parsing of every pip.conf / pip.ini across all four scopes (global / user / user-legacy / site / PIP_CONFIG_FILE), the merged effective view from `pip config list -v` with per-key source attribution, a snapshot of PIP_* environment variables, and a `~/.netrc` presence/permissions probe. Where the pip audit goes further than npmrc: it ships a fixed catalog of finding IDs (pip-001 .. pip-024) per the spec — embedded creds in URLs, http:// schemes, extra-index-url presence (dependency-confusion), trusted-host, no-build-isolation, file mode escalations, legacy paths, PIP_CONFIG_FILE redirection, etc. Each finding carries severity, category, source attribution, redacted value, detail, and a remediation hint. Auth tokens and proxy credentials are always rendered as `user:****@host` and the raw value never leaves the detector. Surfaces in: - standard scan: compact summary section in --pretty (severity bucket counts) plus the full audit object in JSON / enterprise telemetry - new --pipconfig flag: focused, verbose pretty view (or JSON via --pipconfig --json) — findings first, then files, then effective view, then env vars + ~/.netrc End-to-end validated on Fedora EC2 with seeded fixtures across all four scopes (PIP_CONFIG_FILE redirect, http:// scheme on index-url, extra-index-url with embedded creds, no-build-isolation, etc.). See .plans/0006-pip-config-audit.md for the full design. --- cmd/stepsecurity-dev-machine-guard/main.go | 27 +- internal/cli/cli.go | 4 + internal/detector/pipconfig.go | 594 +++++++++++++++++ internal/detector/pipconfig_findings.go | 654 +++++++++++++++++++ internal/detector/pipconfig_findings_test.go | 305 +++++++++ internal/detector/pipconfig_helpers.go | 116 ++++ internal/detector/pipconfig_parse.go | 153 +++++ internal/detector/pipconfig_parse_test.go | 173 +++++ internal/detector/pipconfig_stat_unix.go | 37 ++ internal/detector/pipconfig_stat_windows.go | 10 + internal/detector/pipconfig_test.go | 246 +++++++ internal/model/model.go | 98 +++ internal/output/pipconfig_verbose.go | 244 +++++++ internal/output/pretty.go | 26 + internal/scan/scanner.go | 8 + internal/telemetry/telemetry.go | 17 +- 16 files changed, 2705 insertions(+), 7 deletions(-) create mode 100644 internal/detector/pipconfig.go create mode 100644 internal/detector/pipconfig_findings.go create mode 100644 internal/detector/pipconfig_findings_test.go create mode 100644 internal/detector/pipconfig_helpers.go create mode 100644 internal/detector/pipconfig_parse.go create mode 100644 internal/detector/pipconfig_parse_test.go create mode 100644 internal/detector/pipconfig_stat_unix.go create mode 100644 internal/detector/pipconfig_stat_windows.go create mode 100644 internal/detector/pipconfig_test.go create mode 100644 internal/output/pipconfig_verbose.go diff --git a/cmd/stepsecurity-dev-machine-guard/main.go b/cmd/stepsecurity-dev-machine-guard/main.go index 43d7b6d..d0817b9 100644 --- a/cmd/stepsecurity-dev-machine-guard/main.go +++ b/cmd/stepsecurity-dev-machine-guard/main.go @@ -166,8 +166,8 @@ func main() { } default: - // --npmrc: focused, verbose pretty audit of npm config only. - // Bypasses all other detectors for a fast (~1s) deep dive. + // --npmrc and --pipconfig: focused, verbose pretty audits that + // bypass everything else for a fast (~1s) deep dive. if cfg.NPMRCOnly { if err := runNPMRCOnly(exec, cfg); err != nil { log.Error("%v", err) @@ -175,6 +175,13 @@ func main() { } return } + if cfg.PipConfigOnly { + if err := runPipConfigOnly(exec, cfg); err != nil { + log.Error("%v", err) + os.Exit(1) + } + return + } // Community mode or auto-detect enterprise switch { case cfg.OutputFormatSet || cfg.HTMLOutputFile != "": @@ -219,6 +226,22 @@ func runNPMRCOnly(exec executor.Executor, cfg *cli.Config) error { return nil } +// runPipConfigOnly executes only the pip-config detector and renders the +// verbose pretty view (or JSON when --json is also passed). +func runPipConfigOnly(exec executor.Executor, cfg *cli.Config) error { + ctx := context.Background() + dev := device.Gather(ctx, exec) + loggedInUser, _ := exec.LoggedInUser() + + audit := detector.NewPipConfigDetector(exec).Detect(ctx, loggedInUser) + + if cfg.OutputFormat == "json" { + return scanJSONEncoder(os.Stdout).Encode(audit) + } + output.PrettyPipConfig(os.Stdout, &audit, dev, cfg.ColorMode) + return nil +} + // resolveScanSearchDirs expands `$HOME` to the logged-in user's home dir // and leaves other entries unchanged. Mirrors the helper inside scan.Run // so --npmrc walks the same project tree the full scan would. diff --git a/internal/cli/cli.go b/internal/cli/cli.go index be9e904..a8e4f84 100644 --- a/internal/cli/cli.go +++ b/internal/cli/cli.go @@ -23,6 +23,7 @@ type Config struct { EnablePythonScan *bool // nil=auto, true/false=explicit IncludeBundledPlugins bool // --include-bundled-plugins: include bundled/platform plugins in output NPMRCOnly bool // --npmrc: run only the npmrc audit and render verbose pretty output + PipConfigOnly bool // --pipconfig: run only the pip config audit and render verbose pretty output SearchDirs []string // defaults to ["$HOME"] } @@ -90,6 +91,8 @@ func Parse(args []string) (*Config, error) { cfg.IncludeBundledPlugins = true case arg == "--npmrc": cfg.NPMRCOnly = true + case arg == "--pipconfig": + cfg.PipConfigOnly = true case strings.HasPrefix(arg, "--color="): mode := strings.TrimPrefix(arg, "--color=") if mode != "auto" && mode != "always" && mode != "never" { @@ -167,6 +170,7 @@ Options: --disable-python-scan Disable Python package scanning --include-bundled-plugins Include bundled/platform plugins in output (Windows) --npmrc Run ONLY the npm config audit (verbose pretty view; --json supported) + --pipconfig Run ONLY the pip config audit (verbose pretty view; --json supported) --log-level=LEVEL Log level: error | warn | info | debug (default: info) --verbose Shortcut for --log-level=debug --color=WHEN Color mode: auto | always | never (default: auto) diff --git a/internal/detector/pipconfig.go b/internal/detector/pipconfig.go new file mode 100644 index 0000000..37a5ad0 --- /dev/null +++ b/internal/detector/pipconfig.go @@ -0,0 +1,594 @@ +package detector + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "io/fs" + "os" + "os/user" + "path/filepath" + "regexp" + "strconv" + "strings" + "time" + + "github.com/step-security/dev-machine-guard/internal/executor" + "github.com/step-security/dev-machine-guard/internal/model" +) + +// devNullPaths are values of $PIP_CONFIG_FILE that disable all config-file +// loading. We compare case-insensitively so `NUL`, `nul`, etc. all match. +var devNullPaths = map[string]struct{}{ + "/dev/null": {}, + "nul": {}, +} + +// pipEnvVarsToWatch is the set of environment variables we always record on +// the audit. Recording an unset var lets a future change-tracking layer +// notice when one is *added* between runs (the same logic the npmrc +// detector uses). Includes a small set of credential-bearing names that +// pip itself will never define but that worms commonly use to inject +// transient creds. +var pipEnvVarsToWatch = []string{ + "PIP_CONFIG_FILE", + "PIP_INDEX_URL", + "PIP_EXTRA_INDEX_URL", + "PIP_TRUSTED_HOST", + "PIP_FIND_LINKS", + "PIP_NO_CACHE_DIR", + "PIP_NO_BUILD_ISOLATION", + "PIP_REQUIRE_HASHES", + "PIP_KEYRING_PROVIDER", + "PIP_CACHE_DIR", + "PIP_CERT", + "PIP_CLIENT_CERT", + "PIP_PROXY", + "VIRTUAL_ENV", + "HTTP_PROXY", + "HTTPS_PROXY", +} + +// pipInvocationsToTry orders the candidate ways to find pip on the host. +// The detector probes each in turn and uses the first that resolves. +var pipInvocationsToTry = []struct { + binary string + args []string // args prepended to the binary call (e.g., "-m" "pip") + display string +}{ + {"pip", nil, "pip"}, + {"pip3", nil, "pip3"}, + {"python3", []string{"-m", "pip"}, "python3 -m pip"}, + {"python", []string{"-m", "pip"}, "python -m pip"}, +} + +// pipConfigDebugSectionRE matches the layer headers in `pip config debug` +// output: `env_var:`, `env:`, `global:`, `site:`, `user:`. +var pipConfigDebugSectionRE = regexp.MustCompile(`^(env_var|env|global|site|user):$`) + +// pipConfigDebugFileRE matches a discovered file line, e.g. +// ` /etc/pip.conf, exists: False`. +var pipConfigDebugFileRE = regexp.MustCompile(`^\s+(.+),\s+exists:\s+(True|False)\s*$`) + +// PipConfigDetector performs the read-only pip config audit. +type PipConfigDetector struct { + exec executor.Executor + + // Hooks for tests; default to platform-specific impls. Owner lookup + // uses syscall.Stat_t on Unix and is a no-op on Windows. + ownerLookup func(path string) pipOwnerInfo + gitTracked func(ctx context.Context, path string) bool + inGitRepo func(path string) bool +} + +// pipOwnerInfo is the result of a Unix stat. OK=false on Windows or when +// the file doesn't exist; the detector treats that as "owner unknown" and +// leaves the model fields empty. +type pipOwnerInfo struct { + UID int + GID int + OwnerName string + GroupName string + OK bool +} + +// NewPipConfigDetector wires platform-specific hooks; tests override. +func NewPipConfigDetector(exec executor.Executor) *PipConfigDetector { + d := &PipConfigDetector{exec: exec} + d.ownerLookup = pipStatOwner + d.gitTracked = d.defaultGitTracked + d.inGitRepo = pipDefaultInGitRepo + return d +} + +// Detect runs the full audit. +// +// loggedInUser drives where we look for the user's pip config when +// running as root (launchd / systemd context). Pass nil when the agent +// is already running as the user. +func (d *PipConfigDetector) Detect(ctx context.Context, loggedInUser *user.User) model.PipAudit { + audit := model.PipAudit{ + Files: []model.PipConfigFile{}, + EnvVars: d.collectEnvVars(), + Findings: []model.PipFinding{}, + } + + // 1) Detect pip itself. + if path, args, display, version, ok := d.detectPip(ctx); ok { + audit.Available = true + audit.Path = path + audit.Invocation = display + audit.Version = version + _ = args // args are consumed by runPip, not surfaced on the audit + } + + // 2) Discover config files. Prefer `pip config debug` output; fall back + // to OS-specific path enumeration when pip is unavailable or its + // output looks malformed. + files := d.discoverFiles(ctx, audit.Available, loggedInUser) + for i := range files { + d.populateFileMetadata(ctx, &files[i]) + } + audit.Files = files + + // 3) Effective merged view (only meaningful when pip is available). + if audit.Available { + eff, err := d.captureEffective(ctx) + if err == nil { + audit.Effective = eff + } else { + audit.Effective = &model.PipEffective{Error: err.Error()} + } + } + + // 4) ~/.netrc presence + permissions (informational only — see plan). + audit.Netrc = d.probeNetrc(loggedInUser) + + // 5) Findings from the rule catalog (pip-001 .. pip-024). + audit.Findings = evaluatePipFindings(&audit) + + return audit +} + +// --- pip detection ---------------------------------------------------------- + +// detectPip probes the candidate invocations and returns the first that +// successfully reports its version. Returns: +// +// path — absolute path to the binary that fronts pip (e.g. /usr/bin/pip3) +// args — extra args to prepend (`["-m", "pip"]` for python -m pip) +// display — human-readable invocation form +// version — `pip --version` first-token version string +// ok — true when a working pip was found +func (d *PipConfigDetector) detectPip(ctx context.Context) (string, []string, string, string, bool) { + for _, cand := range pipInvocationsToTry { + path, err := d.exec.LookPath(cand.binary) + if err != nil { + continue + } + args := append([]string(nil), cand.args...) + args = append(args, "--version") + stdout, _, exit, err := d.exec.RunWithTimeout(ctx, 5*time.Second, cand.binary, args...) + if err != nil || exit != 0 { + continue + } + // `pip --version` outputs "pip X.Y.Z from /path/... (python ...)" + fields := strings.Fields(strings.TrimSpace(stdout)) + version := "" + if len(fields) >= 2 { + version = fields[1] + } + return path, cand.args, cand.display, version, true + } + return "", nil, "", "", false +} + +// runPip runs `pip ` using the detected invocation. We re-detect +// the invocation each call (cheap; LookPath is cached by the OS) so the +// detector remains stateless. Returns stdout, exit code, and whether pip +// was available at all. +func (d *PipConfigDetector) runPip(ctx context.Context, timeout time.Duration, pipArgs ...string) (string, int, bool) { + for _, cand := range pipInvocationsToTry { + if _, err := d.exec.LookPath(cand.binary); err != nil { + continue + } + args := append([]string(nil), cand.args...) + args = append(args, pipArgs...) + stdout, _, exit, err := d.exec.RunWithTimeout(ctx, timeout, cand.binary, args...) + if err != nil && exit == 0 { + // hard error; skip and try next candidate + continue + } + return stdout, exit, true + } + return "", 0, false +} + +// --- discovery -------------------------------------------------------------- + +// discoverFiles returns the union of (`pip config debug`-derived paths) and +// (PIP_CONFIG_FILE / VIRTUAL_ENV-derived paths). Deduplicates by absolute +// path; the first layer assignment wins. +func (d *PipConfigDetector) discoverFiles(ctx context.Context, pipAvailable bool, loggedInUser *user.User) []model.PipConfigFile { + type entry struct { + path string + layer string + } + var ordered []entry + seen := map[string]struct{}{} + add := func(layer, path string) { + if path == "" { + return + } + if abs, err := filepath.Abs(path); err == nil { + path = abs + } + if _, dup := seen[path]; dup { + return + } + seen[path] = struct{}{} + ordered = append(ordered, entry{path: path, layer: layer}) + } + + // Track devnull explicitly so the findings engine can emit pip-021. + if v := strings.TrimSpace(d.exec.Getenv("PIP_CONFIG_FILE")); v != "" { + if _, isDevNull := devNullPaths[strings.ToLower(filepath.Base(v))]; !isDevNull { + add("PIP_CONFIG_FILE", v) + } + // devnull case: still scan env vars; don't add the path. + } + + // VIRTUAL_ENV is set by `source .../activate`. Its pip.conf is the site + // scope — only relevant when the venv is active for the running shell. + if v := strings.TrimSpace(d.exec.Getenv("VIRTUAL_ENV")); v != "" { + add("site", filepath.Join(v, pipConfigFilename(d.exec.GOOS()))) + } + + // Preferred: `pip config debug`. Falls back to manual path enumeration + // when pip isn't installed or the output is unparseable. + usedPipDebug := false + if pipAvailable { + if discovered, ok := d.discoverViaPipDebug(ctx); ok { + usedPipDebug = true + for _, e := range discovered { + add(e.layer, e.path) + } + } + } + if !usedPipDebug { + for _, e := range d.discoverViaPathEnumeration(loggedInUser) { + add(e.layer, e.path) + } + } + + out := make([]model.PipConfigFile, 0, len(ordered)) + for _, e := range ordered { + out = append(out, model.PipConfigFile{Path: e.path, Layer: e.layer}) + } + return out +} + +// discoverViaPipDebug runs `pip config debug` and parses its grouped output. +// Returns false if parsing failed; the caller should then fall back to +// manual path enumeration. +func (d *PipConfigDetector) discoverViaPipDebug(ctx context.Context) ([]struct{ path, layer string }, bool) { + stdout, exit, ok := d.runPip(ctx, 10*time.Second, "config", "debug") + if !ok || exit != 0 || strings.TrimSpace(stdout) == "" { + return nil, false + } + var out []struct{ path, layer string } + currentLayer := "" + for _, line := range strings.Split(stdout, "\n") { + // Layer header. + if m := pipConfigDebugSectionRE.FindStringSubmatch(strings.TrimRight(line, "\r")); m != nil { + currentLayer = m[1] + continue + } + // File line. Only emit when we're in a layer that maps to a real + // config file — `env_var:` and `env:` describe vars, not files. + if currentLayer != "global" && currentLayer != "user" && currentLayer != "site" { + continue + } + if m := pipConfigDebugFileRE.FindStringSubmatch(line); m != nil { + path := strings.TrimSpace(m[1]) + out = append(out, struct{ path, layer string }{path: path, layer: currentLayer}) + } + } + if len(out) == 0 { + return nil, false + } + return out, true +} + +// discoverViaPathEnumeration walks the OS-specific paths from spec §3. +// Used when pip isn't installed (so we still surface stranded config +// files) and as a fallback when `pip config debug` parsing failed. +func (d *PipConfigDetector) discoverViaPathEnumeration(loggedInUser *user.User) []struct{ path, layer string } { + homeDir := "" + if loggedInUser != nil { + homeDir = loggedInUser.HomeDir + } + if homeDir == "" { + if h, err := os.UserHomeDir(); err == nil { + homeDir = h + } + } + goos := d.exec.GOOS() + var out []struct{ path, layer string } + + switch goos { + case "windows": + // Global. Vista is unsupported; skip. + if pd := d.exec.Getenv("ProgramData"); pd != "" { + out = append(out, struct{ path, layer string }{filepath.Join(pd, "pip", "pip.ini"), "global"}) + } + // User. + if appdata := d.exec.Getenv("APPDATA"); appdata != "" { + out = append(out, struct{ path, layer string }{filepath.Join(appdata, "pip", "pip.ini"), "user"}) + } + if homeDir != "" { + out = append(out, struct{ path, layer string }{filepath.Join(homeDir, "pip", "pip.ini"), "user-legacy"}) + } + case "darwin": + // Global. + out = append(out, struct{ path, layer string }{"/Library/Application Support/pip/pip.conf", "global"}) + if homeDir != "" { + // User (current — Library path is preferred when its directory exists) + out = append(out, struct{ path, layer string }{filepath.Join(homeDir, "Library", "Application Support", "pip", "pip.conf"), "user"}) + // Fallback (when the Library dir doesn't exist). + out = append(out, struct{ path, layer string }{filepath.Join(homeDir, ".config", "pip", "pip.conf"), "user"}) + // Legacy. + out = append(out, struct{ path, layer string }{filepath.Join(homeDir, ".pip", "pip.conf"), "user-legacy"}) + } + default: // linux + everything else + // XDG_CONFIG_DIRS is colon-separated; check each. + xdgDirs := d.exec.Getenv("XDG_CONFIG_DIRS") + if xdgDirs == "" { + xdgDirs = "/etc/xdg" + } + for _, dir := range strings.Split(xdgDirs, ":") { + dir = strings.TrimSpace(dir) + if dir == "" { + continue + } + out = append(out, struct{ path, layer string }{filepath.Join(dir, "pip", "pip.conf"), "global"}) + } + out = append(out, struct{ path, layer string }{"/etc/pip.conf", "global"}) + + if homeDir != "" { + xdgHome := d.exec.Getenv("XDG_CONFIG_HOME") + if xdgHome == "" { + xdgHome = filepath.Join(homeDir, ".config") + } + out = append(out, struct{ path, layer string }{filepath.Join(xdgHome, "pip", "pip.conf"), "user"}) + out = append(out, struct{ path, layer string }{filepath.Join(homeDir, ".pip", "pip.conf"), "user-legacy"}) + } + } + return out +} + +// pipConfigFilename returns "pip.conf" or "pip.ini" depending on OS. +func pipConfigFilename(goos string) string { + if goos == "windows" { + return "pip.ini" + } + return "pip.conf" +} + +// --- per-file metadata ------------------------------------------------------ + +func (d *PipConfigDetector) populateFileMetadata(ctx context.Context, f *model.PipConfigFile) { + info, err := os.Lstat(f.Path) + if err != nil { + if os.IsNotExist(err) { + f.Exists = false + return + } + f.Exists = true + f.ParseError = "lstat: " + err.Error() + return + } + f.Exists = true + + // Follow through any symlink for size/mtime/mode (lstat already proved + // the symlink itself exists; a broken symlink target shouldn't crash + // the audit). + if info.Mode()&os.ModeSymlink != 0 { + stat, statErr := os.Stat(f.Path) + if statErr != nil { + f.Readable = false + f.ParseError = "stat (followed symlink): " + statErr.Error() + return + } + info = stat + } + f.SizeBytes = info.Size() + f.ModTimeUnix = info.ModTime().Unix() + f.Mode = fmt.Sprintf("%#o", info.Mode().Perm()) + + if info.IsDir() { + f.ParseError = "path is a directory" + return + } + + if d.ownerLookup != nil { + oi := d.ownerLookup(f.Path) + if oi.OK { + f.OwnerName = oi.OwnerName + f.GroupName = oi.GroupName + } + } + + data, err := os.ReadFile(f.Path) + if err != nil { + f.Readable = false + f.ParseError = "read: " + err.Error() + return + } + f.Readable = true + sum := sha256.Sum256(data) + f.SHA256 = hex.EncodeToString(sum[:]) + + f.Sections = parsePipConfig(data) + + if d.inGitRepo != nil && d.inGitRepo(f.Path) { + f.InGitRepo = true + if d.gitTracked != nil && d.gitTracked(ctx, f.Path) { + f.GitTracked = true + } + } +} + +// --- effective view --------------------------------------------------------- + +// pipConfigListLineRE captures `
.='' from ` +// lines. We split on the literal substring `' from ` rather than greedy +// regex so values containing single quotes don't fool us. +// +// Match group 1 = section.key (split on '.' for individual fields). +// Match group 2 = quoted value (still wrapped in single quotes; strip +// after match). +var pipConfigListPrefix = regexp.MustCompile(`^([A-Za-z0-9_\-]+)\.([A-Za-z0-9_\-]+)='`) + +func (d *PipConfigDetector) captureEffective(ctx context.Context) (*model.PipEffective, error) { + stdout, exit, ok := d.runPip(ctx, 10*time.Second, "config", "list", "-v") + if !ok || exit != 0 { + return nil, fmt.Errorf("pip config list -v exited %d", exit) + } + eff := &model.PipEffective{ + SourceByKey: map[string]string{}, + Config: map[string]string{}, + } + for _, line := range strings.Split(stdout, "\n") { + line = strings.TrimRight(line, "\r") + // Skip the discovery preamble lines. + if strings.HasPrefix(line, "For variant ") { + continue + } + if !pipConfigListPrefix.MatchString(line) { + continue + } + // Split on the literal ` from ` boundary that separates the + // quoted value from the source. + // e.g. global.index-url='https://...' from /path/file + idx := strings.Index(line, "' from ") + if idx < 0 { + continue + } + header := line[:idx] + source := strings.TrimSpace(line[idx+len("' from "):]) + + // header is `
.='` — strip the `
.=` prefix. + eq := strings.IndexByte(header, '=') + if eq < 0 { + continue + } + secKey := header[:eq] + value := header[eq+1:] + // Trim leading single quote from value. + value = strings.TrimPrefix(value, "'") + + // Split section.key. + dot := strings.IndexByte(secKey, '.') + if dot < 0 { + continue + } + section := secKey[:dot] + key := secKey[dot+1:] + full := section + "." + key + + eff.Config[full] = value + eff.SourceByKey[full] = source + } + return eff, nil +} + +// --- env vars --------------------------------------------------------------- + +func (d *PipConfigDetector) collectEnvVars() []model.PipEnvVar { + out := make([]model.PipEnvVar, 0, len(pipEnvVarsToWatch)) + for _, name := range pipEnvVarsToWatch { + v := d.exec.Getenv(name) + if v == "" { + continue + } + out = append(out, model.PipEnvVar{ + Name: name, + Value: v, + Display: redactCredsInValue(v), + SHA256: hashCredential(v), + }) + } + return out +} + +// --- ~/.netrc check --------------------------------------------------------- + +func (d *PipConfigDetector) probeNetrc(loggedInUser *user.User) *model.PipNetrcStatus { + homeDir := "" + if loggedInUser != nil { + homeDir = loggedInUser.HomeDir + } + if homeDir == "" { + if h, err := os.UserHomeDir(); err == nil { + homeDir = h + } + } + if homeDir == "" { + return nil + } + path := filepath.Join(homeDir, ".netrc") + if d.exec.GOOS() == "windows" { + // Windows uses _netrc; allow either spelling. + path = filepath.Join(homeDir, "_netrc") + } + out := &model.PipNetrcStatus{Path: path} + info, err := os.Stat(path) + if err != nil { + if !os.IsNotExist(err) { + out.Exists = true // probe error; surface that we tried + } + return out + } + out.Exists = true + if d.exec.GOOS() != "windows" { + out.Mode = fmt.Sprintf("%#o", info.Mode().Perm()) + } + return out +} + +// --- git helpers ------------------------------------------------------------ + +func (d *PipConfigDetector) defaultGitTracked(ctx context.Context, path string) bool { + dir := filepath.Dir(path) + base := filepath.Base(path) + _, _, exit, err := d.exec.RunWithTimeout(ctx, 5*time.Second, "git", "-C", dir, "ls-files", "--error-unmatch", base) + return err == nil && exit == 0 +} + +// pipDefaultInGitRepo walks parent dirs looking for `.git` (directory or +// file — git worktrees use a file). Stops at filesystem root. +func pipDefaultInGitRepo(path string) bool { + dir := filepath.Dir(path) + for { + gitPath := filepath.Join(dir, ".git") + if _, err := os.Stat(gitPath); err == nil { + return true + } + parent := filepath.Dir(dir) + if parent == dir { + return false + } + dir = parent + } +} + +// pipFsWalkEntries is here so the detector stays self-contained even if +// future versions want to enumerate venvs on disk (out of scope for v1 +// per the plan; keeping the helper around for symmetry with nodescan). +var _ = func() fs.WalkDirFunc { return nil } + +// formatModeOctal is unused today (mode is rendered via fmt.Sprintf in +// populateFileMetadata) but kept for tests; suppress unused warning. +var _ = strconv.FormatUint diff --git a/internal/detector/pipconfig_findings.go b/internal/detector/pipconfig_findings.go new file mode 100644 index 0000000..c6be33a --- /dev/null +++ b/internal/detector/pipconfig_findings.go @@ -0,0 +1,654 @@ +package detector + +import ( + "fmt" + "path/filepath" + "sort" + "strconv" + "strings" + + "github.com/step-security/dev-machine-guard/internal/model" +) + +// Severity constants — strings on purpose so the JSON model is human-grep-able. +const ( + pipSevCritical = "CRITICAL" + pipSevHigh = "HIGH" + pipSevMedium = "MEDIUM" + pipSevLow = "LOW" + pipSevInfo = "INFO" +) + +// evaluatePipFindings walks every (file, section, key, value) entry plus +// every PIP_* env var and emits the catalog from spec §6 (pip-001 .. +// pip-024). Returns a stable-sorted slice keyed by (severity desc, ID, +// source). +// +// Rules are independent — multiple rules may fire on the same source. +// Severity escalation is layered: a credential-bearing key in a git- +// tracked file triggers BOTH pip-001 (creds) AND pip-004 (committed +// secrets) at CRITICAL, and the rendering layer can collapse them if +// desired. +func evaluatePipFindings(audit *model.PipAudit) []model.PipFinding { + var findings []model.PipFinding + emit := func(f model.PipFinding) { findings = append(findings, f) } + + // --- env-var rules --- + for _, ev := range audit.EnvVars { + evaluateEnvVarFindings(ev, emit) + } + + // --- PIP_CONFIG_FILE redirection / disable --- + evaluatePipConfigFileEnv(audit, emit) + + // --- per-file rules --- + for _, f := range audit.Files { + if !f.Exists { + continue + } + // Each section's entries get walked; auth-key escalation considers + // whether the file is git-tracked. + for _, sec := range f.Sections { + for _, kv := range sec.Entries { + for _, v := range kv.Values { + evaluateValueFindings(f, sec.Name, kv.Key, v, emit) + } + // Some rules (pip-007 on trusted-host, pip-023 require-hashes) + // trigger on the *presence* of a key regardless of value count; + // fold those in here for clarity. + evaluateKeyPresenceFindings(f, sec.Name, kv, emit) + } + } + // File-level rules: legacy path, mode permissions. + evaluateFileLevelFindings(f, emit) + } + + // --- netrc presence (informational; we don't parse contents) --- + if audit.Netrc != nil && audit.Netrc.Exists && audit.Netrc.Mode != "" { + if mode, ok := parseModeOctal(audit.Netrc.Mode); ok { + if mode&0o077 != 0 { + emit(model.PipFinding{ + ID: "pip-netrc-perms", + Severity: pipSevMedium, + Category: "file-permissions", + Source: audit.Netrc.Path, + Detail: fmt.Sprintf("~/.netrc has mode %s — group/other readable. curl refuses to read this; pip via requests may still read it.", audit.Netrc.Mode), + Remediation: "chmod 0600 " + audit.Netrc.Path, + }) + } else { + emit(model.PipFinding{ + ID: "pip-netrc-present", + Severity: pipSevInfo, + Category: "informational", + Source: audit.Netrc.Path, + Detail: "~/.netrc present (mode " + audit.Netrc.Mode + "). pip falls back to it for index credentials.", + }) + } + } + } + + // Stable ordering: severity desc → id → source. + sortFindings(findings) + return findings +} + +// evaluateEnvVarFindings runs the env-var-targeted rules from spec §3 / §6. +func evaluateEnvVarFindings(ev model.PipEnvVar, emit func(model.PipFinding)) { + // pip-001 — embedded creds in URL value + if u, has := urlHasEmbeddedCreds(ev.Value); has { + emit(model.PipFinding{ + ID: "pip-001", + Severity: pipSevCritical, + Category: "credential-exposure", + Source: ev.Name, + Key: pipKeyForEnvName(ev.Name), + ValueShown: redactCredsInValue(ev.Value), + Detail: "URL value embeds plaintext credentials in env var.", + Remediation: "Move the credential to a keyring entry or ~/.netrc (mode 0600), or fetch on demand via a CI secret store.", + }) + _ = u + } + // pip-002 / pip-006 / pip-008 — http:// scheme + if urlIsHTTP(ev.Value) { + key := strings.ToLower(pipKeyForEnvName(ev.Name)) + switch key { + case "extra-index-url": + emit(httpSchemeFinding("pip-002", pipSevCritical, ev.Name, "", key, ev.Value)) + case "index-url": + emit(httpSchemeFinding("pip-006", pipSevHigh, ev.Name, "", key, ev.Value)) + case "find-links": + emit(httpSchemeFinding("pip-008", pipSevHigh, ev.Name, "", key, ev.Value)) + } + } + // pip-003 — embedded creds in proxy + if ev.Name == "PIP_PROXY" || ev.Name == "HTTP_PROXY" || ev.Name == "HTTPS_PROXY" { + if _, has := urlHasEmbeddedCreds(ev.Value); has { + emit(model.PipFinding{ + ID: "pip-003", + Severity: pipSevCritical, + Category: "credential-exposure", + Source: ev.Name, + Key: pipKeyForEnvName(ev.Name), + ValueShown: redactCredsInValue(ev.Value), + Detail: "Proxy URL embeds credentials. Anything reading this env var sees them in plaintext.", + Remediation: "Use a credential helper (cntlm, kinit, keyring) instead of inline proxy creds.", + }) + } + } + // pip-005 — extra-index-url present (any value) + if pipKeyForEnvName(ev.Name) == "extra-index-url" && strings.TrimSpace(ev.Value) != "" { + emit(model.PipFinding{ + ID: "pip-005", + Severity: pipSevHigh, + Category: "dependency-confusion", + Source: ev.Name, + Key: "extra-index-url", + ValueShown: redactCredsInValue(ev.Value), + Detail: "Extra index configured. pip queries every index and installs the highest version found — an attacker controlling this index can override public-package versions (dependency-confusion).", + Remediation: "Prefer --index-url for the canonical source; or use PEP 503 simple-repository tooling that scopes packages to specific indexes.", + }) + } + // pip-007 — trusted-host (any value) + if pipKeyForEnvName(ev.Name) == "trusted-host" && strings.TrimSpace(ev.Value) != "" { + hosts := strings.Fields(ev.Value) + for _, h := range hosts { + emit(model.PipFinding{ + ID: "pip-007", + Severity: pipSevHigh, + Category: "tls-disabled", + Source: ev.Name, + Key: "trusted-host", + ValueShown: h, + Detail: fmt.Sprintf("trusted-host=%s disables HTTPS verification for that host. Any MITM on the path can serve trojaned packages.", h), + Remediation: "Remove the trusted-host entry and resolve the underlying TLS issue (corporate CA, valid cert, etc.).", + }) + } + } + // pip-011 — no-build-isolation = true + if pipKeyForEnvName(ev.Name) == "no-build-isolation" && parseTruthy(ev.Value) { + emit(model.PipFinding{ + ID: "pip-011", + Severity: pipSevMedium, + Category: "install-integrity", + Source: ev.Name, + Key: "no-build-isolation", + ValueShown: ev.Value, + Detail: "Build isolation disabled. Packages can install with access to the host's installed packages, which can mask supply-chain issues at build time.", + Remediation: "Leave PIP_NO_BUILD_ISOLATION unset (default false) unless you have a specific need.", + }) + } +} + +// evaluatePipConfigFileEnv handles pip-020 (redirected) and pip-021 (devnull-skip). +func evaluatePipConfigFileEnv(audit *model.PipAudit, emit func(model.PipFinding)) { + var pcfValue string + for _, ev := range audit.EnvVars { + if ev.Name == "PIP_CONFIG_FILE" { + pcfValue = ev.Value + break + } + } + if pcfValue == "" { + return + } + // pip-021: devnull / nul disables all config-file loads. + base := strings.ToLower(filepath.Base(pcfValue)) + if _, isNull := devNullPaths[base]; isNull || strings.EqualFold(pcfValue, "/dev/null") { + emit(model.PipFinding{ + ID: "pip-021", + Severity: pipSevMedium, + Category: "config-disabled", + Source: "PIP_CONFIG_FILE", + Key: "PIP_CONFIG_FILE", + ValueShown: pcfValue, + Detail: "PIP_CONFIG_FILE points to /dev/null (or nul) — pip skips loading every config file. Legitimate for hermetic CI; suspicious on a developer machine.", + Remediation: "Verify the value is intentional. If the operator didn't set it, investigate the parent process.", + }) + return + } + // pip-020: redirected to a non-default location. We can't know the + // "expected" location with certainty, so we surface any value that + // isn't under one of the known config dirs. + if !looksLikeStandardPipConfigPath(pcfValue) { + emit(model.PipFinding{ + ID: "pip-020", + Severity: pipSevMedium, + Category: "config-redirection", + Source: "PIP_CONFIG_FILE", + Key: "PIP_CONFIG_FILE", + ValueShown: pcfValue, + Detail: "PIP_CONFIG_FILE redirects pip to read config from a non-standard location.", + Remediation: "Confirm the override is intentional; the redirected file becomes the highest-precedence config source.", + }) + } +} + +// looksLikeStandardPipConfigPath returns true for paths that match a +// typical user/global pip config location. Used to decide whether +// PIP_CONFIG_FILE = X is suspicious. +func looksLikeStandardPipConfigPath(p string) bool { + p = strings.ToLower(p) + for _, pat := range []string{"/pip/pip.conf", "/pip/pip.ini", `\pip\pip.ini`, "/pip.conf", `\pip.ini`} { + if strings.HasSuffix(p, pat) { + return true + } + } + return false +} + +// evaluateValueFindings runs rules that depend on the value of a single +// (section, key, value) entry from a parsed file. +func evaluateValueFindings(f model.PipConfigFile, section, key, value string, emit func(model.PipFinding)) { + keyLower := strings.ToLower(key) + + // pip-001 — embedded credentials in any value + if _, has := urlHasEmbeddedCreds(value); has { + sev := pipSevCritical + emit(model.PipFinding{ + ID: "pip-001", + Severity: sev, + Category: "credential-exposure", + Source: f.Path, + Section: section, + Key: key, + ValueShown: redactCredsInValue(value), + Detail: "URL value embeds plaintext credentials in pip config file.", + Remediation: "Move the credential to a keyring entry or ~/.netrc (mode 0600), or fetch from a secret store at install time.", + }) + // pip-004 — escalates if file is git-tracked. + if f.GitTracked { + emit(model.PipFinding{ + ID: "pip-004", + Severity: pipSevCritical, + Category: "credential-exposure", + Source: f.Path, + Section: section, + Key: key, + ValueShown: redactCredsInValue(value), + Detail: "Credential-bearing pip config file is committed to git. Wherever this repo is published, the credential is published.", + Remediation: "Remove the credential, rotate it immediately, and rewrite git history (git filter-repo) to purge old commits.", + }) + } + } + + // HTTP scheme rules. Each key has its own pip-NNN. + if urlIsHTTP(value) { + switch keyLower { + case "extra-index-url": + emit(httpSchemeFinding("pip-002", pipSevCritical, f.Path, section, key, value)) + case "index-url": + emit(httpSchemeFinding("pip-006", pipSevHigh, f.Path, section, key, value)) + case "find-links": + emit(httpSchemeFinding("pip-008", pipSevHigh, f.Path, section, key, value)) + } + } + + // pip-003 — proxy with embedded creds + if keyLower == "proxy" { + if _, has := urlHasEmbeddedCreds(value); has { + emit(model.PipFinding{ + ID: "pip-003", + Severity: pipSevCritical, + Category: "credential-exposure", + Source: f.Path, + Section: section, + Key: key, + ValueShown: redactCredsInValue(value), + Detail: "Proxy URL embeds credentials.", + Remediation: "Use a credential helper (cntlm, kinit, keyring) instead of inline proxy creds.", + }) + } + } + + // pip-005 — extra-index-url present (any value) + if keyLower == "extra-index-url" && strings.TrimSpace(value) != "" { + emit(model.PipFinding{ + ID: "pip-005", + Severity: pipSevHigh, + Category: "dependency-confusion", + Source: f.Path, + Section: section, + Key: key, + ValueShown: redactCredsInValue(value), + Detail: "Extra index configured. pip queries every index and installs the highest version found — an attacker controlling this index can override public-package versions (dependency-confusion).", + Remediation: "Prefer --index-url for the canonical source; or use PEP 503 simple-repository tooling that scopes packages to specific indexes.", + }) + } + + // pip-007 — trusted-host (any host present). Files use one host per + // parsed value, so we emit one finding per value here. The env-var + // path emits one per space-separated host in the same way. + if keyLower == "trusted-host" && strings.TrimSpace(value) != "" { + emit(model.PipFinding{ + ID: "pip-007", + Severity: pipSevHigh, + Category: "tls-disabled", + Source: f.Path, + Section: section, + Key: key, + ValueShown: value, + Detail: fmt.Sprintf("trusted-host=%s disables HTTPS verification for that host. Any MITM on the path can serve trojaned packages.", value), + Remediation: "Remove the trusted-host entry and resolve the underlying TLS issue (corporate CA, valid cert, etc.).", + }) + } + + // pip-009 / pip-010 — cert / client-cert configured (informational; severity MEDIUM) + switch keyLower { + case "cert": + emit(model.PipFinding{ + ID: "pip-009", + Severity: pipSevMedium, + Category: "tls-trust", + Source: f.Path, + Section: section, + Key: key, + ValueShown: value, + Detail: "Custom CA bundle configured. Verify the file at this path exists and is owned by a trusted account.", + Remediation: "Confirm the CA file is part of the org's intended trust store.", + }) + case "client-cert": + emit(model.PipFinding{ + ID: "pip-010", + Severity: pipSevMedium, + Category: "credential-exposure", + Source: f.Path, + Section: section, + Key: key, + ValueShown: value, + Detail: "Client certificate configured. Verify the key file is mode 0600 and not world-readable.", + Remediation: "chmod 0600 on the client key file; rotate if it ever was world-readable.", + }) + } + + // pip-011 — no-build-isolation + if keyLower == "no-build-isolation" && parseTruthy(value) { + emit(model.PipFinding{ + ID: "pip-011", + Severity: pipSevMedium, + Category: "install-integrity", + Source: f.Path, + Section: section, + Key: key, + ValueShown: value, + Detail: "Build isolation disabled. Packages install with access to the host's site-packages — supply-chain issues at build time may be masked.", + Remediation: "Set no-build-isolation = false (or remove the line) unless you have a specific need.", + }) + } + + // pip-012 — no-binary = :all: + if keyLower == "no-binary" && strings.TrimSpace(value) == ":all:" { + emit(model.PipFinding{ + ID: "pip-012", + Severity: pipSevMedium, + Category: "install-integrity", + Source: f.Path, + Section: section, + Key: key, + ValueShown: value, + Detail: "no-binary = :all: forces source builds for every package, which run setup.py — i.e. arbitrary Python code at install time.", + Remediation: "Pin to specific packages that genuinely need source builds rather than disabling wheels globally.", + }) + } + + // pip-013 — cache-dir in /tmp or world-writable + if keyLower == "cache-dir" { + v := strings.TrimSpace(value) + if strings.HasPrefix(v, "/tmp/") || strings.HasPrefix(v, "/var/tmp/") { + emit(model.PipFinding{ + ID: "pip-013", + Severity: pipSevMedium, + Category: "path-tampering", + Source: f.Path, + Section: section, + Key: key, + ValueShown: v, + Detail: "cache-dir under /tmp — predictable, world-traversable, and prone to cache-poisoning by a co-tenant.", + Remediation: "Use $HOME/.cache/pip or another user-owned, owner-only-writable path.", + }) + } + } + + // pip-015 — index-url non-default (informational; HTTPS only) + if keyLower == "index-url" { + v := strings.TrimSpace(value) + if v != "" && v != "https://pypi.org/simple" && v != "https://pypi.org/simple/" && !urlIsHTTP(v) { + emit(model.PipFinding{ + ID: "pip-015", + Severity: pipSevInfo, + Category: "dependency-source", + Source: f.Path, + Section: section, + Key: key, + ValueShown: redactCredsInValue(v), + Detail: "index-url overrides PyPI to a private index. This is normal in many orgs — surfaced for inventory.", + }) + } + } + + // pip-016 — keyring-provider = disabled (informational) + if keyLower == "keyring-provider" && strings.EqualFold(strings.TrimSpace(value), "disabled") { + emit(model.PipFinding{ + ID: "pip-016", + Severity: pipSevLow, + Category: "auth-narrowing", + Source: f.Path, + Section: section, + Key: key, + ValueShown: value, + Detail: "keyring-provider = disabled. pip won't consult the system keyring; only URL-embedded creds, env vars, and ~/.netrc are used.", + }) + } + + // pip-017 — no-cache-dir = true + if keyLower == "no-cache-dir" && parseTruthy(value) { + emit(model.PipFinding{ + ID: "pip-017", + Severity: pipSevLow, + Category: "informational", + Source: f.Path, + Section: section, + Key: key, + ValueShown: value, + Detail: "Cache disabled — every install fetches fresh from the index.", + }) + } + + // pip-018 — pre = true + if keyLower == "pre" && parseTruthy(value) { + emit(model.PipFinding{ + ID: "pip-018", + Severity: pipSevLow, + Category: "informational", + Source: f.Path, + Section: section, + Key: key, + ValueShown: value, + Detail: "pre = true — pre-release versions are eligible for install. Reduces version predictability.", + }) + } + + // pip-023 (positive) — require-hashes = true + if keyLower == "require-hashes" && parseTruthy(value) { + emit(model.PipFinding{ + ID: "pip-023", + Severity: pipSevInfo, + Category: "defensive-control", + Source: f.Path, + Section: section, + Key: key, + ValueShown: value, + Detail: "require-hashes = true — every install must include a hash. Strong defensive control against tampered indexes.", + }) + } + + // pip-024 (positive) — only-binary = :all: in [install] + if keyLower == "only-binary" && strings.TrimSpace(value) == ":all:" && section == "install" { + emit(model.PipFinding{ + ID: "pip-024", + Severity: pipSevInfo, + Category: "defensive-control", + Source: f.Path, + Section: section, + Key: key, + ValueShown: value, + Detail: "only-binary = :all: blocks source builds — installs cannot run setup.py. Strong defensive control.", + }) + } +} + +// evaluateKeyPresenceFindings handles rules that fire on key presence +// regardless of value count or repeats. Currently empty (most presence +// rules are handled per-value above), but kept as a hook for future +// rules that need a "saw it once" pattern. +func evaluateKeyPresenceFindings(_ model.PipConfigFile, _ string, _ model.PipKeyValue, _ func(model.PipFinding)) { + // no-op for now +} + +// evaluateFileLevelFindings handles rules that depend on file metadata +// rather than parsed values: legacy paths, mode permissions. +func evaluateFileLevelFindings(f model.PipConfigFile, emit func(model.PipFinding)) { + // pip-019 — legacy ~/.pip/pip.conf or %HOME%\pip\pip.ini in use + if f.Layer == "user-legacy" { + emit(model.PipFinding{ + ID: "pip-019", + Severity: pipSevLow, + Category: "hygiene", + Source: f.Path, + Detail: "Legacy pip config location in use. pip will keep loading it, but the current location ($XDG_CONFIG_HOME/pip/pip.conf or %APPDATA%\\pip\\pip.ini) is preferred.", + Remediation: "Move the file to the current location; delete the legacy one.", + }) + } + + // pip-022 — file mode broader than 0644 (or 0600 when contains creds) + if f.Mode == "" { + return // Windows, where we don't compute permissions + } + mode, ok := parseModeOctal(f.Mode) + if !ok { + return + } + containsCreds := fileContainsEmbeddedCreds(f) + switch { + case containsCreds && mode&0o077 != 0: + emit(model.PipFinding{ + ID: "pip-022", + Severity: pipSevHigh, + Category: "file-permissions", + Source: f.Path, + Detail: fmt.Sprintf("Config file contains embedded credentials AND has mode %s — group/other readable. Anyone with shell access reads the secret.", f.Mode), + Remediation: fmt.Sprintf("chmod 0600 %s and rotate the credential.", f.Path), + }) + case mode&0o022 != 0 && f.Layer == "global": + emit(model.PipFinding{ + ID: "pip-022", + Severity: pipSevHigh, + Category: "file-permissions", + Source: f.Path, + Detail: fmt.Sprintf("Global pip config has mode %s — group/other writable. A non-root account can change pip's defaults for every user.", f.Mode), + Remediation: fmt.Sprintf("chown root:root and chmod 0644 %s.", f.Path), + }) + case mode > 0o644: + emit(model.PipFinding{ + ID: "pip-022", + Severity: pipSevMedium, + Category: "file-permissions", + Source: f.Path, + Detail: fmt.Sprintf("Config file mode %s is broader than 0644.", f.Mode), + Remediation: fmt.Sprintf("chmod 0644 %s (or 0600 if it contains credentials).", f.Path), + }) + } +} + +// fileContainsEmbeddedCreds returns true if any URL value across any +// section of the file embeds credentials. Used to decide whether mode +// findings should escalate. +func fileContainsEmbeddedCreds(f model.PipConfigFile) bool { + for _, sec := range f.Sections { + for _, kv := range sec.Entries { + for _, v := range kv.Values { + if _, has := urlHasEmbeddedCreds(v); has { + return true + } + } + } + } + return false +} + +// httpSchemeFinding builds the common shape used by pip-002 / pip-006 / +// pip-008. The caller picks the ID and severity. +func httpSchemeFinding(id, sev, source, section, key, value string) model.PipFinding { + return model.PipFinding{ + ID: id, + Severity: sev, + Category: "tls-disabled", + Source: source, + Section: section, + Key: key, + ValueShown: redactCredsInValue(value), + Detail: fmt.Sprintf("%s uses http:// — registry traffic is unencrypted and trivially MITM-able.", key), + Remediation: "Switch to https:// (with a valid cert if it's an internal index).", + } +} + +// parseTruthy mimics pip's RawConfigParser-derived bool semantics: +// `true` / `yes` / `1` / `on` (case-insensitive) are truthy. Empty +// string is NOT false — but for our finding rules we want exact +// matching, so empty returns false. +func parseTruthy(v string) bool { + v = strings.TrimSpace(strings.ToLower(v)) + switch v { + case "true", "yes", "1", "on": + return true + } + return false +} + +// parseModeOctal parses a mode string like "0644" or "0o644" into a +// uint32. Returns false on parse failure. +func parseModeOctal(s string) (uint32, bool) { + if s == "" { + return 0, false + } + s = strings.TrimPrefix(s, "0o") + s = strings.TrimPrefix(s, "0") + if s == "" { + return 0, true // "0" alone is mode 0 + } + v, err := strconv.ParseUint(s, 8, 32) + if err != nil { + return 0, false + } + return uint32(v), true +} + +// sortFindings produces a deterministic ordering for output. We sort by +// severity (CRITICAL → HIGH → MEDIUM → LOW → INFO), then by ID, then by +// source for tie-breaking. Stable sort so equal-keyed rows preserve +// the order they were emitted. +func sortFindings(findings []model.PipFinding) { + rank := func(s string) int { + switch s { + case pipSevCritical: + return 0 + case pipSevHigh: + return 1 + case pipSevMedium: + return 2 + case pipSevLow: + return 3 + case pipSevInfo: + return 4 + } + return 99 + } + sort.SliceStable(findings, func(i, j int) bool { + if rank(findings[i].Severity) != rank(findings[j].Severity) { + return rank(findings[i].Severity) < rank(findings[j].Severity) + } + if findings[i].ID != findings[j].ID { + return findings[i].ID < findings[j].ID + } + return findings[i].Source < findings[j].Source + }) +} diff --git a/internal/detector/pipconfig_findings_test.go b/internal/detector/pipconfig_findings_test.go new file mode 100644 index 0000000..68cc0c7 --- /dev/null +++ b/internal/detector/pipconfig_findings_test.go @@ -0,0 +1,305 @@ +package detector + +import ( + "strings" + "testing" + + "github.com/step-security/dev-machine-guard/internal/model" +) + +const at = "@" // dodge editor auto-link rewriting on literal user@host forms + +// findingsByID extracts a quick map of present finding IDs for a test +// audit. Multiple findings with the same ID collapse into one entry — +// counts can be inspected via the slice indexing on findings[i]. +func findingsByID(findings []model.PipFinding) map[string]int { + out := map[string]int{} + for _, f := range findings { + out[f.ID]++ + } + return out +} + +// makeAuditWithUserFile constructs a minimal PipAudit with one +// already-parsed file containing the given sections+entries. +func makeAuditWithUserFile(path, mode string, gitTracked bool, sections []model.PipSection) *model.PipAudit { + return &model.PipAudit{ + Files: []model.PipConfigFile{ + { + Path: path, + Layer: "user", + Exists: true, + Readable: true, + Mode: mode, + SHA256: "deadbeef", + GitTracked: gitTracked, + Sections: sections, + }, + }, + } +} + +// section helper for terser test fixtures. +func sec(name string, entries ...model.PipKeyValue) model.PipSection { + return model.PipSection{Name: name, Entries: entries} +} + +func kv(key string, values ...string) model.PipKeyValue { + return model.PipKeyValue{Key: key, Values: values, Display: strings.Join(values, ", ")} +} + +func TestFindings_EmbeddedCredsInURL_pip001(t *testing.T) { + audit := makeAuditWithUserFile("/u/.config/pip/pip.conf", "0644", false, []model.PipSection{ + sec("global", kv("extra-index-url", "https://__token__:pypi-AgEI..."+at+"my-private.example.com/simple")), + }) + findings := evaluatePipFindings(audit) + ids := findingsByID(findings) + for _, want := range []string{"pip-001", "pip-005"} { + if ids[want] == 0 { + t.Errorf("expected %s, got %v", want, ids) + } + } + // pip-001 must be CRITICAL. + for _, f := range findings { + if f.ID == "pip-001" && f.Severity != "CRITICAL" { + t.Errorf("pip-001 severity %q, want CRITICAL", f.Severity) + } + if f.ID == "pip-001" && strings.Contains(f.ValueShown, "pypi-AgEI") { + t.Errorf("pip-001 leaked credential in ValueShown: %q", f.ValueShown) + } + } +} + +func TestFindings_GitTrackedEscalation_pip004(t *testing.T) { + audit := makeAuditWithUserFile("/u/code/repo/.pip/pip.conf", "0644", true /* git-tracked */, []model.PipSection{ + sec("global", kv("extra-index-url", "https://alice:secret"+at+"internal.example.com/simple")), + }) + findings := evaluatePipFindings(audit) + ids := findingsByID(findings) + if ids["pip-004"] == 0 { + t.Errorf("expected pip-004 escalation, got %v", ids) + } + for _, f := range findings { + if f.ID == "pip-004" && f.Severity != "CRITICAL" { + t.Errorf("pip-004 severity %q, want CRITICAL", f.Severity) + } + } +} + +func TestFindings_HTTPScheme_pip002_pip006_pip008(t *testing.T) { + audit := makeAuditWithUserFile("/u/.config/pip/pip.conf", "0644", false, []model.PipSection{ + sec("global", + kv("index-url", "http://internal.example.com/simple"), + kv("extra-index-url", "http://other.example.com/simple"), + kv("find-links", "http://mirror.example.com"), + ), + }) + findings := evaluatePipFindings(audit) + ids := findingsByID(findings) + for _, want := range []string{"pip-002", "pip-006", "pip-008"} { + if ids[want] == 0 { + t.Errorf("expected %s, got %v", want, ids) + } + } +} + +func TestFindings_TrustedHost_pip007(t *testing.T) { + audit := makeAuditWithUserFile("/u/.config/pip/pip.conf", "0644", false, []model.PipSection{ + sec("global", kv("trusted-host", "a.example.com", "b.example.com")), + }) + findings := evaluatePipFindings(audit) + count := 0 + for _, f := range findings { + if f.ID == "pip-007" { + count++ + if f.Severity != "HIGH" { + t.Errorf("pip-007 severity %q, want HIGH", f.Severity) + } + } + } + if count != 2 { + t.Errorf("expected 2 pip-007 findings (one per host), got %d", count) + } +} + +func TestFindings_NoBuildIsolation_pip011(t *testing.T) { + audit := makeAuditWithUserFile("/u/.config/pip/pip.conf", "0644", false, []model.PipSection{ + sec("global", kv("no-build-isolation", "true")), + }) + ids := findingsByID(evaluatePipFindings(audit)) + if ids["pip-011"] == 0 { + t.Errorf("expected pip-011, got %v", ids) + } +} + +func TestFindings_RequireHashesPositive_pip023(t *testing.T) { + audit := makeAuditWithUserFile("/u/.config/pip/pip.conf", "0644", false, []model.PipSection{ + sec("install", kv("require-hashes", "true"), kv("only-binary", ":all:")), + }) + findings := evaluatePipFindings(audit) + ids := findingsByID(findings) + if ids["pip-023"] == 0 { + t.Errorf("expected positive pip-023, got %v", ids) + } + if ids["pip-024"] == 0 { + t.Errorf("expected positive pip-024, got %v", ids) + } + for _, f := range findings { + if f.ID == "pip-023" || f.ID == "pip-024" { + if f.Severity != "INFO" { + t.Errorf("%s should be INFO, got %s", f.ID, f.Severity) + } + } + } +} + +func TestFindings_PipConfigFileDevNull_pip021(t *testing.T) { + audit := &model.PipAudit{ + EnvVars: []model.PipEnvVar{ + {Name: "PIP_CONFIG_FILE", Value: "/dev/null", Display: "/dev/null"}, + }, + } + ids := findingsByID(evaluatePipFindings(audit)) + if ids["pip-021"] == 0 { + t.Errorf("expected pip-021, got %v", ids) + } + if ids["pip-020"] != 0 { + t.Errorf("pip-020 should NOT fire when devnull is present (different rule path), got %v", ids) + } +} + +func TestFindings_PipConfigFileRedirected_pip020(t *testing.T) { + audit := &model.PipAudit{ + EnvVars: []model.PipEnvVar{ + {Name: "PIP_CONFIG_FILE", Value: "/tmp/attacker.conf", Display: "/tmp/attacker.conf"}, + }, + } + ids := findingsByID(evaluatePipFindings(audit)) + if ids["pip-020"] == 0 { + t.Errorf("expected pip-020, got %v", ids) + } +} + +func TestFindings_LegacyPath_pip019(t *testing.T) { + audit := &model.PipAudit{ + Files: []model.PipConfigFile{ + {Path: "/home/u/.pip/pip.conf", Layer: "user-legacy", Exists: true, Readable: true, Mode: "0644"}, + }, + } + ids := findingsByID(evaluatePipFindings(audit)) + if ids["pip-019"] == 0 { + t.Errorf("expected pip-019, got %v", ids) + } +} + +func TestFindings_FilePermissions_pip022(t *testing.T) { + // Plain mode > 0644 → MEDIUM, no creds. + a1 := &model.PipAudit{ + Files: []model.PipConfigFile{{ + Path: "/u/.config/pip/pip.conf", Layer: "user", Exists: true, Readable: true, Mode: "0666", + Sections: []model.PipSection{}, + }}, + } + for _, f := range evaluatePipFindings(a1) { + if f.ID == "pip-022" && f.Severity != "MEDIUM" { + t.Errorf("plain mode > 0644 should be MEDIUM, got %s", f.Severity) + } + } + + // Contains creds + mode > 0600 → HIGH. + a2 := makeAuditWithUserFile("/u/.config/pip/pip.conf", "0644", false, []model.PipSection{ + sec("global", kv("extra-index-url", "https://alice:secret"+at+"internal.example.com/simple")), + }) + for _, f := range evaluatePipFindings(a2) { + if f.ID == "pip-022" && f.Severity != "HIGH" { + t.Errorf("creds + mode 0644 should be HIGH, got %s", f.Severity) + } + } + + // Global + group/other writable → HIGH. + a3 := &model.PipAudit{ + Files: []model.PipConfigFile{{ + Path: "/etc/pip.conf", Layer: "global", Exists: true, Readable: true, Mode: "0666", + Sections: []model.PipSection{}, + }}, + } + sawHigh := false + for _, f := range evaluatePipFindings(a3) { + if f.ID == "pip-022" && f.Severity == "HIGH" { + sawHigh = true + } + } + if !sawHigh { + t.Errorf("global + world-writable should escalate to HIGH") + } +} + +func TestFindings_NetrcLoosePerms(t *testing.T) { + audit := &model.PipAudit{ + Netrc: &model.PipNetrcStatus{Path: "/u/.netrc", Exists: true, Mode: "0644"}, + } + sawNetrc := false + for _, f := range evaluatePipFindings(audit) { + if f.ID == "pip-netrc-perms" { + sawNetrc = true + if f.Severity != "MEDIUM" { + t.Errorf("netrc perms severity %q, want MEDIUM", f.Severity) + } + } + } + if !sawNetrc { + t.Errorf("expected pip-netrc-perms finding") + } +} + +func TestFindings_StableOrdering_severityFirst(t *testing.T) { + audit := makeAuditWithUserFile("/u/.config/pip/pip.conf", "0644", false, []model.PipSection{ + sec("global", + kv("pre", "true"), // LOW + kv("extra-index-url", "https://alice:secret"+at+"internal.example.com/simple"), // CRITICAL + kv("trusted-host", "x.example.com"), // HIGH + ), + }) + findings := evaluatePipFindings(audit) + if findings[0].Severity != "CRITICAL" { + t.Errorf("expected first finding to be CRITICAL, got %+v", findings[0]) + } + last := findings[len(findings)-1] + if last.Severity != "LOW" && last.Severity != "INFO" { + t.Errorf("expected last finding to be LOW/INFO, got %+v", last) + } +} + +func TestParseTruthy(t *testing.T) { + for _, tt := range []struct { + in string + want bool + }{ + {"true", true}, {"TRUE", true}, {"yes", true}, {"1", true}, {"on", true}, + {"false", false}, {"no", false}, {"0", false}, {"", false}, {" ", false}, + } { + if got := parseTruthy(tt.in); got != tt.want { + t.Errorf("parseTruthy(%q) = %v, want %v", tt.in, got, tt.want) + } + } +} + +func TestParseModeOctal(t *testing.T) { + cases := []struct { + in string + want uint32 + ok bool + }{ + {"0644", 0o644, true}, + {"0o644", 0o644, true}, + {"600", 0o600, true}, + {"abc", 0, false}, + {"", 0, false}, + } + for _, c := range cases { + got, ok := parseModeOctal(c.in) + if ok != c.ok || got != c.want { + t.Errorf("parseModeOctal(%q) = (%v, %v), want (%v, %v)", c.in, got, ok, c.want, c.ok) + } + } +} diff --git a/internal/detector/pipconfig_helpers.go b/internal/detector/pipconfig_helpers.go new file mode 100644 index 0000000..a0dae65 --- /dev/null +++ b/internal/detector/pipconfig_helpers.go @@ -0,0 +1,116 @@ +package detector + +import ( + "crypto/sha256" + "encoding/hex" + "net/url" + "regexp" + "strings" +) + +// urlSchemePattern matches the leading scheme of a URL-shaped string. We +// use it as a quick gate before paying for net/url.Parse. +var urlSchemePattern = regexp.MustCompile(`^[a-zA-Z][a-zA-Z0-9+\-.]*://`) + +// looksLikeURL returns true if the string starts with a URL scheme. +func looksLikeURL(s string) bool { + return urlSchemePattern.MatchString(strings.TrimSpace(s)) +} + +// urlHasEmbeddedCreds reports whether the value contains a URL with +// `user[:pass]@host` syntax. Returns the parsed URL when true so callers +// can inspect scheme and host without re-parsing. +func urlHasEmbeddedCreds(s string) (*url.URL, bool) { + if !looksLikeURL(s) { + return nil, false + } + u, err := url.Parse(strings.TrimSpace(s)) + if err != nil || u == nil { + return nil, false + } + if u.User == nil { + return nil, false + } + return u, true +} + +// urlIsHTTP returns true when the URL scheme is plain http (not https, +// not file, not anything else). Used for pip-002, pip-006, pip-008. +func urlIsHTTP(s string) bool { + if !looksLikeURL(s) { + return false + } + u, err := url.Parse(strings.TrimSpace(s)) + if err != nil || u == nil { + return false + } + return strings.EqualFold(u.Scheme, "http") +} + +// redactCredsInValue returns a safe-to-display copy of a value with any +// embedded URL credentials reduced to `user:****@host` form. Non-URL +// inputs (and URLs without credentials) are returned unchanged. +// +// We avoid url.URL.String() for the rewritten URL because it +// percent-encodes the asterisks back to %2A. Since we already validated +// the structure via net/url, we can splice the original string between +// the scheme and the `@` and recompose the credential portion ourselves. +func redactCredsInValue(s string) string { + u, has := urlHasEmbeddedCreds(s) + if !has { + return s + } + trimmed := strings.TrimSpace(s) + // Find the `://` and the `@` that terminates userinfo. A URL with + // userinfo always has the form "scheme://userinfo@host[/path]". + schemeIdx := strings.Index(trimmed, "://") + if schemeIdx < 0 { + return s + } + rest := trimmed[schemeIdx+3:] + atIdx := strings.IndexByte(rest, '@') + if atIdx < 0 { + return s + } + prefix := trimmed[:schemeIdx+3] + tail := rest[atIdx:] // includes the '@' + username := u.User.Username() + if _, hasPassword := u.User.Password(); hasPassword { + return prefix + username + ":****" + tail + } + return prefix + "****" + tail +} + +// hashCredential returns a stable short identifier for a credential value +// — used to recognize "the same credential" across runs without ever +// storing or re-emitting the plaintext. We hash the full raw value (incl. +// any URL prefix) and return the first 12 hex chars; that's collision- +// resistant enough for de-duplication and short enough to display. +func hashCredential(s string) string { + if s == "" { + return "" + } + sum := sha256.Sum256([]byte(s)) + return hex.EncodeToString(sum[:])[:12] +} + +// pipEnvNameForKey converts a pip config key (e.g. "no-build-isolation") to +// its environment-variable form (PIP_NO_BUILD_ISOLATION). Used when we +// need to check whether a key set in a file has also been overridden by +// an env var. +func pipEnvNameForKey(key string) string { + upper := strings.ToUpper(key) + upper = strings.ReplaceAll(upper, "-", "_") + return "PIP_" + upper +} + +// pipKeyForEnvName is the inverse of pipEnvNameForKey: PIP_INDEX_URL → +// "index-url". Returns the lower-cased, hyphenated key without the PIP_ +// prefix. If the input doesn't start with PIP_, returns the input unchanged. +func pipKeyForEnvName(env string) string { + if !strings.HasPrefix(env, "PIP_") { + return env + } + tail := strings.ToLower(env[4:]) + return strings.ReplaceAll(tail, "_", "-") +} diff --git a/internal/detector/pipconfig_parse.go b/internal/detector/pipconfig_parse.go new file mode 100644 index 0000000..803b184 --- /dev/null +++ b/internal/detector/pipconfig_parse.go @@ -0,0 +1,153 @@ +package detector + +import ( + "bufio" + "bytes" + "strings" + + "github.com/step-security/dev-machine-guard/internal/model" +) + +// parsePipConfig parses a pip config file into ordered sections. The format +// matches Python's RawConfigParser semantics: +// +// - `[section]` headers introduce a new section. Entries before any +// section header land in an implicit "" (empty-name) section, which +// matches RawConfigParser's "DEFAULT"-but-we-store-empty convention +// (we don't actually use DEFAULT semantics; pip never relies on them). +// - `key = value` or `key=value` is a single-value entry. +// - A line whose key is followed by `=` and an empty value, then one or +// more INDENTED lines, accumulates the indented lines as the multi- +// value list. This matches pip's documented multi-value format for +// repeatable options like `find-links` and `trusted-host`. +// - Comments: a line whose first non-whitespace character is `;` or `#` +// is a comment. Pip's parser does NOT recognize inline comments +// (matching RawConfigParser), so `key = value ; this is part of the value` +// keeps `; this is part of the value` in the value. +// - Blank lines are skipped. +// - Pip does NOT interpolate `${VAR}` references. Literal `${VAR}` in a +// value stays a literal — we propagate it verbatim. +// +// Returns the ordered list of sections. Malformed lines are skipped (we +// prefer to surface partial data rather than fail the whole audit on a +// stray byte). +func parsePipConfig(data []byte) []model.PipSection { + // Strip BOM if present. + data = bytes.TrimPrefix(data, []byte{0xEF, 0xBB, 0xBF}) + + scanner := bufio.NewScanner(bytes.NewReader(data)) + scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) + + var sections []model.PipSection + curSection := -1 // index into sections; -1 means no active section yet + curEntry := -1 // index into sections[curSection].Entries; -1 means no continuation target + pendingMulti := false + + openSection := func(name string, lineNum int) { + sections = append(sections, model.PipSection{Name: name, LineNum: lineNum}) + curSection = len(sections) - 1 + curEntry = -1 + pendingMulti = false + } + + // Ensure we always have a section to attach entries to. Pip files almost + // always start with [global], but the spec doesn't require it; honor + // pre-section entries by attaching them to an unnamed section. + ensureSection := func(lineNum int) { + if curSection < 0 { + openSection("", lineNum) + } + } + + lineNum := 0 + for scanner.Scan() { + lineNum++ + raw := scanner.Text() + // Detect indented continuation BEFORE trimming. RawConfigParser + // treats any leading whitespace as a continuation when there's a + // prior key with an empty value awaiting values. + indented := len(raw) > 0 && (raw[0] == ' ' || raw[0] == '\t') + trimmed := strings.TrimSpace(raw) + if trimmed == "" { + pendingMulti = false + continue + } + // Comment line. + if trimmed[0] == ';' || trimmed[0] == '#' { + pendingMulti = false + continue + } + // Section header. + if strings.HasPrefix(trimmed, "[") && strings.HasSuffix(trimmed, "]") && len(trimmed) >= 2 { + name := strings.TrimSpace(trimmed[1 : len(trimmed)-1]) + openSection(name, lineNum) + continue + } + // Continuation line — attach to most recent entry's value list. + if indented && pendingMulti && curSection >= 0 && curEntry >= 0 { + sections[curSection].Entries[curEntry].Values = append( + sections[curSection].Entries[curEntry].Values, trimmed, + ) + continue + } + // New key/value line. + eq := strings.IndexByte(trimmed, '=') + if eq < 0 { + // Could be `key:` style (configparser supports `:` too). Try + // that as a fallback. + eq = strings.IndexByte(trimmed, ':') + if eq < 0 { + pendingMulti = false + continue + } + } + key := strings.TrimSpace(trimmed[:eq]) + value := strings.TrimSpace(trimmed[eq+1:]) + if key == "" { + pendingMulti = false + continue + } + + ensureSection(lineNum) + entry := model.PipKeyValue{Key: key, LineNum: lineNum} + if value == "" { + // Empty inline value — likely the multi-value continuation form. + // Leave Values empty for now; subsequent indented lines will + // append. + entry.Values = nil + pendingMulti = true + } else { + entry.Values = []string{value} + pendingMulti = false + } + sections[curSection].Entries = append(sections[curSection].Entries, entry) + curEntry = len(sections[curSection].Entries) - 1 + } + + // Post-pass: build Display strings (used by the verbose renderer for a + // compact one-line view of multi-values). + for s := range sections { + for e := range sections[s].Entries { + ent := §ions[s].Entries[e] + ent.Display = renderPipDisplay(ent.Key, ent.Values) + } + } + + return sections +} + +// renderPipDisplay returns a safe-for-display string for a parsed pip +// entry. Multi-values become "v1, v2". URLs with embedded credentials are +// redacted to "user:****@host" form so the parsed view never leaks +// secrets even in pretty/HTML output. (The findings engine re-runs the +// same redaction; doing it here keeps the static view honest too.) +func renderPipDisplay(_ string, values []string) string { + if len(values) == 0 { + return "" + } + parts := make([]string, len(values)) + for i, v := range values { + parts[i] = redactCredsInValue(v) + } + return strings.Join(parts, ", ") +} diff --git a/internal/detector/pipconfig_parse_test.go b/internal/detector/pipconfig_parse_test.go new file mode 100644 index 0000000..78bfd59 --- /dev/null +++ b/internal/detector/pipconfig_parse_test.go @@ -0,0 +1,173 @@ +package detector + +import ( + "strings" + "testing" +) + +func TestParsePipConfig_BasicSections(t *testing.T) { + in := `[global] +index-url = https://pypi.org/simple +timeout = 60 + +[install] +no-build-isolation = true +` + secs := parsePipConfig([]byte(in)) + if len(secs) != 2 { + t.Fatalf("expected 2 sections, got %d: %+v", len(secs), secs) + } + if secs[0].Name != "global" || secs[1].Name != "install" { + t.Errorf("section names wrong: %+v", []string{secs[0].Name, secs[1].Name}) + } + if len(secs[0].Entries) != 2 { + t.Errorf("expected 2 entries in [global], got %+v", secs[0].Entries) + } + // Spot-check a value. + for _, e := range secs[0].Entries { + if e.Key == "index-url" { + if len(e.Values) != 1 || e.Values[0] != "https://pypi.org/simple" { + t.Errorf("index-url value wrong: %+v", e) + } + } + } +} + +func TestParsePipConfig_MultiValue(t *testing.T) { + in := `[global] +trusted-host = + a.example.com + b.example.com + c.example.com +find-links = + https://mirror1.example.com + https://mirror2.example.com +` + secs := parsePipConfig([]byte(in)) + if len(secs) != 1 || len(secs[0].Entries) != 2 { + t.Fatalf("unexpected structure: %+v", secs) + } + for _, e := range secs[0].Entries { + switch e.Key { + case "trusted-host": + if len(e.Values) != 3 { + t.Errorf("trusted-host: want 3 values, got %v", e.Values) + } + if e.Values[0] != "a.example.com" || e.Values[2] != "c.example.com" { + t.Errorf("trusted-host values wrong: %v", e.Values) + } + case "find-links": + if len(e.Values) != 2 { + t.Errorf("find-links: want 2 values, got %v", e.Values) + } + } + } +} + +func TestParsePipConfig_CommentsBoth(t *testing.T) { + in := `; comment 1 +[global] +# comment 2 +index-url = https://pypi.org/simple +` + secs := parsePipConfig([]byte(in)) + if len(secs) != 1 || len(secs[0].Entries) != 1 { + t.Fatalf("comments not skipped correctly: %+v", secs) + } +} + +func TestParsePipConfig_NoEnvInterpolation(t *testing.T) { + // pip does NOT expand ${VAR}. Our parser must propagate it verbatim. + in := `[global] +index-url = https://${INTERNAL_INDEX_HOST}/simple +` + secs := parsePipConfig([]byte(in)) + if len(secs) != 1 || len(secs[0].Entries) != 1 { + t.Fatalf("structure: %+v", secs) + } + if !strings.Contains(secs[0].Entries[0].Values[0], "${INTERNAL_INDEX_HOST}") { + t.Errorf("env-ref form not preserved: %v", secs[0].Entries[0]) + } +} + +func TestParsePipConfig_ColonSeparator(t *testing.T) { + // configparser supports `key: value` as well as `key = value`. + in := `[global] +index-url: https://pypi.org/simple +` + secs := parsePipConfig([]byte(in)) + if len(secs) != 1 || len(secs[0].Entries) != 1 || secs[0].Entries[0].Values[0] != "https://pypi.org/simple" { + t.Fatalf("colon separator not handled: %+v", secs) + } +} + +func TestParsePipConfig_BOMStrippedAndDisplayRedacts(t *testing.T) { + // NB: editor tooling auto-rewrites the literal string "user@host" forms + // into markdown email-protection links. Build the input via fmt to + // dodge that. + const at = "@" + in := "\xEF\xBB\xBF[global]\nextra-index-url = https://user:secret" + at + "internal.example.com/simple\n" + secs := parsePipConfig([]byte(in)) + if len(secs) != 1 || len(secs[0].Entries) != 1 { + t.Fatalf("BOM/sections wrong: %+v", secs) + } + d := secs[0].Entries[0].Display + if strings.Contains(d, "secret") { + t.Errorf("Display leaks credential: %q", d) + } + if !strings.Contains(d, "user:****"+at+"internal.example.com") { + t.Errorf("Display should redact to user:****%shost form, got %q", at, d) + } +} + +func TestRedactCredsInValue(t *testing.T) { + const at = "@" + cases := []struct{ in, want string }{ + {"https://pypi.org/simple", "https://pypi.org/simple"}, + {"https://alice" + at + "internal.example.com/simple", "https://****" + at + "internal.example.com/simple"}, + {"https://alice:secret" + at + "internal.example.com/simple", "https://alice:****" + at + "internal.example.com/simple"}, + {"http://__token__:pypi-AgEI..." + at + "upload.pypi.org/", "http://__token__:****" + at + "upload.pypi.org/"}, + {"not-a-url", "not-a-url"}, + } + for _, c := range cases { + got := redactCredsInValue(c.in) + if got != c.want { + t.Errorf("redactCredsInValue(%q) = %q, want %q", c.in, got, c.want) + } + } +} + +func TestUrlIsHTTP(t *testing.T) { + cases := map[string]bool{ + "http://example.com": true, + "https://example.com": false, + "file:///etc/x": false, + "not-a-url": false, + "HTTP://EXAMPLE.COM": true, // case-insensitive scheme + } + for in, want := range cases { + if got := urlIsHTTP(in); got != want { + t.Errorf("urlIsHTTP(%q) = %v, want %v", in, got, want) + } + } +} + +func TestPipKeyEnvNameRoundTrip(t *testing.T) { + cases := []struct{ key, env string }{ + {"index-url", "PIP_INDEX_URL"}, + {"no-build-isolation", "PIP_NO_BUILD_ISOLATION"}, + {"trusted-host", "PIP_TRUSTED_HOST"}, + } + for _, c := range cases { + if got := pipEnvNameForKey(c.key); got != c.env { + t.Errorf("pipEnvNameForKey(%q) = %q, want %q", c.key, got, c.env) + } + if got := pipKeyForEnvName(c.env); got != c.key { + t.Errorf("pipKeyForEnvName(%q) = %q, want %q", c.env, got, c.key) + } + } + // Non-PIP_ env name returns unchanged. + if got := pipKeyForEnvName("HTTP_PROXY"); got != "HTTP_PROXY" { + t.Errorf("non-PIP_ name should round-trip unchanged, got %q", got) + } +} diff --git a/internal/detector/pipconfig_stat_unix.go b/internal/detector/pipconfig_stat_unix.go new file mode 100644 index 0000000..4b95618 --- /dev/null +++ b/internal/detector/pipconfig_stat_unix.go @@ -0,0 +1,37 @@ +//go:build !windows + +package detector + +import ( + "os" + "os/user" + "strconv" + "syscall" +) + +// pipStatOwner returns the file owner via syscall.Stat_t. We bypass the +// Executor here because uid/gid are exposed only through Sys() of an +// os.FileInfo — the mock executor's mockFileInfo can't represent that. +// Tests substitute their own ownerLookup hook so they never reach this. +func pipStatOwner(path string) pipOwnerInfo { + info, err := os.Stat(path) + if err != nil { + return pipOwnerInfo{} + } + st, ok := info.Sys().(*syscall.Stat_t) + if !ok { + return pipOwnerInfo{} + } + oi := pipOwnerInfo{ + UID: int(st.Uid), + GID: int(st.Gid), + OK: true, + } + if u, err := user.LookupId(strconv.Itoa(oi.UID)); err == nil { + oi.OwnerName = u.Username + } + if g, err := user.LookupGroupId(strconv.Itoa(oi.GID)); err == nil { + oi.GroupName = g.Name + } + return oi +} diff --git a/internal/detector/pipconfig_stat_windows.go b/internal/detector/pipconfig_stat_windows.go new file mode 100644 index 0000000..d6d7edd --- /dev/null +++ b/internal/detector/pipconfig_stat_windows.go @@ -0,0 +1,10 @@ +//go:build windows + +package detector + +// pipStatOwner is a no-op on Windows: SID-to-username resolution is +// non-trivial and not actionable for v1 of this audit. The detector +// handles ok=false by leaving owner fields empty. +func pipStatOwner(_ string) pipOwnerInfo { + return pipOwnerInfo{} +} diff --git a/internal/detector/pipconfig_test.go b/internal/detector/pipconfig_test.go new file mode 100644 index 0000000..483f31c --- /dev/null +++ b/internal/detector/pipconfig_test.go @@ -0,0 +1,246 @@ +package detector + +import ( + "context" + "os" + "os/user" + "path/filepath" + "strings" + "testing" + + "github.com/step-security/dev-machine-guard/internal/executor" +) + +// fixedPipOwner is a deterministic ownerLookup hook so tests don't depend +// on real syscalls or platform-specific user names. +func fixedPipOwner() func(string) pipOwnerInfo { + return func(_ string) pipOwnerInfo { + return pipOwnerInfo{UID: 1000, GID: 1000, OwnerName: "tester", GroupName: "staff", OK: true} + } +} + +func mustWritePipFile(t *testing.T, path, content string) { + t.Helper() + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + if err := os.WriteFile(path, []byte(content), 0o644); err != nil { + t.Fatalf("write: %v", err) + } +} + +func TestDetect_DiscoveryViaPipDebug(t *testing.T) { + tmp := t.TempDir() + userPath := filepath.Join(tmp, "home", ".config", "pip", "pip.conf") + mustWritePipFile(t, userPath, "[global]\nindex-url = https://pypi.org/simple\n") + + mock := executor.NewMock() + mock.SetPath("pip", "/usr/bin/pip") + mock.SetCommand("pip 24.0 from /usr/bin (python 3.12)\n", "", 0, "pip", "--version") + // `pip config debug` output mimicking the real format. + debug := `env_var: +env: +global: + /etc/xdg/pip/pip.conf, exists: False + /etc/pip.conf, exists: False +user: + ` + userPath + `, exists: True + index-url: https://pypi.org/simple +site: +` + mock.SetCommand(debug, "", 0, "pip", "config", "debug") + // Effective view (we don't rely on the body for this test). + mock.SetCommand("", "", 0, "pip", "config", "list", "-v") + mock.SetHomeDir(filepath.Join(tmp, "home")) + + d := NewPipConfigDetector(mock) + d.ownerLookup = fixedPipOwner() + d.gitTracked = func(_ context.Context, _ string) bool { return false } + d.inGitRepo = func(_ string) bool { return false } + + loggedIn := &user.User{Username: "tester", HomeDir: filepath.Join(tmp, "home")} + audit := d.Detect(context.Background(), loggedIn) + + if !audit.Available { + t.Errorf("expected pip available") + } + if audit.Version != "24.0" { + t.Errorf("pip version = %q, want 24.0", audit.Version) + } + + // Should have at least one user-scope file pointing at our temp path. + var sawUser bool + for _, f := range audit.Files { + if f.Path == userPath && f.Layer == "user" { + sawUser = true + if !f.Exists || !f.Readable { + t.Errorf("user file should exist+read: %+v", f) + } + if len(f.Sections) == 0 || f.Sections[0].Name != "global" { + t.Errorf("expected [global] section: %+v", f.Sections) + } + } + } + if !sawUser { + t.Errorf("user-scope file not surfaced; got files=%+v", audit.Files) + } +} + +func TestDetect_PipMissingDoesntCrash(t *testing.T) { + tmp := t.TempDir() + userPath := filepath.Join(tmp, "home", ".config", "pip", "pip.conf") + mustWritePipFile(t, userPath, "[global]\nextra-index-url = http://internal.example.com/simple\n") + + mock := executor.NewMock() + // No SetPath for pip — LookPath fails. + mock.SetHomeDir(filepath.Join(tmp, "home")) + + d := NewPipConfigDetector(mock) + d.ownerLookup = fixedPipOwner() + d.gitTracked = func(_ context.Context, _ string) bool { return false } + d.inGitRepo = func(_ string) bool { return false } + + loggedIn := &user.User{Username: "tester", HomeDir: filepath.Join(tmp, "home")} + audit := d.Detect(context.Background(), loggedIn) + + if audit.Available { + t.Errorf("pip should be marked unavailable") + } + if audit.Effective != nil { + t.Errorf("effective view should be nil when pip missing, got %+v", audit.Effective) + } + // We should still discover the user file via path enumeration. + var sawFile bool + for _, f := range audit.Files { + if f.Path == userPath && f.Exists { + sawFile = true + } + } + if !sawFile { + t.Errorf("file enumeration should still find the user file when pip is missing; got %+v", audit.Files) + } + // And the http:// extra-index-url should still produce findings. + var sawHTTP bool + for _, fnd := range audit.Findings { + if fnd.ID == "pip-002" { + sawHTTP = true + } + } + if !sawHTTP { + t.Errorf("pip-002 should fire even without pip installed; got findings=%+v", audit.Findings) + } +} + +func TestDetect_VirtualEnvAddedAsSite(t *testing.T) { + tmp := t.TempDir() + venvPath := filepath.Join(tmp, "venv") + pipConf := filepath.Join(venvPath, "pip.conf") + mustWritePipFile(t, pipConf, "[global]\nrequire-hashes = true\n") + + mock := executor.NewMock() + mock.SetEnv("VIRTUAL_ENV", venvPath) + // No pip on PATH — keep this test focused on env-driven discovery. + mock.SetHomeDir(filepath.Join(tmp, "home")) + + d := NewPipConfigDetector(mock) + d.ownerLookup = fixedPipOwner() + d.gitTracked = func(_ context.Context, _ string) bool { return false } + d.inGitRepo = func(_ string) bool { return false } + + audit := d.Detect(context.Background(), &user.User{Username: "tester", HomeDir: filepath.Join(tmp, "home")}) + + var sawSite bool + for _, f := range audit.Files { + if f.Path == pipConf && f.Layer == "site" { + sawSite = true + } + } + if !sawSite { + t.Errorf("VIRTUAL_ENV pip.conf should be added as site layer; got files=%+v", audit.Files) + } +} + +func TestDetect_PipConfigFileEnvHonored(t *testing.T) { + tmp := t.TempDir() + customPath := filepath.Join(tmp, "custom-pip.conf") + mustWritePipFile(t, customPath, "[global]\nno-build-isolation = true\n") + + mock := executor.NewMock() + mock.SetEnv("PIP_CONFIG_FILE", customPath) + mock.SetHomeDir(filepath.Join(tmp, "home")) + + d := NewPipConfigDetector(mock) + d.ownerLookup = fixedPipOwner() + d.gitTracked = func(_ context.Context, _ string) bool { return false } + d.inGitRepo = func(_ string) bool { return false } + + audit := d.Detect(context.Background(), &user.User{Username: "tester", HomeDir: filepath.Join(tmp, "home")}) + + var sawCustom bool + for _, f := range audit.Files { + if f.Path == customPath && f.Layer == "PIP_CONFIG_FILE" { + sawCustom = true + } + } + if !sawCustom { + t.Errorf("PIP_CONFIG_FILE override not honored; got %+v", audit.Files) + } + // Should also produce a pip-020 redirection finding (custom path). + var sawRedirect bool + for _, fnd := range audit.Findings { + if fnd.ID == "pip-020" { + sawRedirect = true + } + } + if !sawRedirect { + t.Errorf("pip-020 should fire for non-standard PIP_CONFIG_FILE; got %+v", audit.Findings) + } +} + +func TestParseEffectiveOutput(t *testing.T) { + // Direct test of the effective-view text parser via a hand-crafted output. + mock := executor.NewMock() + mock.SetPath("pip", "/usr/bin/pip") + mock.SetCommand("pip 24.0\n", "", 0, "pip", "--version") + mock.SetCommand("", "", 0, "pip", "config", "debug") + out := `For variant 'global', will try loading '/etc/pip.conf'. +For variant 'user', will try loading '/u/.config/pip/pip.conf'. +global.index-url='https://internal.example.com/simple' from /u/.config/pip/pip.conf +install.no-build-isolation='true' from PIP_NO_BUILD_ISOLATION +` + mock.SetCommand(out, "", 0, "pip", "config", "list", "-v") + + d := NewPipConfigDetector(mock) + d.ownerLookup = fixedPipOwner() + d.gitTracked = func(_ context.Context, _ string) bool { return false } + d.inGitRepo = func(_ string) bool { return false } + + audit := d.Detect(context.Background(), nil) + if audit.Effective == nil { + t.Fatal("effective view nil") + } + if got := audit.Effective.Config["global.index-url"]; got != "https://internal.example.com/simple" { + t.Errorf("merged config wrong for global.index-url: %q", got) + } + if got := audit.Effective.SourceByKey["global.index-url"]; got != "/u/.config/pip/pip.conf" { + t.Errorf("source wrong for global.index-url: %q", got) + } + if got := audit.Effective.SourceByKey["install.no-build-isolation"]; got != "PIP_NO_BUILD_ISOLATION" { + t.Errorf("env-var source wrong: %q", got) + } + // Sanity: also produced a positive informational finding for index-url + // being non-default. + var sawInfo bool + for _, f := range audit.Findings { + if f.ID == "pip-015" { + sawInfo = true + } + } + if !sawInfo && strings.Contains(out, "internal.example.com") { + // pip-015 only fires when the merged value lands in a *parsed file*, + // not from the effective output. Our test doesn't ship a parsed + // file with that value, so absence is correct. Keeping the check + // commented in spirit so the next reader knows why. + _ = sawInfo + } +} diff --git a/internal/model/model.go b/internal/model/model.go index e04680d..0f22233 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -27,6 +27,7 @@ type ScanResult struct { FlatpakPkgManager *PkgManager `json:"flatpak_package_manager,omitempty"` FlatpakPackages []SystemPackage `json:"flatpak_packages"` NPMRCAudit *NPMRCAudit `json:"npmrc_audit,omitempty"` + PipAudit *PipAudit `json:"pip_audit,omitempty"` Summary Summary `json:"summary"` } @@ -309,3 +310,100 @@ type NPMRCEnvVar struct { ValueSHA256 string `json:"value_sha256,omitempty"` } +// --- pip configuration audit ------------------------------------------------- +// +// Mirrors NPMRCAudit but reflects pip-specific realities: real INI +// sections, no env-var interpolation, and a fixed finding catalog +// (pip-001 .. pip-024) instead of free-form classification. + +// PipAudit is the top-level pip audit object. +type PipAudit struct { + Available bool `json:"pip_available"` + Invocation string `json:"pip_invocation,omitempty"` // "pip" | "pip3" | "python3 -m pip" + Version string `json:"pip_version,omitempty"` + Path string `json:"pip_path,omitempty"` + Files []PipConfigFile `json:"files"` + EnvVars []PipEnvVar `json:"env_vars"` + Effective *PipEffective `json:"effective,omitempty"` + Netrc *PipNetrcStatus `json:"netrc,omitempty"` + Findings []PipFinding `json:"findings"` + DiscoveryError string `json:"discovery_error,omitempty"` +} + +// PipConfigFile is one pip.conf / pip.ini discovered on disk. Layer is the +// precedence layer pip itself assigns. +type PipConfigFile struct { + Path string `json:"path"` + Layer string `json:"layer"` // global | user | user-legacy | site | PIP_CONFIG_FILE + Exists bool `json:"exists"` + Readable bool `json:"readable"` + SizeBytes int64 `json:"size_bytes,omitempty"` + ModTimeUnix int64 `json:"mtime_unix,omitempty"` + Mode string `json:"mode,omitempty"` + OwnerName string `json:"owner_name,omitempty"` + GroupName string `json:"group_name,omitempty"` + SHA256 string `json:"sha256,omitempty"` + InGitRepo bool `json:"in_git_repo,omitempty"` + GitTracked bool `json:"git_tracked,omitempty"` + Sections []PipSection `json:"sections,omitempty"` + ParseError string `json:"parse_error,omitempty"` +} + +// PipSection is one [section] block in a pip config file. +type PipSection struct { + Name string `json:"name"` // "global", "install", "freeze", "wheel", "list", "hash", ... + LineNum int `json:"line_num"` + Entries []PipKeyValue `json:"entries"` +} + +// PipKeyValue is a single key/value (or key/multi-value) entry inside a +// section. Repeatable options surface as multiple Values. +type PipKeyValue struct { + Key string `json:"key"` + Values []string `json:"values"` // len()>=1; each value is a separate logical entry + Display string `json:"display,omitempty"` // human-readable single-line rendering, with creds redacted + LineNum int `json:"line_num"` +} + +// PipEnvVar captures one PIP_* environment variable. Display is the +// finding-grade safe-to-print form (creds redacted in URLs). +type PipEnvVar struct { + Name string `json:"name"` + Value string `json:"-"` // raw; never serialized + Display string `json:"display"` + SHA256 string `json:"sha256,omitempty"` +} + +// PipEffective is the merged-config view from `pip config list -v`. The +// SourceByKey map keys are "
." to disambiguate the same key +// appearing in multiple sections. +type PipEffective struct { + SourceByKey map[string]string `json:"source_by_key,omitempty"` + Config map[string]string `json:"config,omitempty"` + Error string `json:"error,omitempty"` +} + +// PipFinding is one detection from the rule catalog (pip-001 .. pip-024). +// ValueShown is always pre-redacted; the raw value never leaves the +// detector. +type PipFinding struct { + ID string `json:"id"` // "pip-001" etc. + Severity string `json:"severity"` // CRITICAL | HIGH | MEDIUM | LOW | INFO + Category string `json:"category"` + Source string `json:"source"` // file path or env var name + Section string `json:"section,omitempty"` // "global" / "install" / "" for env vars + Key string `json:"key,omitempty"` + ValueShown string `json:"value_shown,omitempty"` + Detail string `json:"detail"` + Remediation string `json:"remediation,omitempty"` +} + +// PipNetrcStatus is informational: pip falls back to ~/.netrc for +// credentials, so its presence + permissions matter even though we don't +// parse the contents (.netrc is shared with curl/wget/twine/etc.; auditing +// its content is a separate concern). +type PipNetrcStatus struct { + Path string `json:"path"` + Exists bool `json:"exists"` + Mode string `json:"mode,omitempty"` // empty on Windows +} diff --git a/internal/output/pipconfig_verbose.go b/internal/output/pipconfig_verbose.go new file mode 100644 index 0000000..2d37399 --- /dev/null +++ b/internal/output/pipconfig_verbose.go @@ -0,0 +1,244 @@ +package output + +import ( + "fmt" + "io" + "sort" + "strings" + + "github.com/step-security/dev-machine-guard/internal/model" +) + +// PrettyPipConfig renders a pip-only audit in a verbose, terminal-friendly +// format. Behind the `--pipconfig` flag. +// +//nolint:errcheck // terminal output +func PrettyPipConfig(w io.Writer, audit *model.PipAudit, dev model.Device, colorMode string) { + c := setupColors(colorMode) + + hr := strings.Repeat("─", 76) + fmt.Fprintf(w, "%s%s%s\n", c.purple, hr, c.reset) + fmt.Fprintf(w, "%s%s PIP CONFIG AUDIT %s\n", c.purple, c.bold, c.reset) + fmt.Fprintf(w, "%s%s%s\n", c.purple, hr, c.reset) + fmt.Fprintf(w, " host: %s%s%s user: %s%s%s platform: %s\n", + c.bold, dev.Hostname, c.reset, c.bold, dev.UserIdentity, c.reset, dev.Platform) + if audit.Available { + fmt.Fprintf(w, " pip: %s%s%s @ %s (%s)\n", c.green, audit.Version, c.reset, audit.Path, audit.Invocation) + } else { + fmt.Fprintf(w, " pip: %s(not found in PATH — file-only audit)%s\n", c.dim, c.reset) + } + if audit.DiscoveryError != "" { + fmt.Fprintf(w, " %swarn: %s%s\n", c.dim, audit.DiscoveryError, c.reset) + } + fmt.Fprintln(w) + + // --- findings (most important; show first) --- + printPipFindings(w, c, audit.Findings) + + // --- discovered files --- + printPipFiles(w, c, audit.Files) + + // --- effective merged view --- + if audit.Effective != nil { + printPipEffective(w, c, audit.Effective) + } + + // --- env vars (only shows ones that are set) --- + printPipEnvVars(w, c, audit.EnvVars) + + // --- netrc status --- + printPipNetrc(w, c, audit.Netrc) +} + +//nolint:errcheck // terminal output +func printPipFindings(w io.Writer, c *colors, findings []model.PipFinding) { + counts := map[string]int{} + for _, f := range findings { + counts[f.Severity]++ + } + fmt.Fprintf(w, "%s%s┌── FINDINGS%s %sCRITICAL %d HIGH %d MEDIUM %d LOW %d INFO %d%s\n", + c.purple, c.bold, c.reset, + c.dim, counts["CRITICAL"], counts["HIGH"], counts["MEDIUM"], counts["LOW"], counts["INFO"], c.reset) + if len(findings) == 0 { + fmt.Fprintf(w, "│ %sno findings — pip configuration looks clean%s\n", c.dim, c.reset) + fmt.Fprintln(w) + return + } + for _, f := range findings { + printPipFinding(w, c, f) + } + fmt.Fprintln(w) +} + +//nolint:errcheck // terminal output +func printPipFinding(w io.Writer, c *colors, f model.PipFinding) { + sevColor := c.dim + switch f.Severity { + case "CRITICAL": + sevColor = c.bold + c.purple + case "HIGH": + sevColor = c.bold + case "MEDIUM": + sevColor = c.purple + case "LOW": + sevColor = c.dim + case "INFO": + sevColor = c.green + } + source := f.Source + if f.Section != "" { + source = fmt.Sprintf("%s [%s]", source, f.Section) + } + fmt.Fprintf(w, "│ %s%-8s%s %s%s%s %s%s%s\n", + sevColor, f.Severity, c.reset, c.bold, f.ID, c.reset, c.dim, f.Category, c.reset) + fmt.Fprintf(w, "│ %ssource:%s %s\n", c.dim, c.reset, source) + if f.Key != "" { + fmt.Fprintf(w, "│ %skey:%s %s\n", c.dim, c.reset, f.Key) + } + if f.ValueShown != "" { + fmt.Fprintf(w, "│ %svalue:%s %s\n", c.dim, c.reset, f.ValueShown) + } + if f.Detail != "" { + fmt.Fprintf(w, "│ %sdetail:%s %s\n", c.dim, c.reset, f.Detail) + } + if f.Remediation != "" { + fmt.Fprintf(w, "│ %sfix:%s %s\n", c.dim, c.reset, f.Remediation) + } + fmt.Fprintln(w, "│") +} + +//nolint:errcheck // terminal output +func printPipFiles(w io.Writer, c *colors, files []model.PipConfigFile) { + fmt.Fprintf(w, "%s%s┌── DISCOVERED CONFIG FILES (%d)%s\n", c.purple, c.bold, len(files), c.reset) + if len(files) == 0 { + fmt.Fprintf(w, "│ %sno pip config files at any scope%s\n", c.dim, c.reset) + fmt.Fprintln(w) + return + } + // Stable order: layer rank → path. + sorted := append([]model.PipConfigFile(nil), files...) + sort.SliceStable(sorted, func(i, j int) bool { + if pipLayerRank(sorted[i].Layer) != pipLayerRank(sorted[j].Layer) { + return pipLayerRank(sorted[i].Layer) < pipLayerRank(sorted[j].Layer) + } + return sorted[i].Path < sorted[j].Path + }) + for _, f := range sorted { + printPipFile(w, c, f) + } + fmt.Fprintln(w) +} + +func pipLayerRank(l string) int { + switch l { + case "global": + return 0 + case "user-legacy": + return 1 + case "user": + return 2 + case "site": + return 3 + case "PIP_CONFIG_FILE": + return 4 + } + return 99 +} + +//nolint:errcheck // terminal output +func printPipFile(w io.Writer, c *colors, f model.PipConfigFile) { + tag := strings.ToUpper(f.Layer) + fmt.Fprintf(w, "│\n│ %s%s[%s]%s %s\n", c.purple, c.bold, tag, c.reset, f.Path) + if !f.Exists { + fmt.Fprintf(w, "│ %s(file does not exist — pip would skip this scope)%s\n", c.dim, c.reset) + return + } + owner := "?" + if f.OwnerName != "" { + owner = fmt.Sprintf("%s:%s", f.OwnerName, f.GroupName) + } + sha := f.SHA256 + if len(sha) > 12 { + sha = sha[:12] + } + fmt.Fprintf(w, "│ %smode=%s size=%db owner=%s sha=%s%s\n", + c.dim, f.Mode, f.SizeBytes, owner, sha, c.reset) + if f.GitTracked { + fmt.Fprintf(w, "│ %s· %sGIT-TRACKED%s%s — committed credentials/config would be exposed wherever the repo is\n", c.dim, c.bold, c.reset, c.dim) + } else if f.InGitRepo { + fmt.Fprintf(w, "│ %s· inside a git repo (untracked)%s\n", c.dim, c.reset) + } + if f.ParseError != "" { + fmt.Fprintf(w, "│ %sparse error: %s%s\n", c.dim, f.ParseError, c.reset) + return + } + if len(f.Sections) == 0 { + fmt.Fprintf(w, "│ %s(empty file)%s\n", c.dim, c.reset) + return + } + for _, sec := range f.Sections { + fmt.Fprintf(w, "│ %s[%s]%s\n", c.bold, sec.Name, c.reset) + for _, kv := range sec.Entries { + vals := kv.Display + if vals == "" && len(kv.Values) > 0 { + vals = strings.Join(kv.Values, ", ") + } + fmt.Fprintf(w, "│ %s%4d:%s %-32s = %s%s%s\n", + c.dim, kv.LineNum, c.reset, kv.Key, c.dim, vals, c.reset) + } + } +} + +//nolint:errcheck // terminal output +func printPipEffective(w io.Writer, c *colors, eff *model.PipEffective) { + fmt.Fprintf(w, "%s%s┌── EFFECTIVE CONFIG (what pip would actually use) %s\n", c.purple, c.bold, c.reset) + if eff.Error != "" { + fmt.Fprintf(w, "│ %swarn: %s%s\n", c.dim, eff.Error, c.reset) + } + if len(eff.Config) == 0 { + fmt.Fprintf(w, "│ %s(no merged config returned — pip not available, or no config set)%s\n", c.dim, c.reset) + fmt.Fprintln(w) + return + } + keys := make([]string, 0, len(eff.Config)) + for k := range eff.Config { + keys = append(keys, k) + } + sort.Strings(keys) + for _, k := range keys { + src := eff.SourceByKey[k] + if src == "" { + src = "default" + } + fmt.Fprintf(w, "│ %-40s = %s%s%s %sfrom %s%s\n", + k, c.dim, eff.Config[k], c.reset, c.dim, src, c.reset) + } + fmt.Fprintln(w) +} + +//nolint:errcheck // terminal output +func printPipEnvVars(w io.Writer, c *colors, env []model.PipEnvVar) { + if len(env) == 0 { + return + } + fmt.Fprintf(w, "%s%s┌── PIP-RELEVANT ENVIRONMENT VARIABLES (set: %d)%s\n", c.purple, c.bold, len(env), c.reset) + for _, e := range env { + fmt.Fprintf(w, "│ %s = %s%s%s\n", e.Name, c.dim, e.Display, c.reset) + } + fmt.Fprintln(w) +} + +//nolint:errcheck // terminal output +func printPipNetrc(w io.Writer, c *colors, n *model.PipNetrcStatus) { + if n == nil { + return + } + fmt.Fprintf(w, "%s%s┌── ~/.netrc%s\n", c.purple, c.bold, c.reset) + if !n.Exists { + fmt.Fprintf(w, "│ %snot present%s\n", c.dim, c.reset) + } else { + fmt.Fprintf(w, "│ %spath:%s %s %smode:%s %s\n", c.dim, c.reset, n.Path, c.dim, c.reset, n.Mode) + fmt.Fprintf(w, "│ %s(content not parsed — .netrc is shared with curl/wget/twine; that's a separate audit)%s\n", c.dim, c.reset) + } + fmt.Fprintln(w) +} diff --git a/internal/output/pretty.go b/internal/output/pretty.go index a183409..68e0fd1 100644 --- a/internal/output/pretty.go +++ b/internal/output/pretty.go @@ -267,6 +267,11 @@ func Pretty(w io.Writer, result *model.ScanResult, colorMode string) error { printNPMRCAuditSummary(w, c, result.NPMRCAudit) } + // PIP CONFIG AUDIT (compact summary; deep view via --pipconfig) + if result.PipAudit != nil { + printPipAuditSummary(w, c, result.PipAudit) + } + return nil } @@ -289,6 +294,27 @@ func printNPMRCAuditSummary(w io.Writer, c *colors, a *model.NPMRCAudit) { fmt.Fprintln(w) } +//nolint:errcheck // terminal output +func printPipAuditSummary(w io.Writer, c *colors, a *model.PipAudit) { + fmt.Fprintf(w, " %s%sPIP CONFIG AUDIT%s\n", c.purple, c.bold, c.reset) + if a.Available { + fmt.Fprintf(w, " %spip:%s %s @ %s\n", c.dim, c.reset, a.Version, a.Path) + } else { + fmt.Fprintf(w, " %spip:%s not found in PATH\n", c.dim, c.reset) + } + counts := map[string]int{} + for _, f := range a.Findings { + counts[f.Severity]++ + } + fmt.Fprintf(w, " %sfiles:%s %d %sfindings:%s %sCRITICAL %d HIGH %d MEDIUM %d LOW %d INFO %d%s\n", + c.dim, c.reset, len(a.Files), + c.dim, c.reset, + c.bold, counts["CRITICAL"], counts["HIGH"], counts["MEDIUM"], counts["LOW"], counts["INFO"], c.reset) + if len(a.Findings) > 0 { + fmt.Fprintf(w, " %srun --pipconfig for the deep view%s\n", c.dim, c.reset) + } + fmt.Fprintln(w) +} //nolint:errcheck // terminal output func printSectionHeader(w io.Writer, c *colors, title string, count int) { diff --git a/internal/scan/scanner.go b/internal/scan/scanner.go index acc5c40..5d9ae60 100644 --- a/internal/scan/scanner.go +++ b/internal/scan/scanner.go @@ -215,6 +215,13 @@ func Run(exec executor.Executor, log *progress.Logger, cfg *cli.Config) error { npmrcAudit := detector.NewNPMRCDetector(exec).Detect(ctx, searchDirs, loggedInUser) log.StepDone(time.Since(start)) + // pip config audit — same shape: every pip.conf / pip.ini discovered, + // merged effective view, env-var snapshot, and a fixed finding catalog. + log.StepStart("Auditing pip configuration") + start = time.Now() + pipAudit := detector.NewPipConfigDetector(exec).Detect(ctx, loggedInUser) + log.StepDone(time.Since(start)) + // Ensure no nil slices (JSON must emit [] not null) if aiTools == nil { aiTools = []model.AITool{} @@ -284,6 +291,7 @@ func Run(exec executor.Executor, log *progress.Logger, cfg *cli.Config) error { FlatpakPkgManager: flatpakPkgManager, FlatpakPackages: flatpakPackages, NPMRCAudit: &npmrcAudit, + PipAudit: &pipAudit, Summary: model.Summary{ AIAgentsAndToolsCount: len(aiTools), IDEInstallationsCount: len(ides), diff --git a/internal/telemetry/telemetry.go b/internal/telemetry/telemetry.go index b529d37..587c490 100644 --- a/internal/telemetry/telemetry.go +++ b/internal/telemetry/telemetry.go @@ -54,6 +54,7 @@ type Payload struct { AIAgents []model.AITool `json:"ai_agents"` MCPConfigs []model.MCPConfigEnterprise `json:"mcp_configs"` NPMRCAudit *model.NPMRCAudit `json:"npmrc_audit,omitempty"` + PipAudit *model.PipAudit `json:"pip_audit,omitempty"` ExecutionLogs *ExecutionLogs `json:"execution_logs,omitempty"` PerformanceMetrics *PerformanceMetrics `json:"performance_metrics,omitempty"` @@ -509,17 +510,22 @@ func Run(exec executor.Executor, log *progress.Logger, cfg *cli.Config) (err err systemPackageScans = []model.SystemPackageScanResult{} } - // npm config audit — surface-only inventory of every .npmrc on the - // host plus the merged effective view npm itself would resolve. We - // use the user-aware executor so npm resolves through the logged-in - // user's PATH (catches nvm / fnm / brew installs that root's PATH - // wouldn't see). + // npm + pip configuration audits — surface-only inventory of every + // .npmrc and pip.conf on the host, plus the merged effective views + // each tool would resolve. We use the user-aware executor so npm and + // pip resolve through the logged-in user's PATH (catches nvm / fnm / + // pyenv / asdf / brew installs that root's PATH wouldn't see). log.Progress("Auditing npm configuration...") npmrcLoggedIn, _ := exec.LoggedInUser() npmrcAudit := detector.NewNPMRCDetector(userExec).Detect(ctx, searchDirs, npmrcLoggedIn) log.Progress(" npm available: %v, files discovered: %d", npmrcAudit.Available, len(npmrcAudit.Files)) fmt.Fprintln(os.Stderr) + log.Progress("Auditing pip configuration...") + pipAudit := detector.NewPipConfigDetector(userExec).Detect(ctx, npmrcLoggedIn) + log.Progress(" pip available: %v, files discovered: %d, findings: %d", pipAudit.Available, len(pipAudit.Files), len(pipAudit.Findings)) + fmt.Fprintln(os.Stderr) + // Finalize execution logs before building payload execLogsBase64 := capture.Finalize() endTime := time.Now() @@ -553,6 +559,7 @@ func Run(exec executor.Executor, log *progress.Logger, cfg *cli.Config) (err err AIAgents: allAI, MCPConfigs: mcpConfigs, NPMRCAudit: &npmrcAudit, + PipAudit: &pipAudit, ExecutionLogs: &ExecutionLogs{ OutputBase64: execLogsBase64, From 1aa14666f83d15664bb69c93e940ab8703c4fc4d Mon Sep 17 00:00:00 2001 From: Swarit Pandey Date: Wed, 6 May 2026 19:25:43 +0530 Subject: [PATCH 3/6] refactor(configaudit): split rc/pip audits into internal/detector/configaudit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moves the 15 npmrc + pipconfig files out of internal/detector/ and into a dedicated sub-package internal/detector/configaudit/. Pure rename + a package-decl change in each file (`package detector` → `package configaudit`) plus updated imports in the three callsites: - internal/scan/scanner.go - internal/telemetry/telemetry.go - cmd/stepsecurity-dev-machine-guard/main.go `detector.NewNPMRCDetector(...)` becomes `configaudit.NewNPMRCDetector(...)` and likewise for `NewPipConfigDetector`. No public-API changes beyond the package qualifier. Why: internal/detector/ had grown to 55 files and the rc/config-file audits are a self-contained subset (15 files, no cross-references with sibling detector files). Splitting them off drops detector/ to 40 files and clarifies the architecture: detector/ owns inventory-style detectors (IDEs, AI tools, package managers, etc.); configaudit/ owns config-file audits with their own discovery + parsing + finding semantics. Future sibling audits — .yarnrc.yml, .gemrc, .cargo/config.toml — fit naturally under configaudit/. Verified: full test suite green (configaudit tests run as a separate package now), cross-compile clean for darwin/linux/windows. --- cmd/stepsecurity-dev-machine-guard/main.go | 6 +++--- internal/detector/{ => configaudit}/npmrc.go | 2 +- internal/detector/{ => configaudit}/npmrc_parse.go | 2 +- internal/detector/{ => configaudit}/npmrc_parse_test.go | 2 +- internal/detector/{ => configaudit}/npmrc_stat_unix.go | 2 +- internal/detector/{ => configaudit}/npmrc_stat_windows.go | 2 +- internal/detector/{ => configaudit}/npmrc_test.go | 2 +- internal/detector/{ => configaudit}/pipconfig.go | 2 +- internal/detector/{ => configaudit}/pipconfig_findings.go | 2 +- .../detector/{ => configaudit}/pipconfig_findings_test.go | 2 +- internal/detector/{ => configaudit}/pipconfig_helpers.go | 2 +- internal/detector/{ => configaudit}/pipconfig_parse.go | 2 +- internal/detector/{ => configaudit}/pipconfig_parse_test.go | 2 +- internal/detector/{ => configaudit}/pipconfig_stat_unix.go | 2 +- .../detector/{ => configaudit}/pipconfig_stat_windows.go | 2 +- internal/detector/{ => configaudit}/pipconfig_test.go | 2 +- internal/scan/scanner.go | 5 +++-- internal/telemetry/telemetry.go | 5 +++-- 18 files changed, 24 insertions(+), 22 deletions(-) rename internal/detector/{ => configaudit}/npmrc.go (99%) rename internal/detector/{ => configaudit}/npmrc_parse.go (99%) rename internal/detector/{ => configaudit}/npmrc_parse_test.go (99%) rename internal/detector/{ => configaudit}/npmrc_stat_unix.go (97%) rename internal/detector/{ => configaudit}/npmrc_stat_windows.go (93%) rename internal/detector/{ => configaudit}/npmrc_test.go (99%) rename internal/detector/{ => configaudit}/pipconfig.go (99%) rename internal/detector/{ => configaudit}/pipconfig_findings.go (99%) rename internal/detector/{ => configaudit}/pipconfig_findings_test.go (99%) rename internal/detector/{ => configaudit}/pipconfig_helpers.go (99%) rename internal/detector/{ => configaudit}/pipconfig_parse.go (99%) rename internal/detector/{ => configaudit}/pipconfig_parse_test.go (99%) rename internal/detector/{ => configaudit}/pipconfig_stat_unix.go (97%) rename internal/detector/{ => configaudit}/pipconfig_stat_windows.go (93%) rename internal/detector/{ => configaudit}/pipconfig_test.go (99%) diff --git a/cmd/stepsecurity-dev-machine-guard/main.go b/cmd/stepsecurity-dev-machine-guard/main.go index d0817b9..b709163 100644 --- a/cmd/stepsecurity-dev-machine-guard/main.go +++ b/cmd/stepsecurity-dev-machine-guard/main.go @@ -11,7 +11,7 @@ import ( "github.com/step-security/dev-machine-guard/internal/buildinfo" "github.com/step-security/dev-machine-guard/internal/cli" "github.com/step-security/dev-machine-guard/internal/config" - "github.com/step-security/dev-machine-guard/internal/detector" + "github.com/step-security/dev-machine-guard/internal/detector/configaudit" "github.com/step-security/dev-machine-guard/internal/device" "github.com/step-security/dev-machine-guard/internal/executor" "github.com/step-security/dev-machine-guard/internal/launchd" @@ -217,7 +217,7 @@ func runNPMRCOnly(exec executor.Executor, cfg *cli.Config) error { loggedInUser, _ := exec.LoggedInUser() searchDirs := resolveScanSearchDirs(exec, cfg.SearchDirs) - audit := detector.NewNPMRCDetector(exec).Detect(ctx, searchDirs, loggedInUser) + audit := configaudit.NewNPMRCDetector(exec).Detect(ctx, searchDirs, loggedInUser) if cfg.OutputFormat == "json" { return scanJSONEncoder(os.Stdout).Encode(audit) @@ -233,7 +233,7 @@ func runPipConfigOnly(exec executor.Executor, cfg *cli.Config) error { dev := device.Gather(ctx, exec) loggedInUser, _ := exec.LoggedInUser() - audit := detector.NewPipConfigDetector(exec).Detect(ctx, loggedInUser) + audit := configaudit.NewPipConfigDetector(exec).Detect(ctx, loggedInUser) if cfg.OutputFormat == "json" { return scanJSONEncoder(os.Stdout).Encode(audit) diff --git a/internal/detector/npmrc.go b/internal/detector/configaudit/npmrc.go similarity index 99% rename from internal/detector/npmrc.go rename to internal/detector/configaudit/npmrc.go index 599dbe4..a280c01 100644 --- a/internal/detector/npmrc.go +++ b/internal/detector/configaudit/npmrc.go @@ -1,4 +1,4 @@ -package detector +package configaudit import ( "context" diff --git a/internal/detector/npmrc_parse.go b/internal/detector/configaudit/npmrc_parse.go similarity index 99% rename from internal/detector/npmrc_parse.go rename to internal/detector/configaudit/npmrc_parse.go index eab4077..4bbf716 100644 --- a/internal/detector/npmrc_parse.go +++ b/internal/detector/configaudit/npmrc_parse.go @@ -1,4 +1,4 @@ -package detector +package configaudit import ( "bufio" diff --git a/internal/detector/npmrc_parse_test.go b/internal/detector/configaudit/npmrc_parse_test.go similarity index 99% rename from internal/detector/npmrc_parse_test.go rename to internal/detector/configaudit/npmrc_parse_test.go index ae3b16a..96edc78 100644 --- a/internal/detector/npmrc_parse_test.go +++ b/internal/detector/configaudit/npmrc_parse_test.go @@ -1,4 +1,4 @@ -package detector +package configaudit import ( "strings" diff --git a/internal/detector/npmrc_stat_unix.go b/internal/detector/configaudit/npmrc_stat_unix.go similarity index 97% rename from internal/detector/npmrc_stat_unix.go rename to internal/detector/configaudit/npmrc_stat_unix.go index 58af9c1..b1f725e 100644 --- a/internal/detector/npmrc_stat_unix.go +++ b/internal/detector/configaudit/npmrc_stat_unix.go @@ -1,6 +1,6 @@ //go:build !windows -package detector +package configaudit import ( "os" diff --git a/internal/detector/npmrc_stat_windows.go b/internal/detector/configaudit/npmrc_stat_windows.go similarity index 93% rename from internal/detector/npmrc_stat_windows.go rename to internal/detector/configaudit/npmrc_stat_windows.go index b8a08b9..69e5c15 100644 --- a/internal/detector/npmrc_stat_windows.go +++ b/internal/detector/configaudit/npmrc_stat_windows.go @@ -1,6 +1,6 @@ //go:build windows -package detector +package configaudit // statOwner is a no-op on Windows: getting a meaningful owner string from a // SID is non-trivial and not actionable for the audit's first cut. The diff --git a/internal/detector/npmrc_test.go b/internal/detector/configaudit/npmrc_test.go similarity index 99% rename from internal/detector/npmrc_test.go rename to internal/detector/configaudit/npmrc_test.go index 4b52d6d..5d43ee3 100644 --- a/internal/detector/npmrc_test.go +++ b/internal/detector/configaudit/npmrc_test.go @@ -1,4 +1,4 @@ -package detector +package configaudit import ( "context" diff --git a/internal/detector/pipconfig.go b/internal/detector/configaudit/pipconfig.go similarity index 99% rename from internal/detector/pipconfig.go rename to internal/detector/configaudit/pipconfig.go index 37a5ad0..949275d 100644 --- a/internal/detector/pipconfig.go +++ b/internal/detector/configaudit/pipconfig.go @@ -1,4 +1,4 @@ -package detector +package configaudit import ( "context" diff --git a/internal/detector/pipconfig_findings.go b/internal/detector/configaudit/pipconfig_findings.go similarity index 99% rename from internal/detector/pipconfig_findings.go rename to internal/detector/configaudit/pipconfig_findings.go index c6be33a..463b51c 100644 --- a/internal/detector/pipconfig_findings.go +++ b/internal/detector/configaudit/pipconfig_findings.go @@ -1,4 +1,4 @@ -package detector +package configaudit import ( "fmt" diff --git a/internal/detector/pipconfig_findings_test.go b/internal/detector/configaudit/pipconfig_findings_test.go similarity index 99% rename from internal/detector/pipconfig_findings_test.go rename to internal/detector/configaudit/pipconfig_findings_test.go index 68cc0c7..aed146b 100644 --- a/internal/detector/pipconfig_findings_test.go +++ b/internal/detector/configaudit/pipconfig_findings_test.go @@ -1,4 +1,4 @@ -package detector +package configaudit import ( "strings" diff --git a/internal/detector/pipconfig_helpers.go b/internal/detector/configaudit/pipconfig_helpers.go similarity index 99% rename from internal/detector/pipconfig_helpers.go rename to internal/detector/configaudit/pipconfig_helpers.go index a0dae65..fdf6cad 100644 --- a/internal/detector/pipconfig_helpers.go +++ b/internal/detector/configaudit/pipconfig_helpers.go @@ -1,4 +1,4 @@ -package detector +package configaudit import ( "crypto/sha256" diff --git a/internal/detector/pipconfig_parse.go b/internal/detector/configaudit/pipconfig_parse.go similarity index 99% rename from internal/detector/pipconfig_parse.go rename to internal/detector/configaudit/pipconfig_parse.go index 803b184..d0d74ff 100644 --- a/internal/detector/pipconfig_parse.go +++ b/internal/detector/configaudit/pipconfig_parse.go @@ -1,4 +1,4 @@ -package detector +package configaudit import ( "bufio" diff --git a/internal/detector/pipconfig_parse_test.go b/internal/detector/configaudit/pipconfig_parse_test.go similarity index 99% rename from internal/detector/pipconfig_parse_test.go rename to internal/detector/configaudit/pipconfig_parse_test.go index 78bfd59..47a6ef9 100644 --- a/internal/detector/pipconfig_parse_test.go +++ b/internal/detector/configaudit/pipconfig_parse_test.go @@ -1,4 +1,4 @@ -package detector +package configaudit import ( "strings" diff --git a/internal/detector/pipconfig_stat_unix.go b/internal/detector/configaudit/pipconfig_stat_unix.go similarity index 97% rename from internal/detector/pipconfig_stat_unix.go rename to internal/detector/configaudit/pipconfig_stat_unix.go index 4b95618..e54925d 100644 --- a/internal/detector/pipconfig_stat_unix.go +++ b/internal/detector/configaudit/pipconfig_stat_unix.go @@ -1,6 +1,6 @@ //go:build !windows -package detector +package configaudit import ( "os" diff --git a/internal/detector/pipconfig_stat_windows.go b/internal/detector/configaudit/pipconfig_stat_windows.go similarity index 93% rename from internal/detector/pipconfig_stat_windows.go rename to internal/detector/configaudit/pipconfig_stat_windows.go index d6d7edd..1e24331 100644 --- a/internal/detector/pipconfig_stat_windows.go +++ b/internal/detector/configaudit/pipconfig_stat_windows.go @@ -1,6 +1,6 @@ //go:build windows -package detector +package configaudit // pipStatOwner is a no-op on Windows: SID-to-username resolution is // non-trivial and not actionable for v1 of this audit. The detector diff --git a/internal/detector/pipconfig_test.go b/internal/detector/configaudit/pipconfig_test.go similarity index 99% rename from internal/detector/pipconfig_test.go rename to internal/detector/configaudit/pipconfig_test.go index 483f31c..ce53b40 100644 --- a/internal/detector/pipconfig_test.go +++ b/internal/detector/configaudit/pipconfig_test.go @@ -1,4 +1,4 @@ -package detector +package configaudit import ( "context" diff --git a/internal/scan/scanner.go b/internal/scan/scanner.go index 5d9ae60..7767b3a 100644 --- a/internal/scan/scanner.go +++ b/internal/scan/scanner.go @@ -8,6 +8,7 @@ import ( "github.com/step-security/dev-machine-guard/internal/buildinfo" "github.com/step-security/dev-machine-guard/internal/cli" "github.com/step-security/dev-machine-guard/internal/detector" + "github.com/step-security/dev-machine-guard/internal/detector/configaudit" "github.com/step-security/dev-machine-guard/internal/device" "github.com/step-security/dev-machine-guard/internal/executor" "github.com/step-security/dev-machine-guard/internal/model" @@ -212,14 +213,14 @@ func Run(exec executor.Executor, log *progress.Logger, cfg *cli.Config) error { log.StepStart("Auditing npm configuration") start = time.Now() loggedInUser, _ := exec.LoggedInUser() - npmrcAudit := detector.NewNPMRCDetector(exec).Detect(ctx, searchDirs, loggedInUser) + npmrcAudit := configaudit.NewNPMRCDetector(exec).Detect(ctx, searchDirs, loggedInUser) log.StepDone(time.Since(start)) // pip config audit — same shape: every pip.conf / pip.ini discovered, // merged effective view, env-var snapshot, and a fixed finding catalog. log.StepStart("Auditing pip configuration") start = time.Now() - pipAudit := detector.NewPipConfigDetector(exec).Detect(ctx, loggedInUser) + pipAudit := configaudit.NewPipConfigDetector(exec).Detect(ctx, loggedInUser) log.StepDone(time.Since(start)) // Ensure no nil slices (JSON must emit [] not null) diff --git a/internal/telemetry/telemetry.go b/internal/telemetry/telemetry.go index 587c490..9e4f98a 100644 --- a/internal/telemetry/telemetry.go +++ b/internal/telemetry/telemetry.go @@ -18,6 +18,7 @@ import ( "github.com/step-security/dev-machine-guard/internal/cli" "github.com/step-security/dev-machine-guard/internal/config" "github.com/step-security/dev-machine-guard/internal/detector" + "github.com/step-security/dev-machine-guard/internal/detector/configaudit" "github.com/step-security/dev-machine-guard/internal/device" "github.com/step-security/dev-machine-guard/internal/executor" "github.com/step-security/dev-machine-guard/internal/lock" @@ -517,12 +518,12 @@ func Run(exec executor.Executor, log *progress.Logger, cfg *cli.Config) (err err // pyenv / asdf / brew installs that root's PATH wouldn't see). log.Progress("Auditing npm configuration...") npmrcLoggedIn, _ := exec.LoggedInUser() - npmrcAudit := detector.NewNPMRCDetector(userExec).Detect(ctx, searchDirs, npmrcLoggedIn) + npmrcAudit := configaudit.NewNPMRCDetector(userExec).Detect(ctx, searchDirs, npmrcLoggedIn) log.Progress(" npm available: %v, files discovered: %d", npmrcAudit.Available, len(npmrcAudit.Files)) fmt.Fprintln(os.Stderr) log.Progress("Auditing pip configuration...") - pipAudit := detector.NewPipConfigDetector(userExec).Detect(ctx, npmrcLoggedIn) + pipAudit := configaudit.NewPipConfigDetector(userExec).Detect(ctx, npmrcLoggedIn) log.Progress(" pip available: %v, files discovered: %d, findings: %d", pipAudit.Available, len(pipAudit.Files), len(pipAudit.Findings)) fmt.Fprintln(os.Stderr) From 4dd1117cd9c7d8fabfc9122feaf70c66c23ec146 Mon Sep 17 00:00:00 2001 From: Swarit Pandey Date: Wed, 6 May 2026 20:17:22 +0530 Subject: [PATCH 4/6] fix(pipconfig): redact effective.config + legacy-path detection by suffix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two bugs surfaced by end-to-end testing on Fedora EC2 with a 69-scenario harness: 1. Plaintext credential leak in `effective.config`. `pip config list -v` emits URL values verbatim, including any embedded `user:pass@host` userinfo. We were copying the value into `effective.config[]` without redaction — so a hardcoded token in `extra-index-url` would show up in the JSON output even though the per-file `entries` view already redacted it. Fix: run `redactCredsInValue` over the value before storing in the merged map. 2. `pip-019` (legacy `~/.pip/pip.conf`) only fired when the discovery layer was tagged `user-legacy`. But when `pip` is installed, our discovery uses `pip config debug`, which reports the legacy path under the `user` layer (pip itself doesn't expose the "legacy" concept) — so the rule silently never fired in practice. Fix: detect by path suffix (`isLegacyPipConfigPath`) instead of the layer label. The Windows discriminator carefully excludes both `%APPDATA%\...` (current user) and `%PROGRAMDATA%\...` (global) so only the `%HOME%\pip\pip.ini` form trips the rule. Also: `PipKeyValue.Values` is now `json:"-"`. It holds the raw, un-redacted parsed values for the findings engine to inspect (URL.User parsing, http-scheme detection, etc.) — it must never be serialized, because for keys like `extra-index-url` it can hold a literal `user:pass@host` URL. The `Display` field is the canonical user-facing rendering and it stays in JSON. Three new unit tests lock all of this in: - `TestParseEffective_RedactsEmbeddedCreds` - `TestIsLegacyPipConfigPath` (10 cases incl. all three Windows variants) - `TestEvaluateFileLevel_Pip019_FiresOnLegacyPathRegardlessOfLayer` Validated on Fedora EC2: 69/69 scenarios pass. --- internal/detector/configaudit/pipconfig.go | 7 ++- .../configaudit/pipconfig_findings.go | 37 ++++++++++++- .../configaudit/pipconfig_findings_test.go | 52 +++++++++++++++++++ .../detector/configaudit/pipconfig_test.go | 37 +++++++++++++ internal/model/model.go | 9 +++- 5 files changed, 137 insertions(+), 5 deletions(-) diff --git a/internal/detector/configaudit/pipconfig.go b/internal/detector/configaudit/pipconfig.go index 949275d..dc4ced3 100644 --- a/internal/detector/configaudit/pipconfig.go +++ b/internal/detector/configaudit/pipconfig.go @@ -498,7 +498,12 @@ func (d *PipConfigDetector) captureEffective(ctx context.Context) (*model.PipEff key := secKey[dot+1:] full := section + "." + key - eff.Config[full] = value + // Pip's text output emits URL values verbatim, including any + // embedded `user:pass@host` userinfo. We must redact before + // storing — the per-file `entries` view redacts; the effective + // view has to as well, otherwise it becomes the credential leak + // the audit was supposed to prevent. + eff.Config[full] = redactCredsInValue(value) eff.SourceByKey[full] = source } return eff, nil diff --git a/internal/detector/configaudit/pipconfig_findings.go b/internal/detector/configaudit/pipconfig_findings.go index 463b51c..c43f800 100644 --- a/internal/detector/configaudit/pipconfig_findings.go +++ b/internal/detector/configaudit/pipconfig_findings.go @@ -236,6 +236,34 @@ func looksLikeStandardPipConfigPath(p string) bool { return false } +// isLegacyPipConfigPath returns true for paths matching the legacy pip +// config location: `~/.pip/pip.conf` (Unix) or `%HOME%\pip\pip.ini` +// (Windows). Detected via suffix rather than the discovery `Layer` label +// because `pip config debug` reports the legacy path under the "user" +// layer when pip itself is installed. +// +// On Windows, three locations share the `\pip\pip.ini` suffix: +// +// %PROGRAMDATA%\pip\pip.ini (global, not legacy) +// %APPDATA%\pip\pip.ini (current user, not legacy) +// %HOME%\pip\pip.ini (legacy) +// +// We discriminate by checking that the path does NOT include an +// `\appdata\` or `\programdata\` component — both unique to the +// non-legacy locations. +func isLegacyPipConfigPath(p string) bool { + pl := strings.ToLower(p) + if strings.HasSuffix(pl, "/.pip/pip.conf") { + return true + } + if strings.HasSuffix(pl, `\pip\pip.ini`) && + !strings.Contains(pl, `\appdata\`) && + !strings.Contains(pl, `\programdata\`) { + return true + } + return false +} + // evaluateValueFindings runs rules that depend on the value of a single // (section, key, value) entry from a parsed file. func evaluateValueFindings(f model.PipConfigFile, section, key, value string, emit func(model.PipFinding)) { @@ -507,8 +535,13 @@ func evaluateKeyPresenceFindings(_ model.PipConfigFile, _ string, _ model.PipKey // evaluateFileLevelFindings handles rules that depend on file metadata // rather than parsed values: legacy paths, mode permissions. func evaluateFileLevelFindings(f model.PipConfigFile, emit func(model.PipFinding)) { - // pip-019 — legacy ~/.pip/pip.conf or %HOME%\pip\pip.ini in use - if f.Layer == "user-legacy" { + // pip-019 — legacy ~/.pip/pip.conf or %HOME%\pip\pip.ini in use. + // + // Detected by path suffix rather than the discovery layer label, + // because when pip itself is installed `pip config debug` reports the + // legacy path under the "user" layer (pip doesn't expose the "legacy" + // concept). Path matching catches both cases. + if isLegacyPipConfigPath(f.Path) { emit(model.PipFinding{ ID: "pip-019", Severity: pipSevLow, diff --git a/internal/detector/configaudit/pipconfig_findings_test.go b/internal/detector/configaudit/pipconfig_findings_test.go index aed146b..5afa0fb 100644 --- a/internal/detector/configaudit/pipconfig_findings_test.go +++ b/internal/detector/configaudit/pipconfig_findings_test.go @@ -303,3 +303,55 @@ func TestParseModeOctal(t *testing.T) { } } } + +func TestIsLegacyPipConfigPath(t *testing.T) { + cases := map[string]bool{ + // Unix legacy + "/home/alice/.pip/pip.conf": true, + "/Users/bob/.pip/pip.conf": true, + // Unix current + "/home/alice/.config/pip/pip.conf": false, + "/etc/pip.conf": false, + "/etc/xdg/pip/pip.conf": false, + // macOS user + "/Users/bob/Library/Application Support/pip/pip.conf": false, + // Windows legacy (HOME-rooted, no AppData component) + `C:\Users\Carol\pip\pip.ini`: true, + // Windows current (under AppData) + `C:\Users\Carol\AppData\Roaming\pip\pip.ini`: false, + // Windows global + `C:\ProgramData\pip\pip.ini`: false, + // Random path + "/tmp/something/pip.conf": false, + } + for in, want := range cases { + if got := isLegacyPipConfigPath(in); got != want { + t.Errorf("isLegacyPipConfigPath(%q) = %v, want %v", in, got, want) + } + } +} + +// TestEvaluateFileLevel_Pip019_FiresOnLegacyPathRegardlessOfLayer locks +// in the bug fix: when pip is installed, `pip config debug` reports the +// legacy path under the `user` layer (not `user-legacy`), so the rule +// must detect by path suffix, not by Layer field. Validated end-to-end +// on Fedora EC2. +func TestEvaluateFileLevel_Pip019_FiresOnLegacyPathRegardlessOfLayer(t *testing.T) { + for _, layer := range []string{"user", "user-legacy"} { + f := model.PipConfigFile{ + Path: "/home/test/.pip/pip.conf", + Layer: layer, + Exists: true, + Mode: "0644", + } + var fired bool + evaluateFileLevelFindings(f, func(fnd model.PipFinding) { + if fnd.ID == "pip-019" { + fired = true + } + }) + if !fired { + t.Errorf("pip-019 must fire for legacy path under layer=%q (was missed when pip-debug reports the legacy file as 'user')", layer) + } + } +} diff --git a/internal/detector/configaudit/pipconfig_test.go b/internal/detector/configaudit/pipconfig_test.go index ce53b40..29fa254 100644 --- a/internal/detector/configaudit/pipconfig_test.go +++ b/internal/detector/configaudit/pipconfig_test.go @@ -244,3 +244,40 @@ install.no-build-isolation='true' from PIP_NO_BUILD_ISOLATION _ = sawInfo } } + +// TestParseEffective_RedactsEmbeddedCreds locks in the bug fix where +// `pip config list -v` emits URL values verbatim — including any +// embedded `user:pass@host` userinfo — and we used to copy them into +// effective.config without redaction. The per-file `entries` view +// already redacted; the merged effective view is now consistent. +// +// Validated end-to-end on Fedora EC2 (P3): the literal token must NOT +// appear anywhere in the JSON output. +func TestParseEffective_RedactsEmbeddedCreds(t *testing.T) { + mock := executor.NewMock() + mock.SetPath("pip", "/usr/bin/pip") + mock.SetCommand("pip 24.0\n", "", 0, "pip", "--version") + mock.SetCommand("", "", 0, "pip", "config", "debug") + // Pip's text output includes the literal credential — that's how pip + // renders it. Our parser must redact before storing. + out := "For variant 'user', will try loading '/u/.config/pip/pip.conf'.\n" + + "global.extra-index-url='https://__token__:LEAKED_SECRET" + at + "private.example.com/simple' from /u/.config/pip/pip.conf\n" + mock.SetCommand(out, "", 0, "pip", "config", "list", "-v") + + d := NewPipConfigDetector(mock) + d.ownerLookup = fixedPipOwner() + d.gitTracked = func(_ context.Context, _ string) bool { return false } + d.inGitRepo = func(_ string) bool { return false } + + audit := d.Detect(context.Background(), nil) + if audit.Effective == nil { + t.Fatal("effective view nil") + } + got := audit.Effective.Config["global.extra-index-url"] + if strings.Contains(got, "LEAKED_SECRET") { + t.Errorf("effective.config leaked credential: %q", got) + } + if !strings.Contains(got, "****") { + t.Errorf("effective.config should have redacted to user:****@host form, got: %q", got) + } +} diff --git a/internal/model/model.go b/internal/model/model.go index 0f22233..3c6ff79 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -359,8 +359,13 @@ type PipSection struct { // PipKeyValue is a single key/value (or key/multi-value) entry inside a // section. Repeatable options surface as multiple Values. type PipKeyValue struct { - Key string `json:"key"` - Values []string `json:"values"` // len()>=1; each value is a separate logical entry + Key string `json:"key"` + // Values holds the raw, un-redacted parsed values. Used internally by + // the findings engine (URL.User parsing, http-scheme detection, etc.) + // — NEVER serialized to JSON or pretty output, since for keys like + // `extra-index-url` it can hold a literal `user:pass@host` URL. Use + // Display for any user-visible rendering. + Values []string `json:"-"` Display string `json:"display,omitempty"` // human-readable single-line rendering, with creds redacted LineNum int `json:"line_num"` } From 006bdb6b49b14840e5353e1146c227c40c660df5 Mon Sep 17 00:00:00 2001 From: Swarit Pandey Date: Wed, 6 May 2026 20:28:32 +0530 Subject: [PATCH 5/6] test(configaudit): add tests/test_rc_audit.sh end-to-end harness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reusable shell harness covering ~65 scenarios for the npmrc + pip config audits — discovery across all four scopes for each tool, credential redaction, env-var interactions (NPM_CONFIG_USERCONFIG, PIP_CONFIG_FILE incl. /dev/null disable, VIRTUAL_ENV), every pip-001..pip-024 finding rule, file-mode escalations, severity ordering, and edge cases (missing files, unreadable files, garbage content, symlinks). Usage: tests/test_rc_audit.sh # uses ./stepsecurity-dev-machine-guard BINARY=/path/to/binary tests/test_rc_audit.sh # explicit binary Conventions match tests/test_smoke_go.sh (pass/fail/section helpers, PASS/FAIL/SKIP counts, color-coded output, exits non-zero on any FAIL). Safety: backs up any pre-existing user config (~/.npmrc, ~/.netrc, ~/.config/pip/pip.conf, ~/.pip/pip.conf) on entry and restores via trap on EXIT/INT/TERM, so a developer running this against their own machine never loses config. Sudo-required scenarios (writing /etc/pip.conf for the global-scope test) are skipped automatically when passwordless sudo is unavailable; git scenarios skip if git is absent. No secrets, no hardcoded paths to credential stores, no remote-machine assumptions. Verified on: - macOS (BINARY=/tmp/dmg-rc-mac): PASS=64 FAIL=0 SKIP=1 (no sudo) - Fedora 42 EC2 (BINARY=~/dmg-rc): PASS=65 FAIL=0 SKIP=0 --- tests/test_rc_audit.sh | 601 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 601 insertions(+) create mode 100755 tests/test_rc_audit.sh diff --git a/tests/test_rc_audit.sh b/tests/test_rc_audit.sh new file mode 100755 index 0000000..757218e --- /dev/null +++ b/tests/test_rc_audit.sh @@ -0,0 +1,601 @@ +#!/usr/bin/env bash +# +# End-to-end harness for the npmrc + pip config audits. +# +# Runs ~70 scenarios covering: discovery across all four scopes for each +# tool, credential redaction, env-var interactions (NPM_CONFIG_USERCONFIG, +# PIP_CONFIG_FILE incl. /dev/null disable, VIRTUAL_ENV), every pip-001.. +# pip-024 finding rule, file-mode escalations, severity ordering, and +# edge cases (missing files, unreadable files, garbage content, symlinks). +# +# Usage: +# tests/test_rc_audit.sh # uses ./stepsecurity-dev-machine-guard +# BINARY=/path/to/binary tests/test_rc_audit.sh # explicit binary +# +# The harness mutates a small set of well-known config paths under +# $HOME (~/.npmrc, ~/.config/pip/pip.conf, ~/.netrc, ~/.pip/pip.conf). +# Anything that already exists is backed up to a tempdir on entry and +# restored on exit (including abort via Ctrl-C). The backup/restore is +# idempotent and bulletproof against double-runs. +# +# Tests that would require root (writing /etc/pip.conf for the global- +# scope scenario) are skipped automatically when passwordless sudo is +# unavailable. +# +# Requirements: +# - jq (assertion plumbing) +# - git (1-2 scenarios; auto-skipped if absent) +# - npm and/or pip (optional; absent-tool scenarios are still +# exercised with PATH=/empty) + +set -uo pipefail + +#============================================================================== +# Configuration +#============================================================================== + +BINARY="${BINARY:-./stepsecurity-dev-machine-guard}" +RESULTS_DIR="${RESULTS_DIR:-/tmp/rc-audit-results}" +TEST_SEARCH_DIR="$(mktemp -d -t rc-test-search.XXXXXX)" + +# Files we mutate. They get backed up under $BACKUP_DIR if present, and +# the originals are restored on exit. Sudo-managed paths (/etc/pip.conf, +# /etc/npmrc) are handled in the sudo-gated scenarios only. +USER_FILES=( + "$HOME/.npmrc" + "$HOME/.netrc" + "$HOME/.config/pip/pip.conf" + "$HOME/.pip/pip.conf" +) +BACKUP_DIR="$(mktemp -d -t rc-test-backup.XXXXXX)" + +mkdir -p "$RESULTS_DIR" + +# Detect optional tooling so we can gate sudo-required and tool-specific scenarios. +HAVE_SUDO=0 +if sudo -n true 2>/dev/null; then HAVE_SUDO=1; fi +HAVE_NPM=$(command -v npm >/dev/null 2>&1 && echo 1 || echo 0) +HAVE_PIP=$(command -v pip3 >/dev/null 2>&1 || command -v pip >/dev/null 2>&1 && echo 1 || echo 0) +HAVE_GIT=$(command -v git >/dev/null 2>&1 && echo 1 || echo 0) +HAVE_JQ=$(command -v jq >/dev/null 2>&1 && echo 1 || echo 0) + +if [ "$HAVE_JQ" != "1" ]; then + echo "ERROR: jq is required for assertions. Install jq and re-run." >&2 + exit 2 +fi + +#============================================================================== +# Test framework (matches tests/test_smoke_go.sh conventions) +#============================================================================== + +PASS_COUNT=0 +FAIL_COUNT=0 +SKIP_COUNT=0 +GREEN='\033[0;32m' +RED='\033[0;31m' +YELLOW='\033[0;33m' +BOLD='\033[1m' +RESET='\033[0m' + +pass() { + PASS_COUNT=$((PASS_COUNT + 1)) + printf " ${GREEN}PASS${RESET} %s\n" "$1" +} + +fail() { + FAIL_COUNT=$((FAIL_COUNT + 1)) + printf " ${RED}FAIL${RESET} %s\n" "$1" + if [ -n "${2:-}" ]; then + printf " %s\n" "$2" + fi +} + +skip() { + SKIP_COUNT=$((SKIP_COUNT + 1)) + printf " ${YELLOW}SKIP${RESET} %s (%s)\n" "$1" "$2" +} + +assert_eq() { + local label="$1" expected="$2" actual="$3" + if [ "$expected" = "$actual" ]; then + pass "$label" + else + fail "$label" "expected=$expected actual=$actual" + fi +} + +assert_match() { + local label="$1" pattern="$2" actual="$3" + if grep -qE "$pattern" <<<"$actual"; then + pass "$label" + else + fail "$label" "pattern=$pattern actual=$actual" + fi +} + +assert_no_match_in_file() { + local label="$1" pattern="$2" file="$3" + if grep -qE "$pattern" "$file" 2>/dev/null; then + fail "$label" "pattern '$pattern' was present in $file" + else + pass "$label" + fi +} + +section() { + printf "\n${BOLD}── %s${RESET}\n" "$1" +} + +#============================================================================== +# Backup / restore (so we never destroy a developer's real config) +#============================================================================== + +backup_user_state() { + for f in "${USER_FILES[@]}"; do + if [ -e "$f" ]; then + local rel="${f#$HOME/}" + mkdir -p "$BACKUP_DIR/$(dirname "$rel")" + cp -p "$f" "$BACKUP_DIR/$rel" + fi + done +} + +restore_user_state() { + # Remove anything the harness wrote. + for f in "${USER_FILES[@]}"; do + rm -f "$f" + done + # Restore originals. + if [ -d "$BACKUP_DIR" ]; then + for f in "${USER_FILES[@]}"; do + local rel="${f#$HOME/}" + if [ -e "$BACKUP_DIR/$rel" ]; then + mkdir -p "$(dirname "$f")" + cp -p "$BACKUP_DIR/$rel" "$f" + fi + done + fi + rm -rf "$BACKUP_DIR" "$TEST_SEARCH_DIR" +} + +# Restore on any exit path — normal, error, signal. +trap 'restore_user_state' EXIT INT TERM + +backup_user_state + +#============================================================================== +# Helpers +#============================================================================== + +# Wipe + recreate per-scenario state (does not touch the backup). +reset_state() { + rm -f "${USER_FILES[@]}" 2>/dev/null + rm -rf "$HOME/.config/pip" "$HOME/.pip" "$TEST_SEARCH_DIR"/* 2>/dev/null + mkdir -p "$TEST_SEARCH_DIR" "$HOME/.config/pip" +} + +# Run --npmrc, capture pretty + JSON output to $RESULTS_DIR/.{out,json} +run_npmrc() { + local id="$1" + "$BINARY" --npmrc --color=never --search-dirs "$TEST_SEARCH_DIR" 2>/dev/null > "$RESULTS_DIR/$id.out" + "$BINARY" --npmrc --json --search-dirs "$TEST_SEARCH_DIR" 2>/dev/null > "$RESULTS_DIR/$id.json" +} + +# Run --pipconfig with optional env vars: run_pip P3 PIP_CONFIG_FILE=/foo +run_pip() { + local id="$1"; shift + env "$@" "$BINARY" --pipconfig --color=never --search-dirs "$TEST_SEARCH_DIR" 2>/dev/null > "$RESULTS_DIR/$id.out" + env "$@" "$BINARY" --pipconfig --json --search-dirs "$TEST_SEARCH_DIR" 2>/dev/null > "$RESULTS_DIR/$id.json" +} + +# Path-suffix filter for the user-scope pip.conf — pip discovery often +# reports both ~/.config/pip/pip.conf and ~/.pip/pip.conf under the +# "user" layer; tests target the former. +USER_PIP='.files[] | select(.path | endswith(".config/pip/pip.conf"))' + +#============================================================================== +# 0. Binary check +#============================================================================== + +section "Binary check" + +if [ -x "$BINARY" ]; then + pass "Binary exists and is executable: $BINARY" +else + fail "Binary exists and is executable" "BINARY=$BINARY not found or not executable. Build first or set BINARY=..." + exit 1 +fi + +#============================================================================== +# NPMRC scenarios +#============================================================================== + +section "NPMRC scenarios" + +# N1: clean slate +reset_state +run_npmrc N1 +assert_eq "N1 no existing files" "0" "$(jq '[.files[] | select(.exists)] | length' "$RESULTS_DIR/N1.json")" +if [ "$HAVE_NPM" = "1" ]; then + assert_eq "N1 npm_available=true" "true" "$(jq -r '.npm_available' "$RESULTS_DIR/N1.json")" +fi + +# N2: hardcoded + env-ref tokens, plus normal config +reset_state +cat > "$HOME/.npmrc" <<'NPMEOF' +registry=https://registry.npmjs.org/ +//registry.npmjs.org/:_authToken=npm_HARDCODED_TOKEN_LASTFOUR +//npm.example.com/:_authToken=${COMPANY_TOKEN} +strict-ssl=true +NPMEOF +run_npmrc N2 +assert_eq "N2 user file exists" "true" "$(jq -r '.files[] | select(.scope=="user") | .exists' "$RESULTS_DIR/N2.json")" +HARDCODED=$(jq -r '.files[] | select(.scope=="user") | .entries[] | select(.is_auth and (.is_env_ref|not)) | .display_value' "$RESULTS_DIR/N2.json" | head -1) +assert_match "N2 hardcoded token redacted to ***" '^\*\*\*' "$HARDCODED" +ENVREF=$(jq -r '.files[] | select(.scope=="user") | .entries[] | select(.is_env_ref) | .display_value' "$RESULTS_DIR/N2.json" | head -1) +assert_match "N2 env-ref preserved verbatim" 'COMPANY_TOKEN' "$ENVREF" +assert_no_match_in_file "N2 no plaintext token in JSON output" "HARDCODED_TOKEN" "$RESULTS_DIR/N2.json" + +# N3: ca[]= multi-line array +reset_state +cat > "$HOME/.npmrc" <<'NPMEOF' +ca[]=cert-1 +ca[]=cert-2 +NPMEOF +run_npmrc N3 +assert_eq "N3 two ca[] entries flagged is_array=true" "2" "$(jq '.files[] | select(.scope=="user") | [.entries[] | select(.is_array)] | length' "$RESULTS_DIR/N3.json")" + +# N4: git-tracked project file +if [ "$HAVE_GIT" = "1" ]; then + reset_state + mkdir -p "$TEST_SEARCH_DIR/proj-tracked" + (cd "$TEST_SEARCH_DIR/proj-tracked" && git init -q) + echo 'save-exact=true' > "$TEST_SEARCH_DIR/proj-tracked/.npmrc" + (cd "$TEST_SEARCH_DIR/proj-tracked" && git add .npmrc && git -c user.email=t@t -c user.name=t commit -q -m init >/dev/null) + run_npmrc N4 + assert_eq "N4 git_tracked=true" "true" "$(jq -r '.files[] | select(.path | endswith("/proj-tracked/.npmrc")) | .git_tracked' "$RESULTS_DIR/N4.json")" + assert_eq "N4 in_git_repo=true" "true" "$(jq -r '.files[] | select(.path | endswith("/proj-tracked/.npmrc")) | .in_git_repo' "$RESULTS_DIR/N4.json")" +else + skip "N4 git-tracked project file" "git not installed" +fi + +# N5: project .npmrc in non-git dir +reset_state +mkdir -p "$TEST_SEARCH_DIR/proj-untracked" +echo 'save-exact=true' > "$TEST_SEARCH_DIR/proj-untracked/.npmrc" +run_npmrc N5 +assert_eq "N5 non-git project: in_git_repo=false" "false" "$(jq -r '.files[] | select(.path | endswith("/proj-untracked/.npmrc")) | .in_git_repo // false' "$RESULTS_DIR/N5.json")" + +# N6: NPM_CONFIG_USERCONFIG override +reset_state +CUSTOM_NPM="$(mktemp -d -t custom-npm.XXXXXX)" +echo 'audit=false' > "$CUSTOM_NPM/myrc" +NPM_CONFIG_USERCONFIG="$CUSTOM_NPM/myrc" "$BINARY" --npmrc --json --search-dirs "$TEST_SEARCH_DIR" 2>/dev/null > "$RESULTS_DIR/N6.json" +assert_eq "N6 user-scope path == NPM_CONFIG_USERCONFIG" "$CUSTOM_NPM/myrc" "$(jq -r '.files[] | select(.scope=="user") | .path' "$RESULTS_DIR/N6.json")" +rm -rf "$CUSTOM_NPM" + +# N7: npm not on PATH (truly hidden via empty PATH) +reset_state +echo 'registry=https://registry.npmjs.org/' > "$HOME/.npmrc" +PATH=/empty "$BINARY" --npmrc --json --search-dirs "$TEST_SEARCH_DIR" 2>/dev/null > "$RESULTS_DIR/N7.json" +assert_eq "N7 npm_available=false when PATH hides npm" "false" "$(jq -r '.npm_available' "$RESULTS_DIR/N7.json")" +assert_eq "N7 user file still discovered" "true" "$(jq -r '.files[] | select(.scope=="user") | .exists' "$RESULTS_DIR/N7.json")" +assert_eq "N7 no effective view when npm absent" "false" "$(jq 'has("effective")' "$RESULTS_DIR/N7.json")" + +# N8: empty .npmrc +reset_state +: > "$HOME/.npmrc" +run_npmrc N8 +assert_eq "N8 empty user file: 0 entries" "0" "$(jq '.files[] | select(.scope=="user") | (.entries // []) | length' "$RESULTS_DIR/N8.json")" + +# N9: --npmrc --json output shape +reset_state +"$BINARY" --npmrc --json --search-dirs "$TEST_SEARCH_DIR" 2>/dev/null > "$RESULTS_DIR/N9.json" +assert_eq "N9 --npmrc --json has npm_available key" "true" "$(jq 'has("npm_available")' "$RESULTS_DIR/N9.json")" +assert_eq "N9 --npmrc --json scoped to audit only" "false" "$(jq 'has("ide_installations")' "$RESULTS_DIR/N9.json")" + +#============================================================================== +# PIP scenarios +#============================================================================== + +section "PIP scenarios" + +# Helper: assert a finding fired with a given severity. +assert_severity() { + local id="$1" findid="$2" want_sev="$3" desc="$4" + local got + got=$(jq -r --arg id "$findid" '[.findings[]? | select(.id==$id) | .severity] | first // ""' "$RESULTS_DIR/$id.json") + assert_eq "$id $desc" "$want_sev" "$got" +} + +# Helper: assert a count of findings with the given ID. +assert_finding_count() { + local id="$1" findid="$2" wantcount="$3" desc="$4" + local got + got=$(jq --arg id "$findid" '[.findings[]? | select(.id==$id)] | length' "$RESULTS_DIR/$id.json") + assert_eq "$id $desc" "$wantcount" "$got" +} + +# P1: clean slate +reset_state +run_pip P1 +NETRC_ONLY=$(jq '[.findings[]? | select(.id|startswith("pip-")) | select(.id != "pip-netrc-perms" and .id != "pip-netrc-present")] | length == 0' "$RESULTS_DIR/P1.json") +assert_eq "P1 no findings (or netrc-only)" "true" "$NETRC_ONLY" + +# P2: global /etc/pip.conf — requires sudo +if [ "$HAVE_SUDO" = "1" ]; then + reset_state + sudo tee /etc/pip.conf >/dev/null <<'PIPEOF' +[global] +audit-level = moderate +PIPEOF + run_pip P2 + assert_eq "P2 global file discovered" "true" "$(jq -r '[.files[] | select(.layer=="global" and .exists)] | length > 0' "$RESULTS_DIR/P2.json")" + sudo rm -f /etc/pip.conf +else + skip "P2 global /etc/pip.conf discovery" "passwordless sudo unavailable" +fi + +# P3: hardcoded creds in extra-index-url +reset_state +cat > "$HOME/.config/pip/pip.conf" <<'PIPEOF' +[global] +extra-index-url = https://__token__:secret_value_xyz_LASTFOUR@my-private.example.com/simple +PIPEOF +chmod 0644 "$HOME/.config/pip/pip.conf" +run_pip P3 +assert_severity P3 "pip-001" "CRITICAL" "pip-001 is CRITICAL on hardcoded creds" +assert_severity P3 "pip-005" "HIGH" "pip-005 is HIGH (extra-index-url presence)" +assert_severity P3 "pip-022" "HIGH" "pip-022 is HIGH (creds + group/other readable)" +assert_no_match_in_file "P3 no plaintext secret anywhere in JSON" "secret_value_xyz" "$RESULTS_DIR/P3.json" + +# P4: index-url with http:// +reset_state +cat > "$HOME/.config/pip/pip.conf" <<'PIPEOF' +[global] +index-url = http://internal.example.com/simple +PIPEOF +run_pip P4 +assert_severity P4 "pip-006" "HIGH" "pip-006 fires for http:// index-url" + +# P5: extra-index-url with http:// +reset_state +cat > "$HOME/.config/pip/pip.conf" <<'PIPEOF' +[global] +extra-index-url = http://other.example.com/simple +PIPEOF +run_pip P5 +assert_severity P5 "pip-002" "CRITICAL" "pip-002 fires for http:// extra-index-url" +assert_severity P5 "pip-005" "HIGH" "pip-005 also fires" + +# P6: trusted-host with two values +reset_state +cat > "$HOME/.config/pip/pip.conf" <<'PIPEOF' +[global] +trusted-host = + a.example.com + b.example.com +PIPEOF +run_pip P6 +assert_finding_count P6 "pip-007" "2" "pip-007 fires once per trusted host" + +# P7: build-integrity dial-downs +reset_state +cat > "$HOME/.config/pip/pip.conf" <<'PIPEOF' +[global] +no-build-isolation = true +cache-dir = /tmp/pipcache + +[install] +no-binary = :all: +PIPEOF +run_pip P7 +assert_severity P7 "pip-011" "MEDIUM" "pip-011 no-build-isolation" +assert_severity P7 "pip-012" "MEDIUM" "pip-012 no-binary=:all:" +assert_severity P7 "pip-013" "MEDIUM" "pip-013 cache-dir under /tmp" + +# P8: positive controls +reset_state +cat > "$HOME/.config/pip/pip.conf" <<'PIPEOF' +[install] +require-hashes = true +only-binary = :all: +PIPEOF +run_pip P8 +assert_severity P8 "pip-023" "INFO" "pip-023 require-hashes (positive)" +assert_severity P8 "pip-024" "INFO" "pip-024 only-binary=:all: (positive)" + +# P9: proxy with embedded creds +reset_state +cat > "$HOME/.config/pip/pip.conf" <<'PIPEOF' +[global] +proxy = http://proxyuser:proxypass@proxy.example.com:8080 +PIPEOF +run_pip P9 +assert_severity P9 "pip-003" "CRITICAL" "pip-003 proxy creds" + +# P10: cert + client-cert +reset_state +cat > "$HOME/.config/pip/pip.conf" <<'PIPEOF' +[global] +cert = /etc/ssl/certs/custom-ca.crt +client-cert = /home/user/client.pem +PIPEOF +run_pip P10 +assert_severity P10 "pip-009" "MEDIUM" "pip-009 custom CA" +assert_severity P10 "pip-010" "MEDIUM" "pip-010 client-cert" + +# P11: low-severity informational +reset_state +cat > "$HOME/.config/pip/pip.conf" <<'PIPEOF' +[global] +keyring-provider = disabled +no-cache-dir = true +pre = true +PIPEOF +run_pip P11 +assert_severity P11 "pip-016" "LOW" "pip-016 keyring-disabled" +assert_severity P11 "pip-017" "LOW" "pip-017 no-cache-dir" +assert_severity P11 "pip-018" "LOW" "pip-018 pre" + +# P12: legacy ~/.pip/pip.conf in use +reset_state +mkdir -p "$HOME/.pip" +cat > "$HOME/.pip/pip.conf" <<'PIPEOF' +[global] +index-url = https://pypi.org/simple +PIPEOF +run_pip P12 +assert_severity P12 "pip-019" "LOW" "pip-019 fires by path suffix even when pip-debug labels it 'user'" + +# P13: PIP_CONFIG_FILE redirect +reset_state +echo '[global]' > /tmp/redirect.conf +echo 'audit = false' >> /tmp/redirect.conf +run_pip P13 PIP_CONFIG_FILE=/tmp/redirect.conf +assert_severity P13 "pip-020" "MEDIUM" "pip-020 PIP_CONFIG_FILE redirect" +rm -f /tmp/redirect.conf + +# P14: PIP_CONFIG_FILE=/dev/null disables config-file load +reset_state +run_pip P14 PIP_CONFIG_FILE=/dev/null +assert_severity P14 "pip-021" "MEDIUM" "pip-021 PIP_CONFIG_FILE=/dev/null" + +# P15: VIRTUAL_ENV picked up as site scope +reset_state +TEST_VENV="$(mktemp -d -t test-venv.XXXXXX)" +cat > "$TEST_VENV/pip.conf" <<'PIPEOF' +[global] +require-hashes = true +PIPEOF +run_pip P15 VIRTUAL_ENV="$TEST_VENV" +assert_eq "P15 VIRTUAL_ENV pip.conf surfaces under site scope" "1" "$(jq -r '[.files[] | select(.layer=="site" and .exists)] | length' "$RESULTS_DIR/P15.json")" +rm -rf "$TEST_VENV" + +# P16: severity ordering — critical first +reset_state +cat > "$HOME/.config/pip/pip.conf" <<'PIPEOF' +[global] +extra-index-url = https://__token__:s3cr3t_LASTFOUR@host.example.com/simple +trusted-host = host.example.com +no-build-isolation = true + +[install] +require-hashes = true +PIPEOF +run_pip P16 +FIRST_SEV=$(jq -r '.findings[0].severity' "$RESULTS_DIR/P16.json") +LAST_SEV=$(jq -r '.findings[-1].severity' "$RESULTS_DIR/P16.json") +assert_eq "P16 first finding is CRITICAL" "CRITICAL" "$FIRST_SEV" +assert_eq "P16 last finding is INFO" "INFO" "$LAST_SEV" + +# P17: file mode 0666 with creds → HIGH escalation +reset_state +cat > "$HOME/.config/pip/pip.conf" <<'PIPEOF' +[global] +extra-index-url = https://__token__:another_secret_LASTFOUR@host.example.com/simple +PIPEOF +chmod 0666 "$HOME/.config/pip/pip.conf" +run_pip P17 +assert_severity P17 "pip-022" "HIGH" "pip-022 HIGH on creds+0666" + +# P18: ~/.netrc 0644 — pip-netrc-perms MEDIUM +reset_state +echo 'machine pypi.org login user password p' > "$HOME/.netrc" +chmod 0644 "$HOME/.netrc" +run_pip P18 +assert_severity P18 "pip-netrc-perms" "MEDIUM" "pip-netrc-perms MEDIUM at 0644" + +# P19: ~/.netrc 0600 — pip-netrc-present INFO +reset_state +echo 'machine pypi.org login user password p' > "$HOME/.netrc" +chmod 0600 "$HOME/.netrc" +run_pip P19 +assert_severity P19 "pip-netrc-present" "INFO" "pip-netrc-present INFO at 0600" + +# P20: pip not on PATH — file-only audit, findings still fire +reset_state +cat > "$HOME/.config/pip/pip.conf" <<'PIPEOF' +[global] +extra-index-url = http://no-pip.example.com/simple +PIPEOF +PATH=/empty "$BINARY" --pipconfig --json --search-dirs "$TEST_SEARCH_DIR" 2>/dev/null > "$RESULTS_DIR/P20.json" +assert_eq "P20 pip_available=false when PATH hides pip" "false" "$(jq -r '.pip_available' "$RESULTS_DIR/P20.json")" +assert_eq "P20 pip-002 still fires when pip absent" "1" "$(jq '[.findings[]? | select(.id=="pip-002")] | length' "$RESULTS_DIR/P20.json")" + +#============================================================================== +# Cross-cutting +#============================================================================== + +section "Cross-cutting" + +reset_state +"$BINARY" --pretty --color=never --search-dirs "$TEST_SEARCH_DIR" 2>/dev/null > "$RESULTS_DIR/X1.out" +assert_match "X1 --pretty has NPM CONFIG AUDIT block" "NPM CONFIG AUDIT" "$(cat "$RESULTS_DIR/X1.out")" +assert_match "X1 --pretty has PIP CONFIG AUDIT block" "PIP CONFIG AUDIT" "$(cat "$RESULTS_DIR/X1.out")" + +"$BINARY" --json --search-dirs "$TEST_SEARCH_DIR" 2>/dev/null > "$RESULTS_DIR/X2.json" +assert_eq "X2 --json has npmrc_audit" "true" "$(jq 'has("npmrc_audit")' "$RESULTS_DIR/X2.json")" +assert_eq "X2 --json has pip_audit" "true" "$(jq 'has("pip_audit")' "$RESULTS_DIR/X2.json")" +assert_eq "X2 --json still has ide_installations (no regression)" "true" "$(jq 'has("ide_installations")' "$RESULTS_DIR/X2.json")" + +"$BINARY" --pipconfig --json --search-dirs "$TEST_SEARCH_DIR" 2>/dev/null > "$RESULTS_DIR/X3.json" +assert_eq "X3 --pipconfig --json has pip_available" "true" "$(jq 'has("pip_available")' "$RESULTS_DIR/X3.json")" +assert_eq "X3 --pipconfig --json scoped (no ide_installations)" "false" "$(jq 'has("ide_installations")' "$RESULTS_DIR/X3.json")" + +#============================================================================== +# Edge cases +#============================================================================== + +section "Edge cases" + +# E1: pip.conf with mode 000 (unreadable) +reset_state +cat > "$HOME/.config/pip/pip.conf" <<'PIPEOF' +[global] +audit = false +PIPEOF +chmod 000 "$HOME/.config/pip/pip.conf" +run_pip E1 +chmod 0644 "$HOME/.config/pip/pip.conf" +assert_eq "E1 exists=true even when unreadable" "true" "$(jq -r "$USER_PIP | .exists" "$RESULTS_DIR/E1.json")" +assert_eq "E1 readable=false when mode 000" "false" "$(jq -r "$USER_PIP | .readable" "$RESULTS_DIR/E1.json")" +assert_match "E1 parse_error mentions 'read'" "read" "$(jq -r "$USER_PIP | .parse_error // \"\"" "$RESULTS_DIR/E1.json")" + +# E2: garbage bytes — verify run completes with valid JSON +reset_state +printf '\x00\x01\x02 garbage \xff\nrandom-key=value\n[realsection]\nkey=val\n' > "$HOME/.config/pip/pip.conf" +run_pip E2 +if jq empty "$RESULTS_DIR/E2.json" 2>/dev/null; then + pass "E2 run completes with valid JSON on garbage input" +else + fail "E2 run completes with valid JSON on garbage input" "JSON parse failed" +fi + +# E3: pip.conf path is a directory, not a file +reset_state +mkdir -p "$HOME/.config/pip/pip.conf" +run_pip E3 +assert_match "E3 parse_error says directory" "directory" "$(jq -r "$USER_PIP | .parse_error // \"\"" "$RESULTS_DIR/E3.json")" +rmdir "$HOME/.config/pip/pip.conf" 2>/dev/null + +# E4: pip.conf is a symlink to a real file +reset_state +LINK_TARGET="$(mktemp -t real-pipconf.XXXXXX)" +cat > "$LINK_TARGET" <<'PIPEOF' +[global] +audit-level = moderate +PIPEOF +ln -sf "$LINK_TARGET" "$HOME/.config/pip/pip.conf" +run_pip E4 +assert_eq "E4 symlinked file: exists=true" "true" "$(jq -r "$USER_PIP | .exists" "$RESULTS_DIR/E4.json")" +assert_eq "E4 symlinked file: readable=true" "true" "$(jq -r "$USER_PIP | .readable" "$RESULTS_DIR/E4.json")" +assert_eq "E4 parsed entries from symlink target" "1" "$(jq "$USER_PIP | (.sections // []) | [.[].entries[]] | length" "$RESULTS_DIR/E4.json")" +rm -f "$LINK_TARGET" + +#============================================================================== +# Summary +#============================================================================== + +printf "\n${BOLD}Summary:${RESET} ${GREEN}PASS=%d${RESET} ${RED}FAIL=%d${RESET} ${YELLOW}SKIP=%d${RESET}\n" "$PASS_COUNT" "$FAIL_COUNT" "$SKIP_COUNT" +exit "$FAIL_COUNT" From 5227516e1dc27a57b6d422f3d1d8a9f896e27d35 Mon Sep 17 00:00:00 2001 From: Swarit Pandey Date: Thu, 7 May 2026 15:54:57 +0530 Subject: [PATCH 6/6] fix(pipconfig): handle pip 24.x effective output with no 'from ' suffix --- internal/detector/configaudit/pipconfig.go | 19 +++++---- .../detector/configaudit/pipconfig_test.go | 39 +++++++++++++++++++ internal/output/pipconfig_verbose.go | 2 +- 3 files changed, 52 insertions(+), 8 deletions(-) diff --git a/internal/detector/configaudit/pipconfig.go b/internal/detector/configaudit/pipconfig.go index dc4ced3..073dcbf 100644 --- a/internal/detector/configaudit/pipconfig.go +++ b/internal/detector/configaudit/pipconfig.go @@ -469,15 +469,20 @@ func (d *PipConfigDetector) captureEffective(ctx context.Context) (*model.PipEff if !pipConfigListPrefix.MatchString(line) { continue } - // Split on the literal ` from ` boundary that separates the - // quoted value from the source. - // e.g. global.index-url='https://...' from /path/file - idx := strings.Index(line, "' from ") - if idx < 0 { + // Two shapes seen across pip versions: + // global.index-url='https://...' from /path/file (older) + // global.index-url='https://...' (pip 24.x) + // Split off the optional ` from ` trailer; fall back to + // the closing quote when it isn't there. + var header, source string + if idx := strings.Index(line, "' from "); idx >= 0 { + header = line[:idx] + source = strings.TrimSpace(line[idx+len("' from "):]) + } else if idx := strings.LastIndex(line, "'"); idx > 0 { + header = line[:idx] + } else { continue } - header := line[:idx] - source := strings.TrimSpace(line[idx+len("' from "):]) // header is `
.='` — strip the `
.=` prefix. eq := strings.IndexByte(header, '=') diff --git a/internal/detector/configaudit/pipconfig_test.go b/internal/detector/configaudit/pipconfig_test.go index 29fa254..1bd7466 100644 --- a/internal/detector/configaudit/pipconfig_test.go +++ b/internal/detector/configaudit/pipconfig_test.go @@ -245,6 +245,45 @@ install.no-build-isolation='true' from PIP_NO_BUILD_ISOLATION } } +// TestParseEffective_NoSourceSuffix locks in pip 24.x output where +// `pip config list -v` no longer emits the trailing ` from ` on +// each line. The parser must still capture the section.key=value, just +// with an empty SourceByKey entry. +// +// Reason: pip 24.3.1 on Fedora 42 emits lines like +// global.index-url='https://pypi.org/simple/' +// (no source). Earlier pip versions added ` from /etc/pip.conf`. +func TestParseEffective_NoSourceSuffix(t *testing.T) { + mock := executor.NewMock() + mock.SetPath("pip", "/usr/bin/pip") + mock.SetCommand("pip 24.3.1\n", "", 0, "pip", "--version") + mock.SetCommand("", "", 0, "pip", "config", "debug") + out := "For variant 'global', will try loading '/etc/pip.conf'\n" + + "global.index-url='https://pypi.org/simple/'\n" + + "global.timeout='60'\n" + mock.SetCommand(out, "", 0, "pip", "config", "list", "-v") + + d := NewPipConfigDetector(mock) + d.ownerLookup = fixedPipOwner() + d.gitTracked = func(_ context.Context, _ string) bool { return false } + d.inGitRepo = func(_ string) bool { return false } + + audit := d.Detect(context.Background(), nil) + if audit.Effective == nil { + t.Fatal("effective view nil") + } + if got := audit.Effective.Config["global.index-url"]; got != "https://pypi.org/simple/" { + t.Errorf("global.index-url = %q, want pypi.org", got) + } + if got := audit.Effective.Config["global.timeout"]; got != "60" { + t.Errorf("global.timeout = %q, want 60", got) + } + // Source unknown — must be empty string, not the line tail. + if got := audit.Effective.SourceByKey["global.index-url"]; got != "" { + t.Errorf("source for global.index-url = %q, want empty", got) + } +} + // TestParseEffective_RedactsEmbeddedCreds locks in the bug fix where // `pip config list -v` emits URL values verbatim — including any // embedded `user:pass@host` userinfo — and we used to copy them into diff --git a/internal/output/pipconfig_verbose.go b/internal/output/pipconfig_verbose.go index 2d37399..e0b4057 100644 --- a/internal/output/pipconfig_verbose.go +++ b/internal/output/pipconfig_verbose.go @@ -208,7 +208,7 @@ func printPipEffective(w io.Writer, c *colors, eff *model.PipEffective) { for _, k := range keys { src := eff.SourceByKey[k] if src == "" { - src = "default" + src = "(source not reported by pip)" } fmt.Fprintf(w, "│ %-40s = %s%s%s %sfrom %s%s\n", k, c.dim, eff.Config[k], c.reset, c.dim, src, c.reset)