diff --git a/cmd/stepsecurity-dev-machine-guard/main.go b/cmd/stepsecurity-dev-machine-guard/main.go index 3a6742f..69f0375 100644 --- a/cmd/stepsecurity-dev-machine-guard/main.go +++ b/cmd/stepsecurity-dev-machine-guard/main.go @@ -136,6 +136,18 @@ func main() { os.Exit(1) } + // On Linux, systemd.Install enabled the timer but did not start it. + // Start it now that the inline scan above has released the singleton + // lock, so the timer's first (Persistent=true catch-up) firing does + // not race with that scan (issue #62). Best-effort: a failure here + // means scheduled scans won't run until the next user-systemd + // reload, but the install is otherwise complete. + if runtime.GOOS == "linux" { + if err := systemd.StartTimer(exec, log); err != nil { + log.Progress("warning: timer start failed (%v) — scheduled scans will resume after the next user-systemd reload", err) + } + } + case "uninstall": _, _ = fmt.Fprintf(os.Stdout, "StepSecurity Dev Machine Guard v%s\n\n", buildinfo.Version) switch runtime.GOOS { diff --git a/internal/device/device_test.go b/internal/device/device_test.go index 76832b0..fad1658 100644 --- a/internal/device/device_test.go +++ b/internal/device/device_test.go @@ -2,6 +2,7 @@ package device import ( "context" + "errors" "testing" "github.com/step-security/dev-machine-guard/internal/executor" @@ -161,6 +162,50 @@ func TestGather_LinuxDistroOnly(t *testing.T) { } } +// TestGather_NoConsoleUser_DoesNotLeakRoot covers issue #63: when running as +// a daemon on macOS and LoggedInUser() reports an error (no GUI console +// user), getDeveloperIdentity must not silently fall back to the process's +// own user (which under a LaunchDaemon is "root"). Identity should resolve +// to "unknown" so the telemetry payload's no_user_logged_in flag fires. +func TestGather_NoConsoleUser_DoesNotLeakRoot(t *testing.T) { + mock := executor.NewMock() + mock.SetGOOS("darwin") + mock.SetIsRoot(true) + mock.SetUsername("root") // process-owner identity, the trap + mock.SetHostname("mac") + mock.SetCommand("SERIAL\n \"IOPlatformSerialNumber\" = \"SERIAL\"\n", "", 0, "ioreg", "-l") + mock.SetCommand("15.1\n", "", 0, "sw_vers", "-productVersion") + mock.SetLoggedInUserError(errors.New("no GUI console user (owner=\"_windowserver\")")) + + dev := Gather(context.Background(), mock) + + if dev.UserIdentity != "unknown" { + t.Errorf("user_identity: expected 'unknown' (no console user), got %q — root leak from CurrentUser fallback", dev.UserIdentity) + } +} + +// TestGather_NoConsoleUser_EnvVarRescues asserts that the env-var path in +// getDeveloperIdentity still works when LoggedInUser would error. Operators +// who populate USER_EMAIL/DEVELOPER_EMAIL in the daemon plist should still +// get correct identity attribution even on no-console-user hosts. +func TestGather_NoConsoleUser_EnvVarRescues(t *testing.T) { + mock := executor.NewMock() + mock.SetGOOS("darwin") + mock.SetIsRoot(true) + mock.SetUsername("root") + mock.SetHostname("mac") + mock.SetCommand("SERIAL\n \"IOPlatformSerialNumber\" = \"SERIAL\"\n", "", 0, "ioreg", "-l") + mock.SetCommand("15.1\n", "", 0, "sw_vers", "-productVersion") + mock.SetLoggedInUserError(errors.New("no console user")) + mock.SetEnv("USER_EMAIL", "operator@example.com") + + dev := Gather(context.Background(), mock) + + if dev.UserIdentity != "operator@example.com" { + t.Errorf("user_identity: expected USER_EMAIL value, got %q", dev.UserIdentity) + } +} + func TestGather_Windows(t *testing.T) { mock := executor.NewMock() mock.SetGOOS("windows") diff --git a/internal/executor/executor.go b/internal/executor/executor.go index 0068625..d411964 100644 --- a/internal/executor/executor.go +++ b/internal/executor/executor.go @@ -175,23 +175,32 @@ func (r *Real) LoggedInUser() (*user.User, error) { return r.CurrentUser() } - // On macOS running as root, detect the console user. - // This mirrors the bash script's get_logged_in_user_info() which uses - // stat -f%Su /dev/console to find who is actually logged in. + // On macOS running as root, detect the console user via + // stat -f%Su /dev/console (mirrors the bash script's + // get_logged_in_user_info()). + // + // When the lookup can't yield a real GUI user — stat errored, console + // is owned by root or _windowserver, or the resolved name doesn't + // exist in Directory Services — we must NOT fall back to + // r.CurrentUser(). Under a LaunchDaemon that returns the root user + // with err == nil, callers treat that as "root is the developer", and + // the telemetry pipeline ships user_identity="root" with + // no_user_logged_in=false (issue #63). Surface the absence as an + // error so callers can flag the run as no-user instead. ctx := context.Background() stdout, _, _, err := r.Run(ctx, "stat", "-f%Su", "/dev/console") if err != nil { - return r.CurrentUser() + return nil, fmt.Errorf("stat /dev/console failed: %w", err) } username := strings.TrimSpace(stdout) if username == "" || username == "root" || username == "_windowserver" { - return r.CurrentUser() + return nil, fmt.Errorf("no GUI console user (owner=%q)", username) } u, err := user.Lookup(username) if err != nil { - return r.CurrentUser() + return nil, fmt.Errorf("console user %q not in directory services: %w", username, err) } return u, nil diff --git a/internal/executor/mock.go b/internal/executor/mock.go index 6d27d94..1edeaf4 100644 --- a/internal/executor/mock.go +++ b/internal/executor/mock.go @@ -39,6 +39,11 @@ type Mock struct { // Symlink stubs: path -> resolved target symlinks map[string]string + + // LoggedInUser override — when set, LoggedInUser returns (nil, err) + // instead of falling through to CurrentUser. Used by tests covering + // the macOS+root "no console user" branch (issue #63). + loggedInUserErr error } type cmdResult struct { @@ -164,6 +169,14 @@ func (m *Mock) SetGOOS(goos string) { m.goos = goos } +// SetLoggedInUserError makes LoggedInUser return (nil, err). Pass nil to +// reset to default behavior (delegating to CurrentUser). +func (m *Mock) SetLoggedInUserError(err error) { + m.mu.Lock() + defer m.mu.Unlock() + m.loggedInUserErr = err +} + // --- Executor interface --- func (m *Mock) Run(_ context.Context, name string, args ...string) (string, string, int, error) { @@ -285,6 +298,12 @@ func (m *Mock) Glob(pattern string) ([]string, error) { } func (m *Mock) LoggedInUser() (*user.User, error) { + m.mu.RLock() + err := m.loggedInUserErr + m.mu.RUnlock() + if err != nil { + return nil, err + } return m.CurrentUser() } diff --git a/internal/systemd/systemd.go b/internal/systemd/systemd.go index 42dc132..3a15d37 100644 --- a/internal/systemd/systemd.go +++ b/internal/systemd/systemd.go @@ -78,7 +78,13 @@ func Install(exec executor.Executor, log *progress.Logger) error { return fmt.Errorf("daemon-reload failed (exit code %d): %s", daemonExitCode, daemonStderr) } - _, stderr, exitCode, err := exec.Run(ctx, "systemctl", "--user", "enable", "--now", unitName+".timer") + // Enable (without --now) so the unit is loaded across reboots. Activating + // the timer in this session is deferred to StartTimer, which the install + // command calls only after its inline post-install telemetry has released + // the singleton lock. If we used --now here, the timer's Persistent=true + + // already-elapsed OnBootSec would fire the service immediately and race + // with that inline run on the lockfile (issue #62). + _, stderr, exitCode, err := exec.Run(ctx, "systemctl", "--user", "enable", unitName+".timer") if err != nil { return fmt.Errorf("failed to enable timer: %w", err) } @@ -96,6 +102,27 @@ func Install(exec executor.Executor, log *progress.Logger) error { return nil } +// StartTimer activates the timer that Install enabled. Split out from Install +// so the install command can run its inline post-install telemetry first +// (and release the singleton lock) before the timer is allowed to fire its +// own first invocation. With Persistent=true on the timer unit, this start +// will trigger one immediate catch-up scan via the service unit — that's +// fine because the inline scan has already completed. +func StartTimer(exec executor.Executor, log *progress.Logger) error { + ctx := context.Background() + + _, stderr, exitCode, err := exec.Run(ctx, "systemctl", "--user", "start", unitName+".timer") + if err != nil { + return fmt.Errorf("failed to start timer: %w", err) + } + if exitCode != 0 { + return fmt.Errorf("failed to start timer (exit code %d): %s", exitCode, stderr) + } + + log.Progress("Timer started") + return nil +} + // Uninstall removes the systemd user timer and service units. func Uninstall(exec executor.Executor, log *progress.Logger) error { ctx := context.Background() diff --git a/internal/systemd/systemd_test.go b/internal/systemd/systemd_test.go new file mode 100644 index 0000000..59a7d38 --- /dev/null +++ b/internal/systemd/systemd_test.go @@ -0,0 +1,39 @@ +package systemd + +import ( + "strings" + "testing" + + "github.com/step-security/dev-machine-guard/internal/executor" + "github.com/step-security/dev-machine-guard/internal/progress" +) + +// TestStartTimer_IssuesPlainStart asserts the timer-activation command does +// not carry --now (which would be redundant for `start`) and targets the +// expected unit name. Deliberately separate from Install so the install-time +// race fix (issue #62) — enable without --now, then StartTimer after the +// inline scan — stays locked in. +func TestStartTimer_IssuesPlainStart(t *testing.T) { + mock := executor.NewMock() + mock.SetCommand("", "", 0, "systemctl", "--user", "start", unitName+".timer") + + if err := StartTimer(mock, progress.NewLogger(progress.LevelInfo)); err != nil { + t.Fatalf("StartTimer returned error: %v", err) + } +} + +// TestStartTimer_PropagatesFailure asserts the function surfaces a non-zero +// systemctl exit so the install command can warn the operator. +func TestStartTimer_PropagatesFailure(t *testing.T) { + mock := executor.NewMock() + mock.SetCommand("", "Failed to start: Unit not found", 1, + "systemctl", "--user", "start", unitName+".timer") + + err := StartTimer(mock, progress.NewLogger(progress.LevelInfo)) + if err == nil { + t.Fatal("expected error from non-zero systemctl exit, got nil") + } + if !strings.Contains(err.Error(), "exit code 1") { + t.Errorf("error should reference the exit code: %v", err) + } +} diff --git a/internal/telemetry/telemetry.go b/internal/telemetry/telemetry.go index 82a9e72..7bfa423 100644 --- a/internal/telemetry/telemetry.go +++ b/internal/telemetry/telemetry.go @@ -514,16 +514,23 @@ func Run(exec executor.Executor, log *progress.Logger, cfg *cli.Config) (err err // Build payload payload := &Payload{ - CustomerID: config.CustomerID, - DeviceID: dev.SerialNumber, - SerialNumber: dev.SerialNumber, - UserIdentity: dev.UserIdentity, - Hostname: dev.Hostname, - Platform: dev.Platform, - OSVersion: dev.OSVersion, - AgentVersion: buildinfo.Version, - CollectedAt: endTime.Unix(), - NoUserLoggedIn: dev.UserIdentity == "" || dev.UserIdentity == "unknown", + CustomerID: config.CustomerID, + DeviceID: dev.SerialNumber, + SerialNumber: dev.SerialNumber, + UserIdentity: dev.UserIdentity, + Hostname: dev.Hostname, + Platform: dev.Platform, + OSVersion: dev.OSVersion, + AgentVersion: buildinfo.Version, + CollectedAt: endTime.Unix(), + // Treat a daemon-context UserIdentity == "root" as "no real user" + // too. Even with the executor.LoggedInUser() fix (issue #63), this + // catches any other code path that ends up shipping "root" from + // under a LaunchDaemon (e.g., env-var path returning literal "root", + // future regressions in detection). + NoUserLoggedIn: dev.UserIdentity == "" || + dev.UserIdentity == "unknown" || + (dev.UserIdentity == "root" && exec.IsRoot()), IDEExtensions: extensions, IDEInstallations: ides,