step-security · swarit-stepsecurity · May 5, 2026
diff --git a/cmd/stepsecurity-dev-machine-guard/main.go b/cmd/stepsecurity-dev-machine-guard/main.go
@@ -136,6 +136,18 @@ func main() {
 			os.Exit(1)
 		}
 
+		// On Linux, systemd.Install enabled the timer but did not start it.
+		// Start it now that the inline scan above has released the singleton
+		// lock, so the timer's first (Persistent=true catch-up) firing does
+		// not race with that scan (issue #62). Best-effort: a failure here
+		// means scheduled scans won't run until the next user-systemd
+		// reload, but the install is otherwise complete.
+		if runtime.GOOS == "linux" {
+			if err := systemd.StartTimer(exec, log); err != nil {
+				log.Progress("warning: timer start failed (%v) — scheduled scans will resume after the next user-systemd reload", err)
+			}
+		}
+
 	case "uninstall":
 		_, _ = fmt.Fprintf(os.Stdout, "StepSecurity Dev Machine Guard v%s\n\n", buildinfo.Version)
 		switch runtime.GOOS {

diff --git a/internal/device/device_test.go b/internal/device/device_test.go
@@ -2,6 +2,7 @@ package device
 
 import (
 	"context"
+	"errors"
 	"testing"
 
 	"github.com/step-security/dev-machine-guard/internal/executor"
@@ -161,6 +162,50 @@ func TestGather_LinuxDistroOnly(t *testing.T) {
 	}
 }
 
+// TestGather_NoConsoleUser_DoesNotLeakRoot covers issue #63: when running as
+// a daemon on macOS and LoggedInUser() reports an error (no GUI console
+// user), getDeveloperIdentity must not silently fall back to the process's
+// own user (which under a LaunchDaemon is "root"). Identity should resolve
+// to "unknown" so the telemetry payload's no_user_logged_in flag fires.
+func TestGather_NoConsoleUser_DoesNotLeakRoot(t *testing.T) {
+	mock := executor.NewMock()
+	mock.SetGOOS("darwin")
+	mock.SetIsRoot(true)
+	mock.SetUsername("root") // process-owner identity, the trap
+	mock.SetHostname("mac")
+	mock.SetCommand("SERIAL\n    \"IOPlatformSerialNumber\" = \"SERIAL\"\n", "", 0, "ioreg", "-l")
+	mock.SetCommand("15.1\n", "", 0, "sw_vers", "-productVersion")
+	mock.SetLoggedInUserError(errors.New("no GUI console user (owner=\"_windowserver\")"))
+
+	dev := Gather(context.Background(), mock)
+
+	if dev.UserIdentity != "unknown" {
+		t.Errorf("user_identity: expected 'unknown' (no console user), got %q — root leak from CurrentUser fallback", dev.UserIdentity)
+	}
+}
+
+// TestGather_NoConsoleUser_EnvVarRescues asserts that the env-var path in
+// getDeveloperIdentity still works when LoggedInUser would error. Operators
+// who populate USER_EMAIL/DEVELOPER_EMAIL in the daemon plist should still
+// get correct identity attribution even on no-console-user hosts.
+func TestGather_NoConsoleUser_EnvVarRescues(t *testing.T) {
+	mock := executor.NewMock()
+	mock.SetGOOS("darwin")
+	mock.SetIsRoot(true)
+	mock.SetUsername("root")
+	mock.SetHostname("mac")
+	mock.SetCommand("SERIAL\n    \"IOPlatformSerialNumber\" = \"SERIAL\"\n", "", 0, "ioreg", "-l")
+	mock.SetCommand("15.1\n", "", 0, "sw_vers", "-productVersion")
+	mock.SetLoggedInUserError(errors.New("no console user"))
+	mock.SetEnv("USER_EMAIL", "operator@example.com")
+
+	dev := Gather(context.Background(), mock)
+
+	if dev.UserIdentity != "operator@example.com" {
+		t.Errorf("user_identity: expected USER_EMAIL value, got %q", dev.UserIdentity)
+	}
+}
+
 func TestGather_Windows(t *testing.T) {
 	mock := executor.NewMock()
 	mock.SetGOOS("windows")

diff --git a/internal/executor/executor.go b/internal/executor/executor.go
@@ -175,23 +175,32 @@ func (r *Real) LoggedInUser() (*user.User, error) {
 		return r.CurrentUser()
 	}
 
-	// On macOS running as root, detect the console user.
-	// This mirrors the bash script's get_logged_in_user_info() which uses
-	// stat -f%Su /dev/console to find who is actually logged in.
+	// On macOS running as root, detect the console user via
+	// stat -f%Su /dev/console (mirrors the bash script's
+	// get_logged_in_user_info()).
+	//
+	// When the lookup can't yield a real GUI user — stat errored, console
+	// is owned by root or _windowserver, or the resolved name doesn't
+	// exist in Directory Services — we must NOT fall back to
+	// r.CurrentUser(). Under a LaunchDaemon that returns the root user
+	// with err == nil, callers treat that as "root is the developer", and
+	// the telemetry pipeline ships user_identity="root" with
+	// no_user_logged_in=false (issue #63). Surface the absence as an
+	// error so callers can flag the run as no-user instead.
 	ctx := context.Background()
 	stdout, _, _, err := r.Run(ctx, "stat", "-f%Su", "/dev/console")
 	if err != nil {
-		return r.CurrentUser()
+		return nil, fmt.Errorf("stat /dev/console failed: %w", err)
 	}
 
 	username := strings.TrimSpace(stdout)
 	if username == "" || username == "root" || username == "_windowserver" {
-		return r.CurrentUser()
+		return nil, fmt.Errorf("no GUI console user (owner=%q)", username)
 	}
 
 	u, err := user.Lookup(username)
 	if err != nil {
-		return r.CurrentUser()
+		return nil, fmt.Errorf("console user %q not in directory services: %w", username, err)
 	}
 
 	return u, nil

diff --git a/internal/executor/mock.go b/internal/executor/mock.go
@@ -39,6 +39,11 @@ type Mock struct {
 
 	// Symlink stubs: path -> resolved target
 	symlinks map[string]string
+
+	// LoggedInUser override — when set, LoggedInUser returns (nil, err)
+	// instead of falling through to CurrentUser. Used by tests covering
+	// the macOS+root "no console user" branch (issue #63).
+	loggedInUserErr error
 }
 
 type cmdResult struct {
@@ -164,6 +169,14 @@ func (m *Mock) SetGOOS(goos string) {
 	m.goos = goos
 }
 
+// SetLoggedInUserError makes LoggedInUser return (nil, err). Pass nil to
+// reset to default behavior (delegating to CurrentUser).
+func (m *Mock) SetLoggedInUserError(err error) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	m.loggedInUserErr = err
+}
+
 // --- Executor interface ---
 
 func (m *Mock) Run(_ context.Context, name string, args ...string) (string, string, int, error) {
@@ -285,6 +298,12 @@ func (m *Mock) Glob(pattern string) ([]string, error) {
 }
 
 func (m *Mock) LoggedInUser() (*user.User, error) {
+	m.mu.RLock()
+	err := m.loggedInUserErr
+	m.mu.RUnlock()
+	if err != nil {
+		return nil, err
+	}
 	return m.CurrentUser()
 }
 

diff --git a/internal/systemd/systemd.go b/internal/systemd/systemd.go
@@ -78,7 +78,13 @@ func Install(exec executor.Executor, log *progress.Logger) error {
 		return fmt.Errorf("daemon-reload failed (exit code %d): %s", daemonExitCode, daemonStderr)
 	}
 
-	_, stderr, exitCode, err := exec.Run(ctx, "systemctl", "--user", "enable", "--now", unitName+".timer")
+	// Enable (without --now) so the unit is loaded across reboots. Activating
+	// the timer in this session is deferred to StartTimer, which the install
+	// command calls only after its inline post-install telemetry has released
+	// the singleton lock. If we used --now here, the timer's Persistent=true +
+	// already-elapsed OnBootSec would fire the service immediately and race
+	// with that inline run on the lockfile (issue #62).
+	_, stderr, exitCode, err := exec.Run(ctx, "systemctl", "--user", "enable", unitName+".timer")
 	if err != nil {
 		return fmt.Errorf("failed to enable timer: %w", err)
 	}
@@ -96,6 +102,27 @@ func Install(exec executor.Executor, log *progress.Logger) error {
 	return nil
 }
 
+// StartTimer activates the timer that Install enabled. Split out from Install
+// so the install command can run its inline post-install telemetry first
+// (and release the singleton lock) before the timer is allowed to fire its
+// own first invocation. With Persistent=true on the timer unit, this start
+// will trigger one immediate catch-up scan via the service unit — that's
+// fine because the inline scan has already completed.
+func StartTimer(exec executor.Executor, log *progress.Logger) error {
+	ctx := context.Background()
+
+	_, stderr, exitCode, err := exec.Run(ctx, "systemctl", "--user", "start", unitName+".timer")
+	if err != nil {
+		return fmt.Errorf("failed to start timer: %w", err)
+	}
+	if exitCode != 0 {
+		return fmt.Errorf("failed to start timer (exit code %d): %s", exitCode, stderr)
+	}
+
+	log.Progress("Timer started")
+	return nil
+}
+
 // Uninstall removes the systemd user timer and service units.
 func Uninstall(exec executor.Executor, log *progress.Logger) error {
 	ctx := context.Background()

diff --git a/internal/systemd/systemd_test.go b/internal/systemd/systemd_test.go
@@ -0,0 +1,39 @@
+package systemd
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/step-security/dev-machine-guard/internal/executor"
+	"github.com/step-security/dev-machine-guard/internal/progress"
+)
+
+// TestStartTimer_IssuesPlainStart asserts the timer-activation command does
+// not carry --now (which would be redundant for `start`) and targets the
+// expected unit name. Deliberately separate from Install so the install-time
+// race fix (issue #62) — enable without --now, then StartTimer after the
+// inline scan — stays locked in.
+func TestStartTimer_IssuesPlainStart(t *testing.T) {
+	mock := executor.NewMock()
+	mock.SetCommand("", "", 0, "systemctl", "--user", "start", unitName+".timer")
+
+	if err := StartTimer(mock, progress.NewLogger(progress.LevelInfo)); err != nil {
+		t.Fatalf("StartTimer returned error: %v", err)
+	}
+}
+
+// TestStartTimer_PropagatesFailure asserts the function surfaces a non-zero
+// systemctl exit so the install command can warn the operator.
+func TestStartTimer_PropagatesFailure(t *testing.T) {
+	mock := executor.NewMock()
+	mock.SetCommand("", "Failed to start: Unit not found", 1,
+		"systemctl", "--user", "start", unitName+".timer")
+
+	err := StartTimer(mock, progress.NewLogger(progress.LevelInfo))
+	if err == nil {
+		t.Fatal("expected error from non-zero systemctl exit, got nil")
+	}
+	if !strings.Contains(err.Error(), "exit code 1") {
+		t.Errorf("error should reference the exit code: %v", err)
+	}
+}
diff --git a/internal/telemetry/telemetry.go b/internal/telemetry/telemetry.go
@@ -514,16 +514,23 @@ func Run(exec executor.Executor, log *progress.Logger, cfg *cli.Config) (err err
 
 	// Build payload
 	payload := &Payload{
-		CustomerID:     config.CustomerID,
-		DeviceID:       dev.SerialNumber,
-		SerialNumber:   dev.SerialNumber,
-		UserIdentity:   dev.UserIdentity,
-		Hostname:       dev.Hostname,
-		Platform:       dev.Platform,
-		OSVersion:      dev.OSVersion,
-		AgentVersion:   buildinfo.Version,
-		CollectedAt:    endTime.Unix(),
-		NoUserLoggedIn: dev.UserIdentity == "" || dev.UserIdentity == "unknown",
+		CustomerID:   config.CustomerID,
+		DeviceID:     dev.SerialNumber,
+		SerialNumber: dev.SerialNumber,
+		UserIdentity: dev.UserIdentity,
+		Hostname:     dev.Hostname,
+		Platform:     dev.Platform,
+		OSVersion:    dev.OSVersion,
+		AgentVersion: buildinfo.Version,
+		CollectedAt:  endTime.Unix(),
+		// Treat a daemon-context UserIdentity == "root" as "no real user"
+		// too. Even with the executor.LoggedInUser() fix (issue #63), this
+		// catches any other code path that ends up shipping "root" from
+		// under a LaunchDaemon (e.g., env-var path returning literal "root",
+		// future regressions in detection).
+		NoUserLoggedIn: dev.UserIdentity == "" ||
+			dev.UserIdentity == "unknown" ||
+			(dev.UserIdentity == "root" && exec.IsRoot()),
 
 		IDEExtensions:        extensions,
 		IDEInstallations:     ides,