Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions cmd/stepsecurity-dev-machine-guard/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,18 @@ func main() {
os.Exit(1)
}

// On Linux, systemd.Install enabled the timer but did not start it.
// Start it now that the inline scan above has released the singleton
// lock, so the timer's first (Persistent=true catch-up) firing does
// not race with that scan (issue #62). Best-effort: a failure here
// means scheduled scans won't run until the next user-systemd
// reload, but the install is otherwise complete.
if runtime.GOOS == "linux" {
if err := systemd.StartTimer(exec, log); err != nil {
log.Progress("warning: timer start failed (%v) — scheduled scans will resume after the next user-systemd reload", err)
}
}

case "uninstall":
_, _ = fmt.Fprintf(os.Stdout, "StepSecurity Dev Machine Guard v%s\n\n", buildinfo.Version)
switch runtime.GOOS {
Expand Down
45 changes: 45 additions & 0 deletions internal/device/device_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package device

import (
"context"
"errors"
"testing"

"github.com/step-security/dev-machine-guard/internal/executor"
Expand Down Expand Up @@ -161,6 +162,50 @@ func TestGather_LinuxDistroOnly(t *testing.T) {
}
}

// TestGather_NoConsoleUser_DoesNotLeakRoot covers issue #63: when running as
// a daemon on macOS and LoggedInUser() reports an error (no GUI console
// user), getDeveloperIdentity must not silently fall back to the process's
// own user (which under a LaunchDaemon is "root"). Identity should resolve
// to "unknown" so the telemetry payload's no_user_logged_in flag fires.
func TestGather_NoConsoleUser_DoesNotLeakRoot(t *testing.T) {
mock := executor.NewMock()
mock.SetGOOS("darwin")
mock.SetIsRoot(true)
mock.SetUsername("root") // process-owner identity, the trap
mock.SetHostname("mac")
mock.SetCommand("SERIAL\n \"IOPlatformSerialNumber\" = \"SERIAL\"\n", "", 0, "ioreg", "-l")
mock.SetCommand("15.1\n", "", 0, "sw_vers", "-productVersion")
mock.SetLoggedInUserError(errors.New("no GUI console user (owner=\"_windowserver\")"))

dev := Gather(context.Background(), mock)

if dev.UserIdentity != "unknown" {
t.Errorf("user_identity: expected 'unknown' (no console user), got %q — root leak from CurrentUser fallback", dev.UserIdentity)
}
}

// TestGather_NoConsoleUser_EnvVarRescues asserts that the env-var path in
// getDeveloperIdentity still works when LoggedInUser would error. Operators
// who populate USER_EMAIL/DEVELOPER_EMAIL in the daemon plist should still
// get correct identity attribution even on no-console-user hosts.
func TestGather_NoConsoleUser_EnvVarRescues(t *testing.T) {
mock := executor.NewMock()
mock.SetGOOS("darwin")
mock.SetIsRoot(true)
mock.SetUsername("root")
mock.SetHostname("mac")
mock.SetCommand("SERIAL\n \"IOPlatformSerialNumber\" = \"SERIAL\"\n", "", 0, "ioreg", "-l")
mock.SetCommand("15.1\n", "", 0, "sw_vers", "-productVersion")
mock.SetLoggedInUserError(errors.New("no console user"))
mock.SetEnv("USER_EMAIL", "operator@example.com")

dev := Gather(context.Background(), mock)

if dev.UserIdentity != "operator@example.com" {
t.Errorf("user_identity: expected USER_EMAIL value, got %q", dev.UserIdentity)
}
}

func TestGather_Windows(t *testing.T) {
mock := executor.NewMock()
mock.SetGOOS("windows")
Expand Down
21 changes: 15 additions & 6 deletions internal/executor/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,23 +175,32 @@ func (r *Real) LoggedInUser() (*user.User, error) {
return r.CurrentUser()
}

// On macOS running as root, detect the console user.
// This mirrors the bash script's get_logged_in_user_info() which uses
// stat -f%Su /dev/console to find who is actually logged in.
// On macOS running as root, detect the console user via
// stat -f%Su /dev/console (mirrors the bash script's
// get_logged_in_user_info()).
//
// When the lookup can't yield a real GUI user — stat errored, console
// is owned by root or _windowserver, or the resolved name doesn't
// exist in Directory Services — we must NOT fall back to
// r.CurrentUser(). Under a LaunchDaemon that returns the root user
// with err == nil, callers treat that as "root is the developer", and
// the telemetry pipeline ships user_identity="root" with
// no_user_logged_in=false (issue #63). Surface the absence as an
// error so callers can flag the run as no-user instead.
ctx := context.Background()
stdout, _, _, err := r.Run(ctx, "stat", "-f%Su", "/dev/console")
if err != nil {
return r.CurrentUser()
return nil, fmt.Errorf("stat /dev/console failed: %w", err)
}

username := strings.TrimSpace(stdout)
if username == "" || username == "root" || username == "_windowserver" {
return r.CurrentUser()
return nil, fmt.Errorf("no GUI console user (owner=%q)", username)
}

u, err := user.Lookup(username)
if err != nil {
return r.CurrentUser()
return nil, fmt.Errorf("console user %q not in directory services: %w", username, err)
}

return u, nil
Expand Down
19 changes: 19 additions & 0 deletions internal/executor/mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ type Mock struct {

// Symlink stubs: path -> resolved target
symlinks map[string]string

// LoggedInUser override — when set, LoggedInUser returns (nil, err)
// instead of falling through to CurrentUser. Used by tests covering
// the macOS+root "no console user" branch (issue #63).
loggedInUserErr error
}

type cmdResult struct {
Expand Down Expand Up @@ -164,6 +169,14 @@ func (m *Mock) SetGOOS(goos string) {
m.goos = goos
}

// SetLoggedInUserError makes LoggedInUser return (nil, err). Pass nil to
// reset to default behavior (delegating to CurrentUser).
func (m *Mock) SetLoggedInUserError(err error) {
m.mu.Lock()
defer m.mu.Unlock()
m.loggedInUserErr = err
}

// --- Executor interface ---

func (m *Mock) Run(_ context.Context, name string, args ...string) (string, string, int, error) {
Expand Down Expand Up @@ -285,6 +298,12 @@ func (m *Mock) Glob(pattern string) ([]string, error) {
}

func (m *Mock) LoggedInUser() (*user.User, error) {
m.mu.RLock()
err := m.loggedInUserErr
m.mu.RUnlock()
if err != nil {
return nil, err
}
return m.CurrentUser()
}

Expand Down
29 changes: 28 additions & 1 deletion internal/systemd/systemd.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,13 @@ func Install(exec executor.Executor, log *progress.Logger) error {
return fmt.Errorf("daemon-reload failed (exit code %d): %s", daemonExitCode, daemonStderr)
}

_, stderr, exitCode, err := exec.Run(ctx, "systemctl", "--user", "enable", "--now", unitName+".timer")
// Enable (without --now) so the unit is loaded across reboots. Activating
// the timer in this session is deferred to StartTimer, which the install
// command calls only after its inline post-install telemetry has released
// the singleton lock. If we used --now here, the timer's Persistent=true +
// already-elapsed OnBootSec would fire the service immediately and race
// with that inline run on the lockfile (issue #62).
_, stderr, exitCode, err := exec.Run(ctx, "systemctl", "--user", "enable", unitName+".timer")
if err != nil {
return fmt.Errorf("failed to enable timer: %w", err)
}
Expand All @@ -96,6 +102,27 @@ func Install(exec executor.Executor, log *progress.Logger) error {
return nil
}

// StartTimer activates the timer that Install enabled. Split out from Install
// so the install command can run its inline post-install telemetry first
// (and release the singleton lock) before the timer is allowed to fire its
// own first invocation. With Persistent=true on the timer unit, this start
// will trigger one immediate catch-up scan via the service unit — that's
// fine because the inline scan has already completed.
func StartTimer(exec executor.Executor, log *progress.Logger) error {
ctx := context.Background()

_, stderr, exitCode, err := exec.Run(ctx, "systemctl", "--user", "start", unitName+".timer")
if err != nil {
return fmt.Errorf("failed to start timer: %w", err)
}
if exitCode != 0 {
return fmt.Errorf("failed to start timer (exit code %d): %s", exitCode, stderr)
}

log.Progress("Timer started")
return nil
}

// Uninstall removes the systemd user timer and service units.
func Uninstall(exec executor.Executor, log *progress.Logger) error {
ctx := context.Background()
Expand Down
39 changes: 39 additions & 0 deletions internal/systemd/systemd_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package systemd

import (
"strings"
"testing"

"github.com/step-security/dev-machine-guard/internal/executor"
"github.com/step-security/dev-machine-guard/internal/progress"
)

// TestStartTimer_IssuesPlainStart asserts the timer-activation command does
// not carry --now (which would be redundant for `start`) and targets the
// expected unit name. Deliberately separate from Install so the install-time
// race fix (issue #62) — enable without --now, then StartTimer after the
// inline scan — stays locked in.
func TestStartTimer_IssuesPlainStart(t *testing.T) {
mock := executor.NewMock()
mock.SetCommand("", "", 0, "systemctl", "--user", "start", unitName+".timer")

if err := StartTimer(mock, progress.NewLogger(progress.LevelInfo)); err != nil {
t.Fatalf("StartTimer returned error: %v", err)
}
}

// TestStartTimer_PropagatesFailure asserts the function surfaces a non-zero
// systemctl exit so the install command can warn the operator.
func TestStartTimer_PropagatesFailure(t *testing.T) {
mock := executor.NewMock()
mock.SetCommand("", "Failed to start: Unit not found", 1,
"systemctl", "--user", "start", unitName+".timer")

err := StartTimer(mock, progress.NewLogger(progress.LevelInfo))
if err == nil {
t.Fatal("expected error from non-zero systemctl exit, got nil")
}
if !strings.Contains(err.Error(), "exit code 1") {
t.Errorf("error should reference the exit code: %v", err)
}
}
27 changes: 17 additions & 10 deletions internal/telemetry/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -514,16 +514,23 @@ func Run(exec executor.Executor, log *progress.Logger, cfg *cli.Config) (err err

// Build payload
payload := &Payload{
CustomerID: config.CustomerID,
DeviceID: dev.SerialNumber,
SerialNumber: dev.SerialNumber,
UserIdentity: dev.UserIdentity,
Hostname: dev.Hostname,
Platform: dev.Platform,
OSVersion: dev.OSVersion,
AgentVersion: buildinfo.Version,
CollectedAt: endTime.Unix(),
NoUserLoggedIn: dev.UserIdentity == "" || dev.UserIdentity == "unknown",
CustomerID: config.CustomerID,
DeviceID: dev.SerialNumber,
SerialNumber: dev.SerialNumber,
UserIdentity: dev.UserIdentity,
Hostname: dev.Hostname,
Platform: dev.Platform,
OSVersion: dev.OSVersion,
AgentVersion: buildinfo.Version,
CollectedAt: endTime.Unix(),
// Treat a daemon-context UserIdentity == "root" as "no real user"
// too. Even with the executor.LoggedInUser() fix (issue #63), this
// catches any other code path that ends up shipping "root" from
// under a LaunchDaemon (e.g., env-var path returning literal "root",
// future regressions in detection).
NoUserLoggedIn: dev.UserIdentity == "" ||
dev.UserIdentity == "unknown" ||
(dev.UserIdentity == "root" && exec.IsRoot()),

IDEExtensions: extensions,
IDEInstallations: ides,
Expand Down
Loading