diff --git a/apps/penpal/ERD.md b/apps/penpal/ERD.md index 0ff706c24..2b915d0c2 100644 --- a/apps/penpal/ERD.md +++ b/apps/penpal/ERD.md @@ -104,7 +104,7 @@ see-also: - **E-PENPAL-CACHE**: An in-memory cache (`sync.RWMutex`-protected) holds the full project list and per-project file lists. `RefreshProject()` walks the filesystem for full rescans; `RefreshAllProjects()` runs in parallel with a concurrency limit of 4. `RescanWith()` replaces the project list while preserving git enrichment and cached file data for unchanged projects — only new or source-changed projects are rescanned. Incremental mutations (`UpsertFile`, `RemoveFile`) update individual cache entries without walking the filesystem. ← [P-PENPAL-PROJECT-FILE-TREE](PRODUCT.md#P-PENPAL-PROJECT-FILE-TREE) -- **E-PENPAL-SCAN**: `scanProjectSources()` walks `RootPath` recursively for tree sources, skipping `.git`-file directories (nested worktrees), gitignored directories (via `git check-ignore`), source-type `SkipDirs`, and non-`.md` files. Gitignore checking is initialized once per scan via `newGitIgnoreChecker(projectPath)`, which detects whether the project is a git repo; non-git projects skip the gitignore check gracefully. On write or read failure (partial 4-field response), the checker disables itself (`isGitRepo=false`) to prevent permanent stream desync. The source's own `rootPath` is never checked against gitignore (the `path != rootPath` guard ensures registered sources always scan). Files returning `""` from `ClassifyFile()` are hidden. Files are de-duplicated by project-relative path (first source wins) and sorted by `ModTime` descending. `EnsureProjectScanned()` is the lazy-scan entry point — it uses write-lock gating (`projectScanned` set under `mu.Lock` before scanning) to prevent concurrent requests from triggering duplicate filesystem walks. 
`projectHasAnyMarkdown()` performs a cheap startup check that aligns with the full scan: it uses the same gitignore checking, skips `.git`, `node_modules`, `.hg`, `.svn`, and nested worktree directories, and stops at the first `.md` file found. `CheckAllProjectsHasFiles()` runs with a concurrency limit of 4 to cap subprocess spawning. `ResolveFileInfo()` resolves source membership for a single absolute path without spawning a git check-ignore process — it applies the same source-priority, SkipDirs, RequireSibling, and ClassifyFile rules as the full walk. +- **E-PENPAL-SCAN**: `scanProjectSources()` walks `RootPath` recursively for tree sources, skipping `.git`-file directories (nested worktrees), gitignored directories (via a pure-Go `gitignore.Matcher`), source-type `SkipDirs`, and non-`.md` files. Gitignore matching is initialized once per scan via `newGitIgnoreMatcher(projectPath)`, which parses `.gitignore` files, `.git/info/exclude`, and the global gitignore in-process — no per-path subprocesses are spawned (the global gitignore location is resolved once per process via `git config core.excludesFile`). Non-git projects return a nil matcher that never reports paths as ignored. The source's own `rootPath` is never checked against gitignore (the `path != rootPath` guard ensures registered sources always scan). Files returning `""` from `ClassifyFile()` are hidden. Files are de-duplicated by project-relative path (first source wins) and sorted by `ModTime` descending. `EnsureProjectScanned()` is the lazy-scan entry point — it uses write-lock gating (`projectScanned` set under `mu.Lock` before scanning) to prevent concurrent requests from triggering duplicate filesystem walks. `projectHasAnyMarkdown()` performs a cheap startup check: it skips `.git`, `node_modules`, `.hg`, `.svn`, and nested worktree directories, and stops at the first `.md` file found — it does not use gitignore matching (false positives are harmless since the full scan applies proper filtering). `CheckAllProjectsHasFiles()` runs with a concurrency limit of 4. 
`ResolveFileInfo()` resolves source membership for a single absolute path using the same pure-Go gitignore matcher — it applies the same source-priority, SkipDirs, RequireSibling, and ClassifyFile rules as the full walk, and checks ancestor directories against gitignore without spawning subprocesses. ← [P-PENPAL-PROJECT-FILE-TREE](PRODUCT.md#P-PENPAL-PROJECT-FILE-TREE), [P-PENPAL-FILE-TYPES](PRODUCT.md#P-PENPAL-FILE-TYPES), [P-PENPAL-SRC-DEDUP](PRODUCT.md#P-PENPAL-SRC-DEDUP), [P-PENPAL-SRC-GITIGNORE](PRODUCT.md#P-PENPAL-SRC-GITIGNORE) - **E-PENPAL-TITLE-EXTRACT**: `EnrichTitles()` reads the first 20 lines of each file to extract H1 headings. Titles are cached and shown as the primary display name when present. diff --git a/apps/penpal/TESTING.md b/apps/penpal/TESTING.md index bee14564c..f1349f334 100644 --- a/apps/penpal/TESTING.md +++ b/apps/penpal/TESTING.md @@ -65,7 +65,7 @@ see-also: | Source Types — claude-plans (P-PENPAL-SRC-CLAUDE-PLANS) | — | — | — | — | | Source Types — manual (E-PENPAL-SRC-MANUAL) | — | — | grouping_test.go (TestBuildFileGroups_ManualSourceDirHeadings) | — | | Favorites (P-PENPAL-FAVORITES, P-PENPAL-FAVORITE-ACTIONS, E-PENPAL-FAVORITES) | api_favorites_test.go (TestBuildFavoriteEntries_TreeFallsBackWithoutAllMarkdown) | — | api_favorites_test.go (TestAPIFavorites_ListExistingManualSources, TestAPIFavorites_AddAndRemove) | — | -| Cache & File Scanning (E-PENPAL-CACHE, SCAN) | cache_test.go (TestCheckAllProjectsHasFiles, TestProjectHasAnyMarkdown_IgnoresGitignore, TestProjectHasAnyMarkdown_SkipsVCSDirs, TestAllFiles_DeduplicatesAllMarkdown, TestEnsureProjectScanned_NoDuplicateScans, TestResolveFileInfo, TestUpsertFile, TestRemoveFile, TestRescanWith_PreservesUnchangedProjects, TestSourcesChanged) | — | — | — | +| Cache & File Scanning (E-PENPAL-CACHE, SCAN) | cache_test.go (TestCheckAllProjectsHasFiles, TestProjectHasAnyMarkdown_IgnoresGitignore, TestProjectHasAnyMarkdown_SkipsVCSDirs, TestAllFiles_DeduplicatesAllMarkdown, 
TestEnsureProjectScanned_NoDuplicateScans, TestResolveFileInfo, TestResolveFileInfo_SkipsGitignored, TestUpsertFile, TestRemoveFile, TestRescanWith_PreservesUnchangedProjects, TestSourcesChanged), gitignore_test.go (TestParseLine, TestGlobMatch, TestIsIgnoredDir_BasicPatterns, TestIsIgnoredDir_WildcardPatterns, TestIsIgnoredDir_Negation, TestIsIgnoredDir_DoubleStarPattern, TestIsIgnoredDir_NestedGitignore, TestIsIgnoredDir_AnchoredPattern, TestIsIgnoredDir_GitInfoExclude, TestIsIgnoredDir_Caching, TestIsIgnoredDir_PatternWithoutTrailingSlash) | — | — | — | | Worktree Support (P-PENPAL-WORKTREE) | discovery/worktree_test.go, cache/worktree_test.go | Layout.test.tsx | worktree_test.go (API + MCP) | — | | Worktree Watch (E-PENPAL-WORKTREE-WATCH) | watcher_test.go | — | — | — | | Worktree Dropdown (P-PENPAL-PROJECT-WORKTREE-DROPDOWN) | — | Layout.test.tsx | — | — | diff --git a/apps/penpal/cmd/penpal-server/main.go b/apps/penpal/cmd/penpal-server/main.go index fd0109114..baae5c60b 100644 --- a/apps/penpal/cmd/penpal-server/main.go +++ b/apps/penpal/cmd/penpal-server/main.go @@ -68,8 +68,6 @@ func runServe(port int, rootOverride string) { saveTimer.Stop() } saveTimer = time.AfterFunc(5*time.Second, func() { - saveMu.Lock() - defer saveMu.Unlock() if err := act.Save(activityPath); err != nil { log.Printf("Warning: could not save activity: %v", err) } diff --git a/apps/penpal/frontend/src-tauri/src/lib.rs b/apps/penpal/frontend/src-tauri/src/lib.rs index fca95d252..19caf2f54 100644 --- a/apps/penpal/frontend/src-tauri/src/lib.rs +++ b/apps/penpal/frontend/src-tauri/src/lib.rs @@ -39,6 +39,29 @@ struct SessionState { /// In-memory geometry registry, updated on move/resize events. struct GeoRegistry(Mutex>); +// E-PENPAL-GEO-TRACK: ensure a geometry entry exists for the given window label, +// inserting a new one from the current window state if absent. 
+fn ensure_geo_entry<'a>( + map: &'a mut HashMap, + label: &str, + app: &tauri::AppHandle, +) -> Option<&'a mut WindowGeometry> { + if !map.contains_key(label) { + let win = app.get_webview_window(label)?; + let pos = win.outer_position().unwrap_or(tauri::PhysicalPosition { x: 0, y: 0 }); + let size = win.outer_size().unwrap_or(tauri::PhysicalSize { width: 1200, height: 800 }); + map.insert(label.to_string(), WindowGeometry { + label: label.to_string(), + x: pos.x, + y: pos.y, + width: size.width, + height: size.height, + active_path: String::new(), + }); + } + map.get_mut(label) +} + fn session_file_path() -> std::path::PathBuf { let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string()); std::path::Path::new(&home).join(".config/penpal/window-state.json") @@ -300,37 +323,17 @@ pub fn run() { match win_event { tauri::WindowEvent::Moved(pos) => { if let Ok(mut map) = app_handle.state::().0.lock() { - if let Some(entry) = map.get_mut(label) { + if let Some(entry) = ensure_geo_entry(&mut map, label, app_handle) { entry.x = pos.x; entry.y = pos.y; - } else if let Some(win) = app_handle.get_webview_window(label) { - let size = win.outer_size().unwrap_or(tauri::PhysicalSize { width: 1200, height: 800 }); - map.insert(label.to_string(), WindowGeometry { - label: label.to_string(), - x: pos.x, - y: pos.y, - width: size.width, - height: size.height, - active_path: String::new(), - }); } } } tauri::WindowEvent::Resized(size) => { if let Ok(mut map) = app_handle.state::().0.lock() { - if let Some(entry) = map.get_mut(label) { + if let Some(entry) = ensure_geo_entry(&mut map, label, app_handle) { entry.width = size.width; entry.height = size.height; - } else if let Some(win) = app_handle.get_webview_window(label) { - let pos = win.outer_position().unwrap_or(tauri::PhysicalPosition { x: 0, y: 0 }); - map.insert(label.to_string(), WindowGeometry { - label: label.to_string(), - x: pos.x, - y: pos.y, - width: size.width, - height: size.height, - active_path: 
String::new(), - }); } } } @@ -343,14 +346,11 @@ pub fn run() { .. } = &event { - // Remove from geometry registry so closed windows aren't persisted. - // On non-macOS, the last window close triggers Exit immediately after - // Destroyed, so save the session while the registry still has this entry. + // E-PENPAL-SESSION-FILE: save session before removing this window so the + // Exit handler always has a recent snapshot even if the map is partially drained. if let Ok(mut map) = app_handle.state::().0.lock() { - if map.len() == 1 && map.contains_key(label) { - let windows: Vec = map.values().cloned().collect(); - save_session(&windows); - } + let windows: Vec = map.values().cloned().collect(); + save_session(&windows); map.remove(label); } #[cfg(target_os = "macos")] diff --git a/apps/penpal/frontend/src/hooks/useTabs.test.ts b/apps/penpal/frontend/src/hooks/useTabs.test.ts index 0960b273c..2e02f33b5 100644 --- a/apps/penpal/frontend/src/hooks/useTabs.test.ts +++ b/apps/penpal/frontend/src/hooks/useTabs.test.ts @@ -165,6 +165,24 @@ describe('useTabs persistence', () => { expect(ids[1]).toMatch(/^tab-/); }); + // E-PENPAL-TAB-PERSIST: restores valid persisted tabs from localStorage. 
+ it('restores persisted tabs from localStorage', () => { + const persistedTabs = [ + { id: 'tab-aaa', path: '/recent', title: 'Recent', history: ['/recent'], historyIndex: 0 }, + { id: 'tab-bbb', path: '/in-review', title: 'In Review', history: ['/in-review'], historyIndex: 0 }, + ]; + localStorage.setItem('penpal:tabs:browser', JSON.stringify({ version: 1, activeTabId: 'tab-bbb', tabs: persistedTabs })); + const reviewWrapper = ({ children }: { children: ReactNode }) => + createElement(MemoryRouter, { initialEntries: ['/in-review'] }, children); + const { result } = renderHook(() => useTabs(), { wrapper: reviewWrapper }); + expect(result.current.tabs).toHaveLength(2); + expect(result.current.tabs[0].path).toBe('/recent'); + expect(result.current.tabs[0].title).toBe('Recent'); + expect(result.current.tabs[1].path).toBe('/in-review'); + expect(result.current.tabs[1].title).toBe('In Review'); + expect(result.current.activeTabId).toBe('tab-bbb'); + }); + // E-PENPAL-SESSION-FALLBACK: corrupt localStorage gracefully falls back. it('falls back to default tab when localStorage is corrupt', () => { localStorage.setItem('penpal:tabs:browser', 'not-json'); diff --git a/apps/penpal/frontend/src/hooks/useTabs.ts b/apps/penpal/frontend/src/hooks/useTabs.ts index 7cb7a7a5e..b28177ccc 100644 --- a/apps/penpal/frontend/src/hooks/useTabs.ts +++ b/apps/penpal/frontend/src/hooks/useTabs.ts @@ -125,24 +125,23 @@ export function useTabs(): TabsState { locationRef.current = location; const windowLabelRef = useRef(resolveWindowLabelSync()); - const [tabs, setTabs] = useState(() => { - // E-PENPAL-TAB-PERSIST: try to restore from localStorage synchronously. - // In browser mode the label is available immediately. In desktop mode - // the label may not be available yet — the async useEffect handles that. + // E-PENPAL-TAB-PERSIST: try to restore from localStorage synchronously. + // In browser mode the label is available immediately. 
In desktop mode + // the label may not be available yet — the async useEffect handles that. + // Parse once on mount (lazy useState initializer) so both states share one read and re-renders never touch localStorage. + const [initialPersisted] = useState(() => { const label = windowLabelRef.current; - if (label) { - const persisted = loadPersistedTabs(label); - if (persisted) return persisted.tabs; - } + if (label) return loadPersistedTabs(label); + return null; + }); + + const [tabs, setTabs] = useState(() => { + if (initialPersisted) return initialPersisted.tabs; const path = location.pathname + location.search; return [{ id: nextTabId(), path, title: deriveTitleFromPath(path), history: [path], historyIndex: 0 }]; }); const [activeTabId, setActiveTabId] = useState(() => { - const label = windowLabelRef.current; - if (label) { - const persisted = loadPersistedTabs(label); - if (persisted) return persisted.activeTabId; - } + if (initialPersisted) return initialPersisted.activeTabId; return tabs[0].id; }); const tabsRef = useRef(tabs); diff --git a/apps/penpal/internal/cache/cache.go b/apps/penpal/internal/cache/cache.go index 9173689ac..e9f422c16 100644 --- a/apps/penpal/internal/cache/cache.go +++ b/apps/penpal/internal/cache/cache.go @@ -2,12 +2,9 @@ package cache import ( "bufio" - "bytes" "errors" - "io" "io/fs" "os" - "os/exec" "path/filepath" "sort" "strings" @@ -15,96 +12,16 @@ import ( "time" "github.com/loganj/penpal/internal/discovery" + "github.com/loganj/penpal/internal/gitignore" ) -// gitIgnoreChecker uses a persistent `git check-ignore --stdin` process to -// test whether paths are gitignored. A single subprocess handles all queries, -// avoiding per-directory process spawn overhead. -// E-PENPAL-SCAN: gitignore-aware directory skipping. 
-type gitIgnoreChecker struct { - isGitRepo bool - stdin io.WriteCloser - scanner *bufio.Scanner - cmd *exec.Cmd -} - -func newGitIgnoreChecker(projectPath string) *gitIgnoreChecker { - g := &gitIgnoreChecker{} - if exec.Command("git", "-C", projectPath, "rev-parse", "--git-dir").Run() != nil { - return g - } - g.isGitRepo = true - // Start a persistent check-ignore process. With -v -n -z, every input - // path produces exactly 4 NUL-delimited fields: source, linenum, pattern, - // pathname. For non-ignored paths, source is empty. - g.cmd = exec.Command("git", "-C", projectPath, "check-ignore", "--stdin", "-z", "-v", "-n") - stdin, err := g.cmd.StdinPipe() - if err != nil { - g.isGitRepo = false - return g - } - stdout, err := g.cmd.StdoutPipe() - if err != nil { - g.isGitRepo = false - return g - } - if err := g.cmd.Start(); err != nil { - g.isGitRepo = false - return g - } - g.stdin = stdin - g.scanner = bufio.NewScanner(stdout) - g.scanner.Split(scanNul) - return g -} - -func (g *gitIgnoreChecker) IsIgnored(path string) bool { - if !g.isGitRepo { - return false - } - // Write path + NUL to the persistent process. - if _, err := g.stdin.Write(append([]byte(path), 0)); err != nil { - // E-PENPAL-SCAN: disable checker on write failure to prevent desync. - g.isGitRepo = false - return false - } - // Read 4 NUL-terminated fields: source, linenum, pattern, pathname. - var source string - for i := 0; i < 4; i++ { - if !g.scanner.Scan() { - // E-PENPAL-SCAN: partial read leaves stream out of sync; disable. - g.isGitRepo = false - return false - } - if i == 0 { - source = g.scanner.Text() - } - } - // Non-empty source means a gitignore rule matched. - return source != "" -} - -func (g *gitIgnoreChecker) Close() { - if g.stdin != nil { - g.stdin.Close() - } - if g.cmd != nil { - g.cmd.Wait() - } -} - -// scanNul is a bufio.SplitFunc that splits on NUL bytes. 
-func scanNul(data []byte, atEOF bool) (advance int, token []byte, err error) { - if atEOF && len(data) == 0 { - return 0, nil, nil - } - if i := bytes.IndexByte(data, 0); i >= 0 { - return i + 1, data[:i], nil - } - if atEOF { - return len(data), data, nil - } - return 0, nil, nil +// newGitIgnoreMatcher creates a pure-Go gitignore matcher for the given +// project path. Returns nil if the project is not inside a git repo +// (non-git directories are handled gracefully — no paths are reported +// as ignored). +// E-PENPAL-SCAN: gitignore-aware directory skipping — zero subprocess overhead. +func newGitIgnoreMatcher(projectPath string) *gitignore.Matcher { + return gitignore.New(projectPath) } // FileInfo represents a cached file @@ -133,6 +50,10 @@ type Cache struct { projectFiles map[string][]FileInfo // projectScanned tracks which projects have had a full file scan projectScanned map[string]bool + // matcherCache caches gitignore matchers by project path to avoid + // re-parsing .gitignore files on every file event. + // E-PENPAL-SCAN: caches gitignore matchers for hot-path performance. + matcherCache map[string]*gitignore.Matcher } // New creates a new cache @@ -140,7 +61,28 @@ func New() *Cache { return &Cache{ projectFiles: make(map[string][]FileInfo), projectScanned: make(map[string]bool), + matcherCache: make(map[string]*gitignore.Matcher), + } +} + +// getOrCreateMatcher returns a cached gitignore matcher for the project path, +// creating one if needed. Thread-safe. +// E-PENPAL-SCAN: caches gitignore matchers to avoid re-parsing on every file event. 
+func (c *Cache) getOrCreateMatcher(projectPath string) *gitignore.Matcher { + c.mu.RLock() + if m, ok := c.matcherCache[projectPath]; ok { + c.mu.RUnlock() + return m } + c.mu.RUnlock() + + m := gitignore.New(projectPath) + + c.mu.Lock() + c.matcherCache[projectPath] = m + c.mu.Unlock() + + return m } // SetProjects updates the projects list @@ -581,7 +523,7 @@ func (c *Cache) RescanWith(projects []discovery.Project) { } } - // Clean up removed projects + // Clean up removed projects and invalidate matcher cache c.mu.Lock() for name := range existingFiles { if !newNames[name] { @@ -589,6 +531,8 @@ func (c *Cache) RescanWith(projects []discovery.Project) { delete(c.projectScanned, name) } } + // Clear matcher cache so rescanned projects pick up any .gitignore changes. + c.matcherCache = make(map[string]*gitignore.Matcher) c.mu.Unlock() // Scan only the projects that need it, with concurrency limit @@ -679,19 +623,13 @@ func extractTitle(path string) string { return "" } -// ResolveFileInfo resolves source membership for a single absolute .md file path -// within a project. It applies the same source-priority, SkipDirs, RequireSibling, -// and ClassifyFile rules as scanProjectSources but without walking the filesystem -// or spawning a git check-ignore process. Returns FileInfo entries for each source -// that claims the file (typically one typed source + __all_markdown__). Returns nil -// if no source claims the file. -// E-PENPAL-SCAN: single-file source resolution for incremental cache updates. // ResolveFileInfo resolves source membership for a single absolute .md file path // without a filesystem walk. It applies the same exclusion rules as // scanProjectSources: nested git worktree/submodule detection, gitignore // ancestor-directory checks (P-PENPAL-SRC-GITIGNORE), SkipDirs filtering, and -// RequireSibling validation. -func ResolveFileInfo(project *discovery.Project, absPath string) []FileInfo { +// RequireSibling validation. 
Uses a pure-Go gitignore matcher — no subprocesses. +// E-PENPAL-SCAN: single-file source resolution for incremental cache updates. +func ResolveFileInfo(project *discovery.Project, absPath string, matcher *gitignore.Matcher) []FileInfo { if !strings.HasSuffix(absPath, ".md") { return nil } @@ -734,7 +672,7 @@ func ResolveFileInfo(project *discovery.Project, absPath string) []FileInfo { // P-PENPAL-SRC-GITIGNORE: skip files whose ancestor directory // is gitignored (source root itself is exempt). - if isAncestorDirGitIgnored(absPath, rootPath, project.Path) { + if isAncestorDirGitIgnored(matcher, absPath, rootPath) { continue } @@ -883,15 +821,14 @@ func isUnderNestedGitRepo(absPath, rootPath string) bool { } // isAncestorDirGitIgnored walks parent directories from absPath up to (but not -// including) rootPath, running a one-shot `git check-ignore -q` on each. +// including) rootPath, checking each against the gitignore matcher. // Returns true if any ancestor directory is gitignored. // P-PENPAL-SRC-GITIGNORE: the source root itself is exempt (always scanned). -func isAncestorDirGitIgnored(absPath, rootPath, projectPath string) bool { +func isAncestorDirGitIgnored(m *gitignore.Matcher, absPath, rootPath string) bool { dir := filepath.Dir(absPath) for dir != rootPath && strings.HasPrefix(dir, rootPath+"/") { - cmd := exec.Command("git", "-C", projectPath, "check-ignore", "-q", dir) - if cmd.Run() == nil { - return true // exit code 0 means ignored + if m.IsIgnoredDir(dir) { + return true } dir = filepath.Dir(dir) } @@ -915,7 +852,8 @@ func (c *Cache) UpsertFile(projectName string, project *discovery.Project, absPa } title := extractTitle(absPath) - resolved := filterManualFileInfos(project, ResolveFileInfo(project, absPath)) + matcher := c.getOrCreateMatcher(project.Path) + resolved := filterManualFileInfos(project, ResolveFileInfo(project, absPath, matcher)) // Acquire lock only for the short critical section that mutates the cache. 
c.mu.Lock() @@ -1072,8 +1010,7 @@ func ScanProjectSourcesForWorktree(project *discovery.Project, worktreePath stri func scanProjectSources(project *discovery.Project) []FileInfo { var files []FileInfo seen := make(map[string]bool) // project-relative paths already claimed - gitChecker := newGitIgnoreChecker(project.Path) - defer gitChecker.Close() + matcher := newGitIgnoreMatcher(project.Path) for _, source := range project.Sources { if source.Type == "thoughts" || source.Type == "tree" { @@ -1107,7 +1044,7 @@ func scanProjectSources(project *discovery.Project) []FileInfo { } // P-PENPAL-SRC-GITIGNORE: registered source roots are // always scanned even if gitignored. - if path != rootPath && gitChecker.IsIgnored(path) { + if path != rootPath && matcher.IsIgnoredDir(path) { return filepath.SkipDir } if st != nil && st.SkipDirs[d.Name()] { diff --git a/apps/penpal/internal/cache/cache_test.go b/apps/penpal/internal/cache/cache_test.go index b0feafc80..fae973161 100644 --- a/apps/penpal/internal/cache/cache_test.go +++ b/apps/penpal/internal/cache/cache_test.go @@ -383,7 +383,7 @@ func TestScanProjectSources_SkipsNestedWorktrees(t *testing.T) { func TestScanProjectSources_SkipsGitignored(t *testing.T) { tmpDir := t.TempDir() - // Initialise a git repo so git check-ignore works. + // Initialise a git repo so gitignore matching works. runGit(t, tmpDir, "init") runGit(t, tmpDir, "config", "user.email", "test@test.com") runGit(t, tmpDir, "config", "user.name", "test") @@ -473,13 +473,14 @@ func TestScanProjectSources_GitignoreDoesNotSkipSourceRoot(t *testing.T) { } // E-PENPAL-SCAN: non-git directories work without errors. 
-func TestGitIgnoreChecker_NonGitDir(t *testing.T) { +func TestGitIgnoreMatcher_NonGitDir(t *testing.T) { tmpDir := t.TempDir() - checker := newGitIgnoreChecker(tmpDir) - if checker.isGitRepo { - t.Fatal("expected non-git dir to be detected") + matcher := newGitIgnoreMatcher(tmpDir) + if matcher != nil { + t.Fatal("expected nil matcher for non-git dir") } - if checker.IsIgnored(filepath.Join(tmpDir, "anything")) { + // nil matcher should never report paths as ignored (safe to call). + if matcher.IsIgnoredDir(filepath.Join(tmpDir, "anything")) { t.Error("non-git dir should never report paths as ignored") } } @@ -716,7 +717,8 @@ func TestResolveFileInfo_ThoughtsSource(t *testing.T) { }, } - results := ResolveFileInfo(project, filePath) + matcher := newGitIgnoreMatcher(tmpDir) + results := ResolveFileInfo(project, filePath, matcher) if len(results) != 2 { t.Fatalf("expected 2 results (thoughts + __all_markdown__), got %d", len(results)) } @@ -756,7 +758,8 @@ func TestResolveFileInfo_SkipDirs(t *testing.T) { }, } - results := ResolveFileInfo(project, filePath) + matcher := newGitIgnoreMatcher(tmpDir) + results := ResolveFileInfo(project, filePath, matcher) // __all_markdown__ has SkipDirs for node_modules if len(results) != 0 { t.Fatalf("expected 0 results (node_modules is in SkipDirs), got %d", len(results)) @@ -786,14 +789,16 @@ func TestResolveFileInfo_RequireSibling(t *testing.T) { }, } + matcher := newGitIgnoreMatcher(tmpDir) + // File with sibling should be included - results := ResolveFileInfo(project, filepath.Join(withSibling, "PRODUCT.md")) + results := ResolveFileInfo(project, filepath.Join(withSibling, "PRODUCT.md"), matcher) if len(results) != 1 { t.Fatalf("expected 1 result for file with ANCHORS.md sibling, got %d", len(results)) } // File without sibling should be excluded - results = ResolveFileInfo(project, filepath.Join(withoutSibling, "PRODUCT.md")) + results = ResolveFileInfo(project, filepath.Join(withoutSibling, "PRODUCT.md"), matcher) if 
len(results) != 0 { t.Fatalf("expected 0 results for file without ANCHORS.md sibling, got %d", len(results)) } @@ -813,7 +818,8 @@ func TestResolveFileInfo_NonMdFile(t *testing.T) { }, } - results := ResolveFileInfo(project, filePath) + matcher := newGitIgnoreMatcher(tmpDir) + results := ResolveFileInfo(project, filePath, matcher) if len(results) != 0 { t.Fatalf("expected 0 results for non-.md file, got %d", len(results)) } @@ -838,7 +844,8 @@ func TestResolveFileInfo_SourcePriority(t *testing.T) { }, } - results := ResolveFileInfo(project, filePath) + matcher := newGitIgnoreMatcher(tmpDir) + results := ResolveFileInfo(project, filePath, matcher) // Should get 2: first typed source (thoughts) + __all_markdown__ // The second typed source (manual) should be skipped if len(results) != 2 { @@ -1177,3 +1184,43 @@ func TestRescanWith_RemovesOldProjects(t *testing.T) { t.Errorf("expected kept project to preserve 1 cached file, got %d", len(filesKeep)) } } + +// E-PENPAL-SCAN, P-PENPAL-SRC-GITIGNORE: verifies ResolveFileInfo skips gitignored ancestor dirs. +func TestResolveFileInfo_SkipsGitignored(t *testing.T) { + tmpDir := t.TempDir() + runGit(t, tmpDir, "init") + runGit(t, tmpDir, "config", "user.email", "test@test.com") + runGit(t, tmpDir, "config", "user.name", "test") + + os.WriteFile(filepath.Join(tmpDir, ".gitignore"), []byte("build/\n"), 0644) + + // Create a .md file inside a gitignored directory. + os.MkdirAll(filepath.Join(tmpDir, "build", "docs"), 0755) + os.WriteFile(filepath.Join(tmpDir, "build", "docs", "api.md"), []byte("# API"), 0644) + + // Create a .md file outside gitignored directory. 
+ os.MkdirAll(filepath.Join(tmpDir, "docs"), 0755) + os.WriteFile(filepath.Join(tmpDir, "docs", "readme.md"), []byte("# Readme"), 0644) + + project := &discovery.Project{ + Name: "test", + Path: tmpDir, + Sources: []discovery.FileSource{ + {Name: "all", Type: "tree", RootPath: tmpDir}, + }, + } + + matcher := newGitIgnoreMatcher(tmpDir) + + // File in gitignored dir should return nil. + results := ResolveFileInfo(project, filepath.Join(tmpDir, "build", "docs", "api.md"), matcher) + if len(results) != 0 { + t.Errorf("expected no results for gitignored file, got %d", len(results)) + } + + // File outside gitignored dir should return results. + results = ResolveFileInfo(project, filepath.Join(tmpDir, "docs", "readme.md"), matcher) + if len(results) == 0 { + t.Error("expected results for non-gitignored file") + } +} diff --git a/apps/penpal/internal/gitignore/gitignore.go b/apps/penpal/internal/gitignore/gitignore.go new file mode 100644 index 000000000..a28b57e9c --- /dev/null +++ b/apps/penpal/internal/gitignore/gitignore.go @@ -0,0 +1,566 @@ +// Package gitignore provides a pure-Go gitignore matcher that replaces +// subprocess calls to `git check-ignore`. It parses .gitignore files, +// .git/info/exclude, and the global gitignore, then answers directory-ignore +// queries in-process. +// E-PENPAL-SCAN: pure-Go gitignore matching — zero subprocess overhead. +package gitignore + +import ( + "bufio" + "os" + "os/exec" + "path/filepath" + "strings" + "sync" +) + +// Matcher evaluates whether directories are gitignored within a repository. +// It loads and caches .gitignore files, .git/info/exclude, and the global +// gitignore, then answers IsIgnoredDir queries without spawning subprocesses. 
+type Matcher struct { + repoRoot string // absolute path to repo root (contains .git dir) + gitDir string // absolute path to the git directory + + mu sync.RWMutex + global []rule // global gitignore rules + exclude []rule // .git/info/exclude rules + fileCache map[string][]rule // abs dir path -> parsed .gitignore rules (nil = no file) + dirCache map[string]bool // abs dir path -> ignored? +} + +// New creates a Matcher for the given project path. It walks upward to find +// the git repository root. Returns nil if the path is not inside a git repo. +func New(projectPath string) *Matcher { + repoRoot, gitDir := findRepo(projectPath) + if repoRoot == "" { + return nil + } + + m := &Matcher{ + repoRoot: repoRoot, + gitDir: gitDir, + fileCache: make(map[string][]rule), + dirCache: make(map[string]bool), + } + + // Load global gitignore. + if globalPath := globalGitignorePath(); globalPath != "" { + m.global = parseFile(globalPath, m.repoRoot) + } + + // Load .git/info/exclude. NOTE(review): for a linked worktree gitDir is the per-worktree dir, while git reads info/exclude from the common dir — confirm. + excludePath := filepath.Join(gitDir, "info", "exclude") + m.exclude = parseFile(excludePath, m.repoRoot) + + return m +} + +// IsIgnoredDir returns true if the given absolute directory path is gitignored. +// Results are cached for the lifetime of the Matcher. A nil Matcher reports false. +func (m *Matcher) IsIgnoredDir(absDir string) bool { + if m == nil { + return false + } + + m.mu.RLock() + if result, ok := m.dirCache[absDir]; ok { + m.mu.RUnlock() + return result + } + m.mu.RUnlock() + + result := m.computeIgnored(absDir) + + m.mu.Lock() + m.dirCache[absDir] = result + m.mu.Unlock() + + return result +} + +// computeIgnored evaluates whether absDir is ignored by walking up the +// directory tree and applying gitignore rules in precedence order. +func (m *Matcher) computeIgnored(absDir string) bool { + // The repo root itself is never ignored. + if absDir == m.repoRoot { + return false + } + + // If absDir is outside the repo, not ignored (an in-repo name that merely starts with ".." is still inside). 
+ rel, err := filepath.Rel(m.repoRoot, absDir) + if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) { + return false + } + + // Check parent first — if parent is ignored, child is too + // (negation cannot re-include under an ignored parent). + parent := filepath.Dir(absDir) + if parent != m.repoRoot && m.IsIgnoredDir(parent) { + return true + } + + // Build the path relative to repo root using forward slashes. + relSlash := filepath.ToSlash(rel) + + // Evaluate rules in precedence order (last match wins within each level, + // higher-precedence levels override lower). Precedence order: + // 1. global gitignore (lowest) + // 2. .git/info/exclude + // 3. .gitignore files from repo root down to parent of absDir (highest) + // + // We scan all rule sources and track the last match. + matched := false + ignored := false + + // Global rules. + if hit, neg := matchRules(m.global, relSlash, true); hit { + matched = true + ignored = !neg + } + + // .git/info/exclude rules. + if hit, neg := matchRules(m.exclude, relSlash, true); hit { + matched = true + ignored = !neg + } + + // Per-directory .gitignore files, from repo root down to parent(absDir). + dirs := ancestorDirs(m.repoRoot, absDir) + for _, dir := range dirs { + rules := m.loadGitignore(dir) + if len(rules) == 0 { + continue + } + // For .gitignore in a subdirectory, paths are relative to that dir. + subRel, _ := filepath.Rel(dir, absDir) + subRelSlash := filepath.ToSlash(subRel) + if hit, neg := matchRules(rules, subRelSlash, true); hit { + matched = true + ignored = !neg + } + } + + if !matched { + return false + } + return ignored +} + +// loadGitignore returns parsed rules for the .gitignore in dir, using cache. 
+func (m *Matcher) loadGitignore(dir string) []rule { + m.mu.RLock() + if rules, ok := m.fileCache[dir]; ok { + m.mu.RUnlock() + return rules + } + m.mu.RUnlock() + + path := filepath.Join(dir, ".gitignore") + rules := parseFile(path, dir) + + m.mu.Lock() + m.fileCache[dir] = rules + m.mu.Unlock() + + return rules +} + +// ancestorDirs returns directories from repoRoot down to (and including) +// the parent of target. The list is ordered root-first. +func ancestorDirs(repoRoot, target string) []string { + var dirs []string + dir := filepath.Dir(target) // parent of target + for { + dirs = append(dirs, dir) + if dir == repoRoot { + break + } + next := filepath.Dir(dir) + if next == dir { + break + } + dir = next + } + // Reverse to get root-first order. + for i, j := 0, len(dirs)-1; i < j; i, j = i+1, j-1 { + dirs[i], dirs[j] = dirs[j], dirs[i] + } + return dirs +} + +// findRepo walks upward from path to find the git repository root. +// Returns (repoRoot, gitDir) or ("", "") if not in a git repo. +func findRepo(path string) (string, string) { + abs, err := filepath.Abs(path) + if err != nil { + return "", "" + } + dir := abs + for { + gitEntry := filepath.Join(dir, ".git") + fi, err := os.Lstat(gitEntry) + if err == nil { + if fi.IsDir() { + return dir, gitEntry + } + // .git file (worktree/submodule) — parse gitdir line. + if gitDir := parseGitFile(gitEntry, dir); gitDir != "" { + return dir, gitDir + } + } + parent := filepath.Dir(dir) + if parent == dir { + return "", "" + } + dir = parent + } +} + +// parseGitFile reads a .git file (worktree/submodule format) and returns +// the resolved gitdir path. 
+func parseGitFile(path, baseDir string) string { + data, err := os.ReadFile(path) + if err != nil { + return "" + } + line := strings.TrimSpace(string(data)) + if !strings.HasPrefix(line, "gitdir: ") { + return "" + } + gitDir := strings.TrimPrefix(line, "gitdir: ") + if !filepath.IsAbs(gitDir) { + gitDir = filepath.Join(baseDir, gitDir) + } + gitDir = filepath.Clean(gitDir) + if fi, err := os.Stat(gitDir); err == nil && fi.IsDir() { + return gitDir + } + return "" +} + +var ( + globalIgnoreOnce sync.Once + globalIgnorePath string +) + +// globalGitignorePath returns the path to the global gitignore file. +// E-PENPAL-SCAN: cached via sync.Once to avoid repeated subprocess calls. +func globalGitignorePath() string { + globalIgnoreOnce.Do(func() { + // Try git config first + out, err := exec.Command("git", "config", "core.excludesFile").Output() + if err == nil { + p := strings.TrimSpace(string(out)) + if p != "" { + if strings.HasPrefix(p, "~/") { + if home, err := os.UserHomeDir(); err == nil { + globalIgnorePath = filepath.Join(home, p[2:]) + return + } + } + globalIgnorePath = p + return + } + } + // Default location + home, err := os.UserHomeDir() + if err != nil { + return + } + globalIgnorePath = filepath.Join(home, ".config", "git", "ignore") + }) + return globalIgnorePath +} + +// --- Pattern parsing --- + +type rule struct { + negated bool + dirOnly bool + anchored bool // pattern contains "/" or starts with "/" + pattern string +} + +// parseFile reads and parses a .gitignore file. Returns nil if the file +// doesn't exist. baseDir is the directory containing the ignore file +// (used for anchored pattern matching). 
+func parseFile(path, baseDir string) []rule { + f, err := os.Open(path) + if err != nil { + return nil + } + defer f.Close() + + var rules []rule + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := scanner.Text() + r, ok := parseLine(line) + if ok { + rules = append(rules, r) + } + } + return rules +} + +// trimTrailingWhitespace trims trailing spaces/tabs, but preserves a space +// escaped with a preceding backslash (e.g., "foo\ " keeps the trailing space). +func trimTrailingWhitespace(s string) string { + end := len(s) + for end > 0 && (s[end-1] == ' ' || s[end-1] == '\t') { + end-- + } + // If the last non-whitespace char is a backslash and there was whitespace + // after it, keep exactly one space (the escaped one). + if end > 0 && end < len(s) && s[end-1] == '\\' { + return s[:end-1] + " " + } + return s[:end] +} + +// parseLine parses a single .gitignore line into a rule. +func parseLine(line string) (rule, bool) { + // Strip trailing whitespace (unless escaped with backslash). + // E-PENPAL-SCAN: preserve backslash-escaped trailing space per gitignore spec. + line = trimTrailingWhitespace(line) + + // Skip empty lines and comments. + if line == "" || line[0] == '#' { + return rule{}, false + } + + var r rule + + // Handle negation. + if line[0] == '!' { + r.negated = true + line = line[1:] + if line == "" { + return rule{}, false + } + } + + // Handle leading backslash escape (e.g., \# or \!). + if line[0] == '\\' && len(line) > 1 && (line[1] == '#' || line[1] == '!' || line[1] == ' ') { + line = line[1:] + } + + // Trailing "/" means directory-only. + if strings.HasSuffix(line, "/") { + r.dirOnly = true + line = strings.TrimRight(line, "/") + if line == "" { + return rule{}, false + } + } + + // Leading "/" means anchored to the base directory. + if line[0] == '/' { + r.anchored = true + line = line[1:] + if line == "" { + return rule{}, false + } + } + + // If the pattern contains a "/" (after stripping leading/trailing), + // it's anchored. 
+ if strings.Contains(line, "/") { + r.anchored = true + } + + r.pattern = line + return r, true +} + +// --- Pattern matching --- + +// matchRules evaluates rules against a slash-separated relative path. +// Returns (matched, negated). Last matching rule wins. +func matchRules(rules []rule, relPath string, isDir bool) (bool, bool) { + matched := false + negated := false + + for _, r := range rules { + // dirOnly patterns only match directories. + if r.dirOnly && !isDir { + continue + } + + if r.anchored { + // Anchored: match against the full relative path. + if globMatch(r.pattern, relPath) { + matched = true + negated = r.negated + } + } else { + // Unanchored: match against basename, or against each + // suffix of the path. + if !strings.Contains(r.pattern, "/") { + // Simple basename match. + base := relPath + if idx := strings.LastIndex(relPath, "/"); idx >= 0 { + base = relPath[idx+1:] + } + if globMatch(r.pattern, base) { + matched = true + negated = r.negated + } + } else { + // Pattern with "/" but not anchored — match against + // path suffixes. + if globMatch(r.pattern, relPath) { + matched = true + negated = r.negated + } + // Also try matching against each path suffix. + for i := 0; i < len(relPath); i++ { + if relPath[i] == '/' { + suffix := relPath[i+1:] + if globMatch(r.pattern, suffix) { + matched = true + negated = r.negated + } + } + } + } + } + } + + return matched, negated +} + +// globMatch matches a gitignore-style glob pattern against a string. +// Supports *, **, ?, and [abc] character classes. +// Paths use forward slashes. +func globMatch(pattern, name string) bool { + return doMatch(pattern, name) +} + +func doMatch(pattern, name string) bool { + for len(pattern) > 0 { + switch pattern[0] { + case '*': + if len(pattern) > 1 && pattern[1] == '*' { + // "**" — match zero or more path segments. + pattern = pattern[2:] + + // "**/" at start or middle — skip the slash. 
+ if len(pattern) > 0 && pattern[0] == '/' { + pattern = pattern[1:] + } + + // If pattern is exhausted, match everything. + if len(pattern) == 0 { + return true + } + + // Try matching the remainder against every suffix. + for i := 0; i <= len(name); i++ { + if doMatch(pattern, name[i:]) { + return true + } + // Only try positions at start or after '/'. + for i < len(name) && name[i] != '/' { + i++ + } + } + return false + } + + // Single "*" — match any characters except "/". + pattern = pattern[1:] + if len(pattern) == 0 { + // "*" at end — match if no "/" remains. + return !strings.Contains(name, "/") + } + for i := 0; i <= len(name); i++ { + if doMatch(pattern, name[i:]) { + return true + } + if i < len(name) && name[i] == '/' { + break + } + } + return false + + case '?': + if len(name) == 0 || name[0] == '/' { + return false + } + pattern = pattern[1:] + name = name[1:] + + case '[': + if len(name) == 0 || name[0] == '/' { + return false + } + // Parse character class. + end := strings.IndexByte(pattern, ']') + if end < 0 { + return false // malformed + } + class := pattern[1:end] + ch := name[0] + matched := matchCharClass(class, ch) + if !matched { + return false + } + pattern = pattern[end+1:] + name = name[1:] + + case '\\': + // Escape next character. + if len(pattern) > 1 { + pattern = pattern[1:] + if len(name) == 0 || name[0] != pattern[0] { + return false + } + pattern = pattern[1:] + name = name[1:] + } else { + return false + } + + default: + if len(name) == 0 || pattern[0] != name[0] { + return false + } + pattern = pattern[1:] + name = name[1:] + } + } + + return len(name) == 0 +} + +// matchCharClass checks if ch matches a [...] character class. +func matchCharClass(class string, ch byte) bool { + negated := false + if len(class) > 0 && class[0] == '!' { + negated = true + class = class[1:] + } + + matched := false + i := 0 + for i < len(class) { + if i+2 < len(class) && class[i+1] == '-' { + // Range: a-z. 
+ if ch >= class[i] && ch <= class[i+2] { + matched = true + } + i += 3 + } else { + if ch == class[i] { + matched = true + } + i++ + } + } + + if negated { + return !matched + } + return matched +} diff --git a/apps/penpal/internal/gitignore/gitignore_test.go b/apps/penpal/internal/gitignore/gitignore_test.go new file mode 100644 index 000000000..a5ffd099a --- /dev/null +++ b/apps/penpal/internal/gitignore/gitignore_test.go @@ -0,0 +1,423 @@ +package gitignore + +import ( + "os" + "os/exec" + "path/filepath" + "testing" +) + +func runGit(t *testing.T, dir string, args ...string) { + t.Helper() + cmd := exec.Command("git", append([]string{"-C", dir}, args...)...) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + t.Fatalf("git %v failed: %v", args, err) + } +} + +func TestNew_NonGitDir(t *testing.T) { + m := New(t.TempDir()) + if m != nil { + t.Error("expected nil matcher for non-git directory") + } +} + +func TestNew_GitRepo(t *testing.T) { + dir := t.TempDir() + runGit(t, dir, "init") + m := New(dir) + if m == nil { + t.Fatal("expected non-nil matcher for git repo") + } + if m.repoRoot != dir { + t.Errorf("repoRoot = %s, want %s", m.repoRoot, dir) + } +} + +func TestIsIgnoredDir_NilMatcher(t *testing.T) { + var m *Matcher + if m.IsIgnoredDir("/anything") { + t.Error("nil matcher should never report ignored") + } +} + +func TestIsIgnoredDir_BasicPatterns(t *testing.T) { + dir := t.TempDir() + runGit(t, dir, "init") + + os.WriteFile(filepath.Join(dir, ".gitignore"), []byte("build/\nvendor/\n"), 0644) + os.MkdirAll(filepath.Join(dir, "build", "out"), 0755) + os.MkdirAll(filepath.Join(dir, "vendor", "lib"), 0755) + os.MkdirAll(filepath.Join(dir, "docs"), 0755) + + m := New(dir) + if m == nil { + t.Fatal("expected matcher") + } + + tests := []struct { + path string + ignored bool + }{ + {filepath.Join(dir, "build"), true}, + {filepath.Join(dir, "build", "out"), true}, + {filepath.Join(dir, "vendor"), true}, + {filepath.Join(dir, 
"vendor", "lib"), true}, + {filepath.Join(dir, "docs"), false}, + } + + for _, tt := range tests { + got := m.IsIgnoredDir(tt.path) + if got != tt.ignored { + t.Errorf("IsIgnoredDir(%s) = %v, want %v", tt.path, got, tt.ignored) + } + } +} + +func TestIsIgnoredDir_WildcardPatterns(t *testing.T) { + dir := t.TempDir() + runGit(t, dir, "init") + + os.WriteFile(filepath.Join(dir, ".gitignore"), []byte("*.tmp\n__pycache__\n"), 0644) + os.MkdirAll(filepath.Join(dir, "foo.tmp"), 0755) + os.MkdirAll(filepath.Join(dir, "src", "__pycache__"), 0755) + os.MkdirAll(filepath.Join(dir, "src", "main"), 0755) + + m := New(dir) + if m == nil { + t.Fatal("expected matcher") + } + + tests := []struct { + path string + ignored bool + }{ + {filepath.Join(dir, "foo.tmp"), true}, + {filepath.Join(dir, "src", "__pycache__"), true}, + {filepath.Join(dir, "src", "main"), false}, + } + + for _, tt := range tests { + got := m.IsIgnoredDir(tt.path) + if got != tt.ignored { + t.Errorf("IsIgnoredDir(%s) = %v, want %v", tt.path, got, tt.ignored) + } + } +} + +func TestIsIgnoredDir_Negation(t *testing.T) { + dir := t.TempDir() + runGit(t, dir, "init") + + os.WriteFile(filepath.Join(dir, ".gitignore"), []byte("logs/\n!logs/important/\n"), 0644) + os.MkdirAll(filepath.Join(dir, "logs", "important"), 0755) + os.MkdirAll(filepath.Join(dir, "logs", "debug"), 0755) + + m := New(dir) + if m == nil { + t.Fatal("expected matcher") + } + + // logs/ is ignored. + if !m.IsIgnoredDir(filepath.Join(dir, "logs")) { + t.Error("expected logs/ to be ignored") + } + + // logs/important/ has a negation rule, but its parent (logs/) is ignored, + // so it stays ignored (git behavior: cannot re-include under ignored parent). + if !m.IsIgnoredDir(filepath.Join(dir, "logs", "important")) { + t.Error("expected logs/important/ to be ignored (parent is ignored)") + } + + // logs/debug/ is under ignored parent. 
+ if !m.IsIgnoredDir(filepath.Join(dir, "logs", "debug")) { + t.Error("expected logs/debug/ to be ignored") + } +} + +func TestIsIgnoredDir_DoubleStarPattern(t *testing.T) { + dir := t.TempDir() + runGit(t, dir, "init") + + os.WriteFile(filepath.Join(dir, ".gitignore"), []byte("**/cache\n"), 0644) + os.MkdirAll(filepath.Join(dir, "cache"), 0755) + os.MkdirAll(filepath.Join(dir, "a", "b", "cache"), 0755) + os.MkdirAll(filepath.Join(dir, "src"), 0755) + + m := New(dir) + if m == nil { + t.Fatal("expected matcher") + } + + tests := []struct { + path string + ignored bool + }{ + {filepath.Join(dir, "cache"), true}, + {filepath.Join(dir, "a", "b", "cache"), true}, + {filepath.Join(dir, "src"), false}, + } + + for _, tt := range tests { + got := m.IsIgnoredDir(tt.path) + if got != tt.ignored { + t.Errorf("IsIgnoredDir(%s) = %v, want %v", tt.path, got, tt.ignored) + } + } +} + +func TestIsIgnoredDir_NestedGitignore(t *testing.T) { + dir := t.TempDir() + runGit(t, dir, "init") + + // Root .gitignore ignores "tmp/" everywhere. + os.WriteFile(filepath.Join(dir, ".gitignore"), []byte("tmp/\n"), 0644) + + // Subdirectory .gitignore adds its own ignore. 
+ os.MkdirAll(filepath.Join(dir, "src"), 0755) + os.WriteFile(filepath.Join(dir, "src", ".gitignore"), []byte("generated/\n"), 0644) + + os.MkdirAll(filepath.Join(dir, "src", "generated"), 0755) + os.MkdirAll(filepath.Join(dir, "src", "main"), 0755) + os.MkdirAll(filepath.Join(dir, "tmp"), 0755) + os.MkdirAll(filepath.Join(dir, "src", "tmp"), 0755) + + m := New(dir) + if m == nil { + t.Fatal("expected matcher") + } + + tests := []struct { + path string + ignored bool + }{ + {filepath.Join(dir, "tmp"), true}, + {filepath.Join(dir, "src", "tmp"), true}, + {filepath.Join(dir, "src", "generated"), true}, + {filepath.Join(dir, "src", "main"), false}, + } + + for _, tt := range tests { + got := m.IsIgnoredDir(tt.path) + if got != tt.ignored { + t.Errorf("IsIgnoredDir(%s) = %v, want %v", tt.path, got, tt.ignored) + } + } +} + +func TestIsIgnoredDir_AnchoredPattern(t *testing.T) { + dir := t.TempDir() + runGit(t, dir, "init") + + // Leading "/" anchors the pattern to the repo root. + os.WriteFile(filepath.Join(dir, ".gitignore"), []byte("/build\n"), 0644) + os.MkdirAll(filepath.Join(dir, "build"), 0755) + os.MkdirAll(filepath.Join(dir, "src", "build"), 0755) + + m := New(dir) + if m == nil { + t.Fatal("expected matcher") + } + + // /build matches only at root. + if !m.IsIgnoredDir(filepath.Join(dir, "build")) { + t.Error("expected build/ at root to be ignored") + } + // src/build should NOT match since pattern is anchored. + if m.IsIgnoredDir(filepath.Join(dir, "src", "build")) { + t.Error("expected src/build/ to NOT be ignored (anchored pattern)") + } +} + +func TestIsIgnoredDir_GitInfoExclude(t *testing.T) { + dir := t.TempDir() + runGit(t, dir, "init") + + // Write to .git/info/exclude instead of .gitignore. 
+ infoDir := filepath.Join(dir, ".git", "info") + os.MkdirAll(infoDir, 0755) + os.WriteFile(filepath.Join(infoDir, "exclude"), []byte("secret/\n"), 0644) + + os.MkdirAll(filepath.Join(dir, "secret"), 0755) + os.MkdirAll(filepath.Join(dir, "public"), 0755) + + m := New(dir) + if m == nil { + t.Fatal("expected matcher") + } + + if !m.IsIgnoredDir(filepath.Join(dir, "secret")) { + t.Error("expected secret/ to be ignored via .git/info/exclude") + } + if m.IsIgnoredDir(filepath.Join(dir, "public")) { + t.Error("expected public/ to NOT be ignored") + } +} + +func TestIsIgnoredDir_RepoRoot(t *testing.T) { + dir := t.TempDir() + runGit(t, dir, "init") + + m := New(dir) + if m == nil { + t.Fatal("expected matcher") + } + + if m.IsIgnoredDir(dir) { + t.Error("repo root should never be ignored") + } +} + +func TestIsIgnoredDir_Caching(t *testing.T) { + dir := t.TempDir() + runGit(t, dir, "init") + + os.WriteFile(filepath.Join(dir, ".gitignore"), []byte("cache/\n"), 0644) + os.MkdirAll(filepath.Join(dir, "cache"), 0755) + + m := New(dir) + if m == nil { + t.Fatal("expected matcher") + } + + // First call computes. + if !m.IsIgnoredDir(filepath.Join(dir, "cache")) { + t.Error("expected cache/ to be ignored") + } + + // Second call uses cache — same result. + if !m.IsIgnoredDir(filepath.Join(dir, "cache")) { + t.Error("expected cache/ to still be ignored (from cache)") + } + + // Verify the cache entry exists. + m.mu.RLock() + _, ok := m.dirCache[filepath.Join(dir, "cache")] + m.mu.RUnlock() + if !ok { + t.Error("expected dirCache entry to exist") + } +} + +// E-PENPAL-SCAN: patterns without trailing slash match directories too. 
+func TestIsIgnoredDir_PatternWithoutTrailingSlash(t *testing.T) { + dir := t.TempDir() + runGit(t, dir, "init") + + os.WriteFile(filepath.Join(dir, ".gitignore"), []byte("node_modules\n.env\n"), 0644) + os.MkdirAll(filepath.Join(dir, "node_modules"), 0755) + os.MkdirAll(filepath.Join(dir, ".env"), 0755) + os.MkdirAll(filepath.Join(dir, "src"), 0755) + os.MkdirAll(filepath.Join(dir, "src", "node_modules"), 0755) + + m := New(dir) + if m == nil { + t.Fatal("expected matcher") + } + + tests := []struct { + path string + ignored bool + }{ + {filepath.Join(dir, "node_modules"), true}, + {filepath.Join(dir, ".env"), true}, + {filepath.Join(dir, "src", "node_modules"), true}, + {filepath.Join(dir, "src"), false}, + } + + for _, tt := range tests { + got := m.IsIgnoredDir(tt.path) + if got != tt.ignored { + t.Errorf("IsIgnoredDir(%s) = %v, want %v", tt.path, got, tt.ignored) + } + } +} + +// --- parseLine tests --- + +func TestParseLine(t *testing.T) { + tests := []struct { + input string + ok bool + negated bool + dirOnly bool + anchored bool + pattern string + }{ + {"", false, false, false, false, ""}, + {"# comment", false, false, false, false, ""}, + {"build/", true, false, true, false, "build"}, + {"!important/", true, true, true, false, "important"}, + {"/root-only", true, false, false, true, "root-only"}, + {"*.log", true, false, false, false, "*.log"}, + {"a/b", true, false, false, true, "a/b"}, + {"**/cache", true, false, false, true, "**/cache"}, + {" ", false, false, false, false, ""}, + {"foo\\ ", true, false, false, false, "foo "}, + } + + for _, tt := range tests { + r, ok := parseLine(tt.input) + if ok != tt.ok { + t.Errorf("parseLine(%q): ok = %v, want %v", tt.input, ok, tt.ok) + continue + } + if !ok { + continue + } + if r.negated != tt.negated { + t.Errorf("parseLine(%q): negated = %v, want %v", tt.input, r.negated, tt.negated) + } + if r.dirOnly != tt.dirOnly { + t.Errorf("parseLine(%q): dirOnly = %v, want %v", tt.input, r.dirOnly, tt.dirOnly) + } + if 
r.anchored != tt.anchored { + t.Errorf("parseLine(%q): anchored = %v, want %v", tt.input, r.anchored, tt.anchored) + } + if r.pattern != tt.pattern { + t.Errorf("parseLine(%q): pattern = %q, want %q", tt.input, r.pattern, tt.pattern) + } + } +} + +// --- globMatch tests --- + +func TestGlobMatch(t *testing.T) { + tests := []struct { + pattern string + name string + match bool + }{ + {"foo", "foo", true}, + {"foo", "bar", false}, + {"*.log", "error.log", true}, + {"*.log", "dir/error.log", false}, + {"*", "anything", true}, + {"*", "a/b", false}, + {"?", "a", true}, + {"?", "ab", false}, + {"[abc]", "a", true}, + {"[abc]", "d", false}, + {"[a-z]", "m", true}, + {"[a-z]", "M", false}, + {"[!a-z]", "M", true}, + {"**", "a/b/c", true}, + {"**/foo", "foo", true}, + {"**/foo", "a/foo", true}, + {"**/foo", "a/b/foo", true}, + {"a/**/b", "a/b", true}, + {"a/**/b", "a/x/b", true}, + {"a/**/b", "a/x/y/b", true}, + {"foo/**", "foo/bar", true}, + {"foo/**", "foo/bar/baz", true}, + } + + for _, tt := range tests { + got := globMatch(tt.pattern, tt.name) + if got != tt.match { + t.Errorf("globMatch(%q, %q) = %v, want %v", tt.pattern, tt.name, got, tt.match) + } + } +}