Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

## 2024-05-24 - Swift Actor Isolation limits parallel synchronous I/O
**Learning:** Using an `actor` for parallel scanners (like `CacheScanner` and `NodeModulesScanner`) that launch child tasks with `withTaskGroup` severely limits concurrency when those tasks run synchronous file I/O operations directly on the actor. Actor-isolated code executes one task at a time and only yields the actor at an `await` suspension point; a synchronous method never suspends, so it holds the actor for its entire duration. As a result, `group.addTask { await self.synchronousMethod() }` forces the synchronous work to execute one call at a time, destroying the intended parallelism of `withTaskGroup`.
**Action:** For concurrent file system scanners that rely only on thread-safe dependencies (like `FileManager.default`) and maintain no mutable internal state, use a `struct` instead of an `actor`. With the inner scanning functions made synchronous and no longer isolated to an `actor` instance, each call runs on its own child task of the `TaskGroup`, so the standard `TaskGroup` parallelism can spread the work freely across CPU cores.
10 changes: 4 additions & 6 deletions Sources/Cacheout/Scanner/CacheScanner.swift
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,11 @@

import Foundation

actor CacheScanner {
private let fileManager = FileManager.default

struct CacheScanner {
func scanAll(_ categories: [CacheCategory]) async -> [ScanResult] {
await withTaskGroup(of: ScanResult.self) { group in
for category in categories {
group.addTask { await self.scanCategory(category) }
group.addTask { self.scanCategory(category) }
}
var results: [ScanResult] = []
for await result in group {
Expand All @@ -42,7 +40,7 @@ actor CacheScanner {
}
}

func scanCategory(_ category: CacheCategory) async -> ScanResult {
func scanCategory(_ category: CacheCategory) -> ScanResult {
let resolvedPaths = category.resolvedPaths
guard !resolvedPaths.isEmpty else {
return ScanResult(category: category, sizeBytes: 0, itemCount: 0, exists: false)
Expand Down Expand Up @@ -70,7 +68,7 @@ actor CacheScanner {
var itemCount = 0

// Use allocatedSizeOfDirectory for actual disk usage (handles sparse files)
guard let enumerator = fileManager.enumerator(
guard let enumerator = FileManager.default.enumerator(
at: url,
includingPropertiesForKeys: [.totalFileAllocatedSizeKey, .fileAllocatedSizeKey],
options: [.skipsHiddenFiles, .skipsPackageDescendants]
Expand Down
22 changes: 10 additions & 12 deletions Sources/Cacheout/Scanner/NodeModulesScanner.swift
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,7 @@

import Foundation

actor NodeModulesScanner {
private let fileManager = FileManager.default

struct NodeModulesScanner {
/// Common directories where developers keep projects
private static let searchRoots: [String] = [
"Documents",
Expand All @@ -53,16 +51,16 @@ actor NodeModulesScanner {
]

func scan(maxDepth: Int = 6) async -> [NodeModulesItem] {
let home = fileManager.homeDirectoryForCurrentUser
let home = FileManager.default.homeDirectoryForCurrentUser
var allItems: [NodeModulesItem] = []

// Scan each search root in parallel
await withTaskGroup(of: [NodeModulesItem].self) { group in
for root in Self.searchRoots {
let rootURL = home.appendingPathComponent(root)
guard fileManager.fileExists(atPath: rootURL.path) else { continue }
guard FileManager.default.fileExists(atPath: rootURL.path) else { continue }
group.addTask {
await self.findNodeModules(in: rootURL, maxDepth: maxDepth)
self.findNodeModules(in: rootURL, maxDepth: maxDepth)
}
}
for await items in group {
Expand All @@ -77,18 +75,18 @@ actor NodeModulesScanner {
.sorted { $0.sizeBytes > $1.sizeBytes }
}

private func findNodeModules(in directory: URL, maxDepth: Int, currentDepth: Int = 0) async -> [NodeModulesItem] {
private func findNodeModules(in directory: URL, maxDepth: Int, currentDepth: Int = 0) -> [NodeModulesItem] {
guard currentDepth < maxDepth else { return [] }

var results: [NodeModulesItem] = []
let nodeModulesURL = directory.appendingPathComponent("node_modules")

// Check if this directory contains node_modules
var isDir: ObjCBool = false
if fileManager.fileExists(atPath: nodeModulesURL.path, isDirectory: &isDir), isDir.boolValue {
if FileManager.default.fileExists(atPath: nodeModulesURL.path, isDirectory: &isDir), isDir.boolValue {
let size = directorySize(at: nodeModulesURL)
if size > 0 {
let lastMod = try? fileManager.attributesOfItem(atPath: nodeModulesURL.path)[.modificationDate] as? Date
let lastMod = try? FileManager.default.attributesOfItem(atPath: nodeModulesURL.path)[.modificationDate] as? Date
let projectName = directory.lastPathComponent
results.append(NodeModulesItem(
projectName: projectName,
Expand All @@ -103,7 +101,7 @@ actor NodeModulesScanner {
}

// Recurse into subdirectories
guard let contents = try? fileManager.contentsOfDirectory(
guard let contents = try? FileManager.default.contentsOfDirectory(
at: directory,
includingPropertiesForKeys: [.isDirectoryKey],
options: [.skipsHiddenFiles]
Expand All @@ -113,7 +111,7 @@ actor NodeModulesScanner {
let name = item.lastPathComponent
guard !Self.skipDirs.contains(name) else { continue }
guard (try? item.resourceValues(forKeys: [.isDirectoryKey]).isDirectory) == true else { continue }
let subResults = await findNodeModules(in: item, maxDepth: maxDepth, currentDepth: currentDepth + 1)
let subResults = self.findNodeModules(in: item, maxDepth: maxDepth, currentDepth: currentDepth + 1)
results.append(contentsOf: subResults)
}

Expand All @@ -122,7 +120,7 @@ actor NodeModulesScanner {

private func directorySize(at url: URL) -> Int64 {
var total: Int64 = 0
guard let enumerator = fileManager.enumerator(
guard let enumerator = FileManager.default.enumerator(
at: url,
includingPropertiesForKeys: [.totalFileAllocatedSizeKey],
options: [.skipsHiddenFiles]
Expand Down