diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 0000000..b99881e --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,4 @@ + +## 2024-05-24 - Swift actor isolation limits parallel synchronous I/O +**Learning:** Using an `actor` for parallel scanners (like `CacheScanner` and `NodeModulesScanner`) that launch background tasks with `withTaskGroup` severely limits concurrency when those tasks run synchronous file I/O operations directly on the actor. An actor executes only one piece of its isolated code at a time and can interleave other work only at suspension points (`await`); a synchronous method has no suspension points, so `group.addTask { await self.synchronousMethod() }` forces the synchronous work to execute one call at a time, destroying the intended parallelism of `withTaskGroup`. +**Action:** For concurrent file system scanners that rely only on thread-safe dependencies (like `FileManager.default`) and maintain no mutable internal state, use a `struct` instead of an `actor`. When the inner scanning functions are synchronous and not actor-isolated, each call runs inside its own child `Task` rather than queuing on a single `actor` instance, so the `TaskGroup` can execute the child tasks in parallel across CPU cores. 
diff --git a/Sources/Cacheout/Scanner/CacheScanner.swift b/Sources/Cacheout/Scanner/CacheScanner.swift index 3ce3e9c..6710c87 100644 --- a/Sources/Cacheout/Scanner/CacheScanner.swift +++ b/Sources/Cacheout/Scanner/CacheScanner.swift @@ -26,13 +26,11 @@ import Foundation -actor CacheScanner { - private let fileManager = FileManager.default - +struct CacheScanner { func scanAll(_ categories: [CacheCategory]) async -> [ScanResult] { await withTaskGroup(of: ScanResult.self) { group in for category in categories { - group.addTask { await self.scanCategory(category) } + group.addTask { self.scanCategory(category) } } var results: [ScanResult] = [] for await result in group { @@ -42,7 +40,7 @@ actor CacheScanner { } } - func scanCategory(_ category: CacheCategory) async -> ScanResult { + func scanCategory(_ category: CacheCategory) -> ScanResult { let resolvedPaths = category.resolvedPaths guard !resolvedPaths.isEmpty else { return ScanResult(category: category, sizeBytes: 0, itemCount: 0, exists: false) @@ -70,7 +68,7 @@ actor CacheScanner { var itemCount = 0 // Use allocatedSizeOfDirectory for actual disk usage (handles sparse files) - guard let enumerator = fileManager.enumerator( + guard let enumerator = FileManager.default.enumerator( at: url, includingPropertiesForKeys: [.totalFileAllocatedSizeKey, .fileAllocatedSizeKey], options: [.skipsHiddenFiles, .skipsPackageDescendants] diff --git a/Sources/Cacheout/Scanner/NodeModulesScanner.swift b/Sources/Cacheout/Scanner/NodeModulesScanner.swift index 3ed4d8c..e4122f6 100644 --- a/Sources/Cacheout/Scanner/NodeModulesScanner.swift +++ b/Sources/Cacheout/Scanner/NodeModulesScanner.swift @@ -28,9 +28,7 @@ import Foundation -actor NodeModulesScanner { - private let fileManager = FileManager.default - +struct NodeModulesScanner { /// Common directories where developers keep projects private static let searchRoots: [String] = [ "Documents", @@ -53,16 +51,16 @@ actor NodeModulesScanner { ] func scan(maxDepth: Int = 6) async -> 
[NodeModulesItem] { - let home = fileManager.homeDirectoryForCurrentUser + let home = FileManager.default.homeDirectoryForCurrentUser var allItems: [NodeModulesItem] = [] // Scan each search root in parallel await withTaskGroup(of: [NodeModulesItem].self) { group in for root in Self.searchRoots { let rootURL = home.appendingPathComponent(root) - guard fileManager.fileExists(atPath: rootURL.path) else { continue } + guard FileManager.default.fileExists(atPath: rootURL.path) else { continue } group.addTask { - await self.findNodeModules(in: rootURL, maxDepth: maxDepth) + self.findNodeModules(in: rootURL, maxDepth: maxDepth) } } for await items in group { @@ -77,7 +75,7 @@ actor NodeModulesScanner { .sorted { $0.sizeBytes > $1.sizeBytes } } - private func findNodeModules(in directory: URL, maxDepth: Int, currentDepth: Int = 0) async -> [NodeModulesItem] { + private func findNodeModules(in directory: URL, maxDepth: Int, currentDepth: Int = 0) -> [NodeModulesItem] { guard currentDepth < maxDepth else { return [] } var results: [NodeModulesItem] = [] @@ -85,10 +83,10 @@ actor NodeModulesScanner { // Check if this directory contains node_modules var isDir: ObjCBool = false - if fileManager.fileExists(atPath: nodeModulesURL.path, isDirectory: &isDir), isDir.boolValue { + if FileManager.default.fileExists(atPath: nodeModulesURL.path, isDirectory: &isDir), isDir.boolValue { let size = directorySize(at: nodeModulesURL) if size > 0 { - let lastMod = try? fileManager.attributesOfItem(atPath: nodeModulesURL.path)[.modificationDate] as? Date + let lastMod = try? FileManager.default.attributesOfItem(atPath: nodeModulesURL.path)[.modificationDate] as? Date let projectName = directory.lastPathComponent results.append(NodeModulesItem( projectName: projectName, @@ -103,7 +101,7 @@ actor NodeModulesScanner { } // Recurse into subdirectories - guard let contents = try? fileManager.contentsOfDirectory( + guard let contents = try? 
FileManager.default.contentsOfDirectory( at: directory, includingPropertiesForKeys: [.isDirectoryKey], options: [.skipsHiddenFiles] @@ -113,7 +111,7 @@ actor NodeModulesScanner { let name = item.lastPathComponent guard !Self.skipDirs.contains(name) else { continue } guard (try? item.resourceValues(forKeys: [.isDirectoryKey]).isDirectory) == true else { continue } - let subResults = await findNodeModules(in: item, maxDepth: maxDepth, currentDepth: currentDepth + 1) + let subResults = self.findNodeModules(in: item, maxDepth: maxDepth, currentDepth: currentDepth + 1) results.append(contentsOf: subResults) } @@ -122,7 +120,7 @@ actor NodeModulesScanner { private func directorySize(at url: URL) -> Int64 { var total: Int64 = 0 - guard let enumerator = fileManager.enumerator( + guard let enumerator = FileManager.default.enumerator( at: url, includingPropertiesForKeys: [.totalFileAllocatedSizeKey], options: [.skipsHiddenFiles]