diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 204ed945..6d6a8324 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -18,8 +18,12 @@ on: paths: - 'docs/src/content/docs/progress/**' - 'docs/astro.config.mjs' + - 'scripts/benchmark_manifest_ops.py' pull_request: - paths: ['docs/**'] + paths: + - 'docs/**' + - 'scripts/benchmark_manifest_ops.py' + - '.github/workflows/docs.yml' workflow_dispatch: permissions: @@ -49,6 +53,9 @@ jobs: working-directory: ./docs run: npm ci + - name: Generate manifest benchmark results + run: python3 scripts/benchmark_manifest_ops.py --work-dir "$RUNNER_TEMP" --markdown --update-doc docs/src/content/docs/progress/autoloop-go-migration.mdx + - name: Build documentation working-directory: ./docs run: npm run build diff --git a/docs/src/content/docs/progress/autoloop-go-migration.mdx b/docs/src/content/docs/progress/autoloop-go-migration.mdx index 51a3eb66..2da7d7cc 100644 --- a/docs/src/content/docs/progress/autoloop-go-migration.mdx +++ b/docs/src/content/docs/progress/autoloop-go-migration.mdx @@ -201,15 +201,19 @@ Autoloop tracks `python_lines_migrated_pct = (migrated_python_lines / original_p ### Manifest operations benchmark (`scripts/benchmark_manifest_ops.py`) -The script `scripts/benchmark_manifest_ops.py` exists in the repository. A local run was attempted but could not complete (permission denied in the sandbox environment). Results from the previous documented run (2026-05-13) are shown below for reference; re-run locally to get current values. +`scripts/benchmark_manifest_ops.py` runs in CI with scratch space rooted in the runner temp directory, and the docs workflow regenerates this block before publishing to GitHub Pages. + +{/* benchmark_manifest_ops:start */} +The table below is generated by `scripts/benchmark_manifest_ops.py` during the docs build. | Scale | `check_collision` speedup | `sync_remove_files` speedup | `cleanup_empty_parents` speedup | Scoped uninstall speedup | |---|---:|---:|---:|---:| -| Current: 10 pkgs, 50 paths | 18.1x | 0.8x | 0.7x | 1.4x | -| Growing: 50 pkgs, 250 paths | 17.4x | 1.6x | 0.5x | 12.2x | -| Large monorepo: 100 pkgs, 2,000 paths | 1,606.6x | 2.2x | 0.6x | 26.0x | +| Current: 10 pkgs, 50 paths | 22.1x | 1.8x | 1.0x | 2.0x | +| Growing: 50 pkgs, 250 paths | 55.4x | 2.1x | 0.5x | 9.2x | +| Large monorepo: 100 pkgs, 2,000 paths | 1018.7x | 1.7x | 0.5x | 17.8x | -`cleanup_empty_parents` shows a small regression at scale (0.5x-0.9x) because the batch bottom-up algorithm has higher constant overhead than the legacy per-file walk-up at low deleted-file counts. This is expected and acceptable given the gains on the other three operations. +`cleanup_empty_parents` may show a small regression at low deleted-file counts because the batch bottom-up algorithm has higher constant overhead than the legacy per-file walk-up. This is expected and acceptable given the gains on the other three operations. +{/* benchmark_manifest_ops:end */} ### Go build/test validation diff --git a/scripts/benchmark_manifest_ops.py b/scripts/benchmark_manifest_ops.py index a7556be3..941249ef 100644 --- a/scripts/benchmark_manifest_ops.py +++ b/scripts/benchmark_manifest_ops.py @@ -8,14 +8,14 @@ - Optimization 4: Scoped uninstall file set (removed packages only) Usage: - uv run python scripts/benchmark_manifest_ops.py + python3 scripts/benchmark_manifest_ops.py + python3 scripts/benchmark_manifest_ops.py --work-dir "$RUNNER_TEMP" --markdown --update-doc docs/src/content/docs/progress/autoloop-go-migration.mdx """ -import os +import argparse +import shutil import tempfile import time -import sys -import shutil from pathlib import Path # --------------------------------------------------------------------------- @@ -35,6 +35,14 @@ PACKAGES = 50 FILES_PER_PACKAGE = 5 INTEGRATOR_TYPES = 6 # prompts, agents-gh, agents-cl, commands, skills, hooks +DOC_START = "{/* benchmark_manifest_ops:start */}" +DOC_END = "{/* benchmark_manifest_ops:end */}" +HELP_DESCRIPTION = "Benchmark manifest-based collision detection and sync operations." +SCALES = [ + ("Current (10 pkgs x 5 files = 50 paths)", "Current: 10 pkgs, 50 paths", 10, 5), + ("Growing (50 pkgs x 5 files = 250 paths)", "Growing: 50 pkgs, 250 paths", 50, 5), + ("Large monorepo (100 pkgs x 20 files = 2000 paths)", "Large monorepo: 100 pkgs, 2,000 paths", 100, 20), +] def build_managed_files(n_packages: int, files_per_pkg: int) -> set: @@ -53,9 +61,7 @@ def build_managed_files(n_packages: int, files_per_pkg: int) -> set: def check_collision_OLD(rel_path: str, managed_files: set) -> bool: """Original O(M) per call — rebuilds normalized set.""" - if rel_path.replace("\\", "/") in {p.replace("\\", "/") for p in managed_files}: - return False - return True + return rel_path.replace("\\", "/") not in {p.replace("\\", "/") for p in managed_files} # --------------------------------------------------------------------------- @@ -68,9 +74,7 @@ def normalize_managed_files(managed_files: set) -> set: def check_collision_NEW(rel_path: str, managed_files_normalized: set) -> bool: """Optimized O(1) lookup against pre-normalized set.""" - if rel_path.replace("\\", "/") in managed_files_normalized: - return False - return True + return rel_path.replace("\\", "/") not in managed_files_normalized # --------------------------------------------------------------------------- @@ -125,39 +129,57 @@ def timeit(fn, *args, iterations: int = 1000) -> float: return (time.perf_counter() - start) * 1000 -def run_benchmarks(): - print("=" * 72) - print("APM Manifest Operations Benchmark") - print("=" * 72) +def _make_temp_dir(work_dir: Path | None) -> Path: + """Create benchmark scratch space inside a caller-approved directory.""" + if work_dir is not None: + work_dir.mkdir(parents=True, exist_ok=True) + return Path( + tempfile.mkdtemp( + prefix="apm-manifest-ops-", + dir=str(work_dir) if work_dir is not None else None, + ) + ) + - for scale_label, n_pkgs, n_files in [ - ("Current (10 pkgs × 5 files = 50 paths)", 10, 5), - ("Growing (50 pkgs × 5 files = 250 paths)", 50, 5), - ("Large monorepo (100 pkgs × 20 files = 2000 paths)", 100, 20), - ]: +def _format_speedup(speedup: float) -> str: + if speedup == float("inf"): + return "inf x" + return f"{speedup:.1f}x" + + +def run_benchmarks(work_dir: Path | None = None, emit_text: bool = True) -> list[dict[str, str]]: + results = [] + if emit_text: + print("=" * 72) + print("APM Manifest Operations Benchmark") + print("=" * 72) + + for scale_label, markdown_label, n_pkgs, n_files in SCALES: managed = build_managed_files(n_pkgs, n_files) M = len(managed) - print(f"\n{'─' * 72}") - print(f"Scale: {scale_label} (M={M})") - print(f"{'─' * 72}") + if emit_text: + print(f"\n{'─' * 72}") + print(f"Scale: {scale_label} (M={M})") + print(f"{'─' * 72}") # -- Benchmark 1: check_collision ---------------------------------- # # Simulate: P=n_pkgs packages × F=n_files files × I=6 integrators # Each call does one collision check. calls = n_pkgs * n_files * INTEGRATOR_TYPES - test_path = f".github/prompts/pkg-0-file-0.md" + test_path = ".github/prompts/pkg-0-file-0.md" old_time = timeit(check_collision_OLD, test_path, managed, iterations=calls) normalized = normalize_managed_files(managed) norm_time = timeit(normalize_managed_files, managed, iterations=1) new_time = norm_time + timeit(check_collision_NEW, test_path, normalized, iterations=calls) - print(f"\n check_collision ({calls:,} calls):") - print(f" OLD (set rebuild per call): {old_time:>8.2f} ms") - print(f" NEW (pre-normalized O(1)): {new_time:>8.2f} ms") speedup = old_time / new_time if new_time > 0 else float("inf") - print(f" Speedup: {speedup:>8.1f}×") + if emit_text: + print(f"\n check_collision ({calls:,} calls):") + print(f" OLD (set rebuild per call): {old_time:>8.2f} ms") + print(f" NEW (pre-normalized O(1)): {new_time:>8.2f} ms") + print(f" Speedup: {speedup:>8.1f}x") # -- Benchmark 2: sync_remove_files -------------------------------- # @@ -182,11 +204,12 @@ def run_benchmarks(): sync_remove_new(buckets[prefix]) new_sync += (time.perf_counter() - t0) * 1000 - print(f"\n sync_remove_files ({iters} uninstall cycles × 6 integrators):") - print(f" OLD (6× full-set scan): {old_sync:>8.2f} ms") - print(f" NEW (pre-partitioned): {new_sync:>8.2f} ms") speedup2 = old_sync / new_sync if new_sync > 0 else float("inf") - print(f" Speedup: {speedup2:>8.1f}×") + if emit_text: + print(f"\n sync_remove_files ({iters} uninstall cycles x 6 integrators):") + print(f" OLD (6x full-set scan): {old_sync:>8.2f} ms") + print(f" NEW (pre-partitioned): {new_sync:>8.2f} ms") + print(f" Speedup: {speedup2:>8.1f}x") # -- Benchmark 3: empty-parent cleanup ---------------------------- # @@ -208,7 +231,7 @@ def _make_tree(base: Path, count: int, nest: int): return paths # OLD: per-file walk-up - tmp_old = Path(tempfile.mkdtemp()) + tmp_old = _make_temp_dir(work_dir) try: files_old = _make_tree(tmp_old, n_deleted, depth) for f in files_old: @@ -230,7 +253,7 @@ def _make_tree(base: Path, count: int, nest: int): shutil.rmtree(tmp_old, ignore_errors=True) # NEW: batch bottom-up - tmp_new = Path(tempfile.mkdtemp()) + tmp_new = _make_temp_dir(work_dir) try: files_new = _make_tree(tmp_new, n_deleted, depth) for f in files_new: @@ -253,11 +276,12 @@ def _make_tree(base: Path, count: int, nest: int): finally: shutil.rmtree(tmp_new, ignore_errors=True) - print(f"\n cleanup_empty_parents ({n_deleted} deleted files, depth={depth}):") - print(f" OLD (per-file walk-up): {old_parent_ms:>8.2f} ms") - print(f" NEW (batch bottom-up): {new_parent_ms:>8.2f} ms") speedup3 = old_parent_ms / new_parent_ms if new_parent_ms > 0 else float("inf") - print(f" Speedup: {speedup3:>8.1f}×") + if emit_text: + print(f"\n cleanup_empty_parents ({n_deleted} deleted files, depth={depth}):") + print(f" OLD (per-file walk-up): {old_parent_ms:>8.2f} ms") + print(f" NEW (batch bottom-up): {new_parent_ms:>8.2f} ms") + print(f" Speedup: {speedup3:>8.1f}x") # -- Benchmark 4: scoped vs. union-all deployed files -------------- # @@ -294,15 +318,93 @@ def _make_tree(base: Path, count: int, nest: int): _ = [p for p in removed_files if p.startswith(prefix)] new_scope_ms = (time.perf_counter() - t0) * 1000 - print(f"\n scoped uninstall set (removing {removed_count}/{n_pkgs} pkgs, {iters4} cycles):") - print(f" OLD (union ALL {len(all_files)} paths): {old_scope_ms:>8.2f} ms") - print(f" NEW (union removed {len(removed_files)} paths): {new_scope_ms:>8.2f} ms") speedup4 = old_scope_ms / new_scope_ms if new_scope_ms > 0 else float("inf") - print(f" Speedup: {speedup4:>8.1f}×") - - print(f"\n{'=' * 72}") - print("Done.") + if emit_text: + print(f"\n scoped uninstall set (removing {removed_count}/{n_pkgs} pkgs, {iters4} cycles):") + print(f" OLD (union ALL {len(all_files)} paths): {old_scope_ms:>8.2f} ms") + print(f" NEW (union removed {len(removed_files)} paths): {new_scope_ms:>8.2f} ms") + print(f" Speedup: {speedup4:>8.1f}x") + + results.append( + { + "scale": markdown_label, + "check_collision": _format_speedup(speedup), + "sync_remove_files": _format_speedup(speedup2), + "cleanup_empty_parents": _format_speedup(speedup3), + "scoped_uninstall": _format_speedup(speedup4), + } + ) + + if emit_text: + print(f"\n{'=' * 72}") + print("Done.") + + return results + + +def render_markdown(results: list[dict[str, str]]) -> str: + lines = [ + DOC_START, + "The table below is generated by `scripts/benchmark_manifest_ops.py` during the docs build.", + "", + "| Scale | `check_collision` speedup | `sync_remove_files` speedup | `cleanup_empty_parents` speedup | Scoped uninstall speedup |", + "|---|---:|---:|---:|---:|", + ] + for result in results: + lines.append( + f"| {result['scale']} | {result['check_collision']} | {result['sync_remove_files']} | " + f"{result['cleanup_empty_parents']} | {result['scoped_uninstall']} |" + ) + lines.extend( + [ + "", + "`cleanup_empty_parents` may show a small regression at low deleted-file counts because the batch bottom-up algorithm has higher constant overhead than the legacy per-file walk-up. This is expected and acceptable given the gains on the other three operations.", + DOC_END, + ] + ) + return "\n".join(lines) + + +def update_doc(path: Path, markdown: str) -> None: + content = path.read_text(encoding="utf-8") + if DOC_START not in content: + raise ValueError(f"Could not update {path}: missing {DOC_START} marker") + start = content.index(DOC_START) + if DOC_END not in content[start:]: + raise ValueError(f"Could not update {path}: missing {DOC_END} marker") + end = content.index(DOC_END, start) + len(DOC_END) + path.write_text(f"{content[:start]}{markdown}{content[end:]}", encoding="utf-8") + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=HELP_DESCRIPTION) + parser.add_argument( + "--work-dir", + type=Path, + default=None, + help="Writable directory for temporary benchmark trees (defaults to Python's temp dir).", + ) + parser.add_argument( + "--markdown", + action="store_true", + help="Emit the docs markdown benchmark table instead of the console report.", + ) + parser.add_argument( + "--update-doc", + type=Path, + default=None, + help="Replace the generated benchmark block in the given docs page.", + ) + return parser.parse_args() if __name__ == "__main__": - run_benchmarks() + args = parse_args() + benchmark_results = run_benchmarks(work_dir=args.work_dir, emit_text=not args.markdown) + if args.markdown or args.update_doc is not None: + benchmark_markdown = render_markdown(benchmark_results) + if args.update_doc is not None: + update_doc(args.update_doc, benchmark_markdown) + print(f"Updated {args.update_doc}") + else: + print(benchmark_markdown) diff --git a/tests/scripts/test_benchmark_manifest_ops.py b/tests/scripts/test_benchmark_manifest_ops.py new file mode 100644 index 00000000..3ee32d43 --- /dev/null +++ b/tests/scripts/test_benchmark_manifest_ops.py @@ -0,0 +1,54 @@ +import importlib.util +from pathlib import Path + + +def load_benchmark_module(): + script_path = Path(__file__).resolve().parents[2] / "scripts" / "benchmark_manifest_ops.py" + spec = importlib.util.spec_from_file_location("benchmark_manifest_ops", script_path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def test_run_benchmarks_uses_configured_work_dir(tmp_path, monkeypatch): + benchmark = load_benchmark_module() + created_paths = [] + original_mkdtemp = benchmark.tempfile.mkdtemp + + def recording_mkdtemp(*args, **kwargs): + path = Path(original_mkdtemp(*args, **kwargs)) + created_paths.append(path) + return str(path) + + monkeypatch.setattr(benchmark.tempfile, "mkdtemp", recording_mkdtemp) + + results = benchmark.run_benchmarks(work_dir=tmp_path, emit_text=False) + + assert [result["scale"] for result in results] == [ + "Current: 10 pkgs, 50 paths", + "Growing: 50 pkgs, 250 paths", + "Large monorepo: 100 pkgs, 2,000 paths", + ] + assert created_paths + paths_outside_work_dir = [path for path in created_paths if not path.is_relative_to(tmp_path)] + assert not paths_outside_work_dir, f"Paths outside work dir: {paths_outside_work_dir}" + assert not any(tmp_path.iterdir()) + + markdown = benchmark.render_markdown(results) + assert benchmark.DOC_START in markdown + assert benchmark.DOC_END in markdown + assert "| Current: 10 pkgs, 50 paths |" in markdown + + +def test_update_doc_replaces_generated_block(tmp_path): + benchmark = load_benchmark_module() + doc_path = tmp_path / "page.mdx" + doc_path.write_text( + f"before\n{benchmark.DOC_START}\nold content\n{benchmark.DOC_END}\nafter\n" + ) + + benchmark.update_doc(doc_path, f"{benchmark.DOC_START}\nnew content\n{benchmark.DOC_END}") + + assert doc_path.read_text() == ( + f"before\n{benchmark.DOC_START}\nnew content\n{benchmark.DOC_END}\nafter\n" + )