From 7858c178579a6023e47d0c4690dc5ec2188ef89a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 15 May 2026 03:42:42 +0000 Subject: [PATCH 1/4] Initial plan From 3aea6cf03da1dd892fb48e8502b35f3b06ff7534 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 15 May 2026 03:48:49 +0000 Subject: [PATCH 2/4] fix benchmark docs generation Co-authored-by: mrjf <180956+mrjf@users.noreply.github.com> --- .github/workflows/docs.yml | 9 +- .../docs/progress/autoloop-go-migration.mdx | 14 +- scripts/benchmark_manifest_ops.py | 187 +++++++++++++----- tests/scripts/test_benchmark_manifest_ops.py | 42 ++++ 4 files changed, 201 insertions(+), 51 deletions(-) create mode 100644 tests/scripts/test_benchmark_manifest_ops.py diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 204ed945..6d6a8324 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -18,8 +18,12 @@ on: paths: - 'docs/src/content/docs/progress/**' - 'docs/astro.config.mjs' + - 'scripts/benchmark_manifest_ops.py' pull_request: - paths: ['docs/**'] + paths: + - 'docs/**' + - 'scripts/benchmark_manifest_ops.py' + - '.github/workflows/docs.yml' workflow_dispatch: permissions: @@ -49,6 +53,9 @@ jobs: working-directory: ./docs run: npm ci + - name: Generate manifest benchmark results + run: python3 scripts/benchmark_manifest_ops.py --work-dir "$RUNNER_TEMP" --markdown --update-doc docs/src/content/docs/progress/autoloop-go-migration.mdx + - name: Build documentation working-directory: ./docs run: npm run build diff --git a/docs/src/content/docs/progress/autoloop-go-migration.mdx b/docs/src/content/docs/progress/autoloop-go-migration.mdx index 51a3eb66..2da7d7cc 100644 --- a/docs/src/content/docs/progress/autoloop-go-migration.mdx +++ b/docs/src/content/docs/progress/autoloop-go-migration.mdx @@ -201,15 +201,19 @@ Autoloop tracks `python_lines_migrated_pct = (migrated_python_lines / original_p ### Manifest operations benchmark (`scripts/benchmark_manifest_ops.py`) -The script `scripts/benchmark_manifest_ops.py` exists in the repository. A local run was attempted but could not complete (permission denied in the sandbox environment). Results from the previous documented run (2026-05-13) are shown below for reference; re-run locally to get current values. +`scripts/benchmark_manifest_ops.py` runs in CI with scratch space rooted in the runner temp directory, and the docs workflow regenerates this block before publishing to GitHub Pages. + +{/* benchmark_manifest_ops:start */} +The table below is generated by `scripts/benchmark_manifest_ops.py` during the docs build. | Scale | `check_collision` speedup | `sync_remove_files` speedup | `cleanup_empty_parents` speedup | Scoped uninstall speedup | |---|---:|---:|---:|---:| -| Current: 10 pkgs, 50 paths | 18.1x | 0.8x | 0.7x | 1.4x | -| Growing: 50 pkgs, 250 paths | 17.4x | 1.6x | 0.5x | 12.2x | -| Large monorepo: 100 pkgs, 2,000 paths | 1,606.6x | 2.2x | 0.6x | 26.0x | +| Current: 10 pkgs, 50 paths | 22.1x | 1.8x | 1.0x | 2.0x | +| Growing: 50 pkgs, 250 paths | 55.4x | 2.1x | 0.5x | 9.2x | +| Large monorepo: 100 pkgs, 2,000 paths | 1018.7x | 1.7x | 0.5x | 17.8x | -`cleanup_empty_parents` shows a small regression at scale (0.5x-0.9x) because the batch bottom-up algorithm has higher constant overhead than the legacy per-file walk-up at low deleted-file counts. This is expected and acceptable given the gains on the other three operations. +`cleanup_empty_parents` may show a small regression at low deleted-file counts because the batch bottom-up algorithm has higher constant overhead than the legacy per-file walk-up. This is expected and acceptable given the gains on the other three operations. +{/* benchmark_manifest_ops:end */} ### Go build/test validation diff --git a/scripts/benchmark_manifest_ops.py b/scripts/benchmark_manifest_ops.py index a7556be3..3f01d1fb 100644 --- a/scripts/benchmark_manifest_ops.py +++ b/scripts/benchmark_manifest_ops.py @@ -8,14 +8,14 @@ - Optimization 4: Scoped uninstall file set (removed packages only) Usage: - uv run python scripts/benchmark_manifest_ops.py + python3 scripts/benchmark_manifest_ops.py + python3 scripts/benchmark_manifest_ops.py --work-dir "$RUNNER_TEMP" --markdown --update-doc docs/src/content/docs/progress/autoloop-go-migration.mdx """ -import os +import argparse +import shutil import tempfile import time -import sys -import shutil from pathlib import Path # --------------------------------------------------------------------------- @@ -35,6 +35,13 @@ PACKAGES = 50 FILES_PER_PACKAGE = 5 INTEGRATOR_TYPES = 6 # prompts, agents-gh, agents-cl, commands, skills, hooks +DOC_START = "{/* benchmark_manifest_ops:start */}" +DOC_END = "{/* benchmark_manifest_ops:end */}" +SCALES = [ + ("Current (10 pkgs x 5 files = 50 paths)", "Current: 10 pkgs, 50 paths", 10, 5), + ("Growing (50 pkgs x 5 files = 250 paths)", "Growing: 50 pkgs, 250 paths", 50, 5), + ("Large monorepo (100 pkgs x 20 files = 2000 paths)", "Large monorepo: 100 pkgs, 2,000 paths", 100, 20), +] def build_managed_files(n_packages: int, files_per_pkg: int) -> set: @@ -53,9 +60,7 @@ def build_managed_files(n_packages: int, files_per_pkg: int) -> set: def check_collision_OLD(rel_path: str, managed_files: set) -> bool: """Original O(M) per call — rebuilds normalized set.""" - if rel_path.replace("\\", "/") in {p.replace("\\", "/") for p in managed_files}: - return False - return True + return rel_path.replace("\\", "/") not in {p.replace("\\", "/") for p in managed_files} # --------------------------------------------------------------------------- @@ -68,9 +73,7 @@ def normalize_managed_files(managed_files: set) -> set: def check_collision_NEW(rel_path: str, managed_files_normalized: set) -> bool: """Optimized O(1) lookup against pre-normalized set.""" - if rel_path.replace("\\", "/") in managed_files_normalized: - return False - return True + return rel_path.replace("\\", "/") not in managed_files_normalized # --------------------------------------------------------------------------- @@ -125,39 +128,57 @@ def timeit(fn, *args, iterations: int = 1000) -> float: return (time.perf_counter() - start) * 1000 -def run_benchmarks(): - print("=" * 72) - print("APM Manifest Operations Benchmark") - print("=" * 72) +def _make_temp_dir(work_dir: Path | None) -> Path: + """Create benchmark scratch space inside a caller-approved directory.""" + if work_dir is not None: + work_dir.mkdir(parents=True, exist_ok=True) + return Path( + tempfile.mkdtemp( + prefix="apm-manifest-ops-", + dir=str(work_dir) if work_dir is not None else None, + ) + ) + - for scale_label, n_pkgs, n_files in [ - ("Current (10 pkgs × 5 files = 50 paths)", 10, 5), - ("Growing (50 pkgs × 5 files = 250 paths)", 50, 5), - ("Large monorepo (100 pkgs × 20 files = 2000 paths)", 100, 20), - ]: +def _format_speedup(speedup: float) -> str: + if speedup == float("inf"): + return "∞x" + return f"{speedup:.1f}x" + + +def run_benchmarks(work_dir: Path | None = None, emit_text: bool = True) -> list[dict[str, str]]: + results = [] + if emit_text: + print("=" * 72) + print("APM Manifest Operations Benchmark") + print("=" * 72) + + for scale_label, markdown_label, n_pkgs, n_files in SCALES: managed = build_managed_files(n_pkgs, n_files) M = len(managed) - print(f"\n{'─' * 72}") - print(f"Scale: {scale_label} (M={M})") - print(f"{'─' * 72}") + if emit_text: + print(f"\n{'─' * 72}") + print(f"Scale: {scale_label} (M={M})") + print(f"{'─' * 72}") # -- Benchmark 1: check_collision ---------------------------------- # # Simulate: P=n_pkgs packages × F=n_files files × I=6 integrators # Each call does one collision check. calls = n_pkgs * n_files * INTEGRATOR_TYPES - test_path = f".github/prompts/pkg-0-file-0.md" + test_path = ".github/prompts/pkg-0-file-0.md" old_time = timeit(check_collision_OLD, test_path, managed, iterations=calls) normalized = normalize_managed_files(managed) norm_time = timeit(normalize_managed_files, managed, iterations=1) new_time = norm_time + timeit(check_collision_NEW, test_path, normalized, iterations=calls) - print(f"\n check_collision ({calls:,} calls):") - print(f" OLD (set rebuild per call): {old_time:>8.2f} ms") - print(f" NEW (pre-normalized O(1)): {new_time:>8.2f} ms") speedup = old_time / new_time if new_time > 0 else float("inf") - print(f" Speedup: {speedup:>8.1f}×") + if emit_text: + print(f"\n check_collision ({calls:,} calls):") + print(f" OLD (set rebuild per call): {old_time:>8.2f} ms") + print(f" NEW (pre-normalized O(1)): {new_time:>8.2f} ms") + print(f" Speedup: {speedup:>8.1f}x") # -- Benchmark 2: sync_remove_files -------------------------------- # @@ -182,11 +203,12 @@ def run_benchmarks(): sync_remove_new(buckets[prefix]) new_sync += (time.perf_counter() - t0) * 1000 - print(f"\n sync_remove_files ({iters} uninstall cycles × 6 integrators):") - print(f" OLD (6× full-set scan): {old_sync:>8.2f} ms") - print(f" NEW (pre-partitioned): {new_sync:>8.2f} ms") speedup2 = old_sync / new_sync if new_sync > 0 else float("inf") - print(f" Speedup: {speedup2:>8.1f}×") + if emit_text: + print(f"\n sync_remove_files ({iters} uninstall cycles x 6 integrators):") + print(f" OLD (6x full-set scan): {old_sync:>8.2f} ms") + print(f" NEW (pre-partitioned): {new_sync:>8.2f} ms") + print(f" Speedup: {speedup2:>8.1f}x") # -- Benchmark 3: empty-parent cleanup ---------------------------- # @@ -208,7 +230,7 @@ def _make_tree(base: Path, count: int, nest: int): return paths # OLD: per-file walk-up - tmp_old = Path(tempfile.mkdtemp()) + tmp_old = _make_temp_dir(work_dir) try: files_old = _make_tree(tmp_old, n_deleted, depth) for f in files_old: @@ -230,7 +252,7 @@ def _make_tree(base: Path, count: int, nest: int): shutil.rmtree(tmp_old, ignore_errors=True) # NEW: batch bottom-up - tmp_new = Path(tempfile.mkdtemp()) + tmp_new = _make_temp_dir(work_dir) try: files_new = _make_tree(tmp_new, n_deleted, depth) for f in files_new: @@ -253,11 +275,12 @@ def _make_tree(base: Path, count: int, nest: int): finally: shutil.rmtree(tmp_new, ignore_errors=True) - print(f"\n cleanup_empty_parents ({n_deleted} deleted files, depth={depth}):") - print(f" OLD (per-file walk-up): {old_parent_ms:>8.2f} ms") - print(f" NEW (batch bottom-up): {new_parent_ms:>8.2f} ms") speedup3 = old_parent_ms / new_parent_ms if new_parent_ms > 0 else float("inf") - print(f" Speedup: {speedup3:>8.1f}×") + if emit_text: + print(f"\n cleanup_empty_parents ({n_deleted} deleted files, depth={depth}):") + print(f" OLD (per-file walk-up): {old_parent_ms:>8.2f} ms") + print(f" NEW (batch bottom-up): {new_parent_ms:>8.2f} ms") + print(f" Speedup: {speedup3:>8.1f}x") # -- Benchmark 4: scoped vs. union-all deployed files -------------- # @@ -294,15 +317,89 @@ def _make_tree(base: Path, count: int, nest: int): _ = [p for p in removed_files if p.startswith(prefix)] new_scope_ms = (time.perf_counter() - t0) * 1000 - print(f"\n scoped uninstall set (removing {removed_count}/{n_pkgs} pkgs, {iters4} cycles):") - print(f" OLD (union ALL {len(all_files)} paths): {old_scope_ms:>8.2f} ms") - print(f" NEW (union removed {len(removed_files)} paths): {new_scope_ms:>8.2f} ms") speedup4 = old_scope_ms / new_scope_ms if new_scope_ms > 0 else float("inf") - print(f" Speedup: {speedup4:>8.1f}×") - - print(f"\n{'=' * 72}") - print("Done.") + if emit_text: + print(f"\n scoped uninstall set (removing {removed_count}/{n_pkgs} pkgs, {iters4} cycles):") + print(f" OLD (union ALL {len(all_files)} paths): {old_scope_ms:>8.2f} ms") + print(f" NEW (union removed {len(removed_files)} paths): {new_scope_ms:>8.2f} ms") + print(f" Speedup: {speedup4:>8.1f}x") + + results.append( + { + "scale": markdown_label, + "check_collision": _format_speedup(speedup), + "sync_remove_files": _format_speedup(speedup2), + "cleanup_empty_parents": _format_speedup(speedup3), + "scoped_uninstall": _format_speedup(speedup4), + } + ) + + if emit_text: + print(f"\n{'=' * 72}") + print("Done.") + + return results + + +def render_markdown(results: list[dict[str, str]]) -> str: + lines = [ + DOC_START, + "The table below is generated by `scripts/benchmark_manifest_ops.py` during the docs build.", + "", + "| Scale | `check_collision` speedup | `sync_remove_files` speedup | `cleanup_empty_parents` speedup | Scoped uninstall speedup |", + "|---|---:|---:|---:|---:|", + ] + for result in results: + lines.append( + f"| {result['scale']} | {result['check_collision']} | {result['sync_remove_files']} | " + f"{result['cleanup_empty_parents']} | {result['scoped_uninstall']} |" + ) + lines.extend( + [ + "", + "`cleanup_empty_parents` may show a small regression at low deleted-file counts because the batch bottom-up algorithm has higher constant overhead than the legacy per-file walk-up. This is expected and acceptable given the gains on the other three operations.", + DOC_END, + ] + ) + return "\n".join(lines) + + +def update_doc(path: Path, markdown: str) -> None: + content = path.read_text() + start = content.index(DOC_START) + end = content.index(DOC_END, start) + len(DOC_END) + path.write_text(f"{content[:start]}{markdown}{content[end:]}") + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--work-dir", + type=Path, + default=None, + help="Writable directory for temporary benchmark trees (defaults to Python's temp dir).", + ) + parser.add_argument( + "--markdown", + action="store_true", + help="Emit the docs markdown benchmark table instead of the console report.", + ) + parser.add_argument( + "--update-doc", + type=Path, + default=None, + help="Replace the generated benchmark block in the given docs page.", + ) + return parser.parse_args() if __name__ == "__main__": - run_benchmarks() + args = parse_args() + benchmark_results = run_benchmarks(work_dir=args.work_dir, emit_text=not args.markdown) + if args.markdown or args.update_doc is not None: + benchmark_markdown = render_markdown(benchmark_results) + if args.update_doc is not None: + update_doc(args.update_doc, benchmark_markdown) + print(f"Updated {args.update_doc}") + else: + print(benchmark_markdown) diff --git a/tests/scripts/test_benchmark_manifest_ops.py b/tests/scripts/test_benchmark_manifest_ops.py new file mode 100644 index 00000000..c40c3f08 --- /dev/null +++ b/tests/scripts/test_benchmark_manifest_ops.py @@ -0,0 +1,42 @@ +import importlib.util +from pathlib import Path + + +def load_benchmark_module(): + script_path = Path(__file__).resolve().parents[2] / "scripts" / "benchmark_manifest_ops.py" + spec = importlib.util.spec_from_file_location("benchmark_manifest_ops", script_path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def test_run_benchmarks_uses_configured_work_dir(tmp_path): + benchmark = load_benchmark_module() + + results = benchmark.run_benchmarks(work_dir=tmp_path, emit_text=False) + + assert [result["scale"] for result in results] == [ + "Current: 10 pkgs, 50 paths", + "Growing: 50 pkgs, 250 paths", + "Large monorepo: 100 pkgs, 2,000 paths", + ] + assert not any(tmp_path.iterdir()) + + markdown = benchmark.render_markdown(results) + assert benchmark.DOC_START in markdown + assert benchmark.DOC_END in markdown + assert "| Current: 10 pkgs, 50 paths |" in markdown + + +def test_update_doc_replaces_generated_block(tmp_path): + benchmark = load_benchmark_module() + doc_path = tmp_path / "page.mdx" + doc_path.write_text( + f"before\n{benchmark.DOC_START}\nold content\n{benchmark.DOC_END}\nafter\n" + ) + + benchmark.update_doc(doc_path, f"{benchmark.DOC_START}\nnew content\n{benchmark.DOC_END}") + + assert doc_path.read_text() == ( + f"before\n{benchmark.DOC_START}\nnew content\n{benchmark.DOC_END}\nafter\n" + ) From 9b0d74df980901d489c92412f84f3362a9e49128 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 15 May 2026 03:50:22 +0000 Subject: [PATCH 3/4] address benchmark review feedback Co-authored-by: mrjf <180956+mrjf@users.noreply.github.com> --- scripts/benchmark_manifest_ops.py | 6 +++++- tests/scripts/test_benchmark_manifest_ops.py | 13 ++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/scripts/benchmark_manifest_ops.py b/scripts/benchmark_manifest_ops.py index 3f01d1fb..e4d9f01c 100644 --- a/scripts/benchmark_manifest_ops.py +++ b/scripts/benchmark_manifest_ops.py @@ -142,7 +142,7 @@ def _make_temp_dir(work_dir: Path | None) -> Path: def _format_speedup(speedup: float) -> str: if speedup == float("inf"): - return "∞x" + return "inf x" return f"{speedup:.1f}x" @@ -366,7 +366,11 @@ def render_markdown(results: list[dict[str, str]]) -> str: def update_doc(path: Path, markdown: str) -> None: content = path.read_text() + if DOC_START not in content: + raise ValueError(f"Could not update {path}: missing {DOC_START} marker") start = content.index(DOC_START) + if DOC_END not in content[start:]: + raise ValueError(f"Could not update {path}: missing {DOC_END} marker") end = content.index(DOC_END, start) + len(DOC_END) path.write_text(f"{content[:start]}{markdown}{content[end:]}") diff --git a/tests/scripts/test_benchmark_manifest_ops.py b/tests/scripts/test_benchmark_manifest_ops.py index c40c3f08..d2c7519f 100644 --- a/tests/scripts/test_benchmark_manifest_ops.py +++ b/tests/scripts/test_benchmark_manifest_ops.py @@ -10,8 +10,17 @@ def load_benchmark_module(): return module -def test_run_benchmarks_uses_configured_work_dir(tmp_path): +def test_run_benchmarks_uses_configured_work_dir(tmp_path, monkeypatch): benchmark = load_benchmark_module() + created_paths = [] + original_mkdtemp = benchmark.tempfile.mkdtemp + + def recording_mkdtemp(*args, **kwargs): + path = Path(original_mkdtemp(*args, **kwargs)) + created_paths.append(path) + return str(path) + + monkeypatch.setattr(benchmark.tempfile, "mkdtemp", recording_mkdtemp) results = benchmark.run_benchmarks(work_dir=tmp_path, emit_text=False) @@ -20,6 +29,8 @@ def test_run_benchmarks_uses_configured_work_dir(tmp_path): "Growing: 50 pkgs, 250 paths", "Large monorepo: 100 pkgs, 2,000 paths", ] + assert created_paths + assert all(path.is_relative_to(tmp_path) for path in created_paths) assert not any(tmp_path.iterdir()) markdown = benchmark.render_markdown(results) From 279de78ba3ebd80afbe312c63f674b300bdc4b49 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 15 May 2026 03:51:16 +0000 Subject: [PATCH 4/4] polish benchmark docs helpers Co-authored-by: mrjf <180956+mrjf@users.noreply.github.com> --- scripts/benchmark_manifest_ops.py | 7 ++++--- tests/scripts/test_benchmark_manifest_ops.py | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/benchmark_manifest_ops.py b/scripts/benchmark_manifest_ops.py index e4d9f01c..941249ef 100644 --- a/scripts/benchmark_manifest_ops.py +++ b/scripts/benchmark_manifest_ops.py @@ -37,6 +37,7 @@ INTEGRATOR_TYPES = 6 # prompts, agents-gh, agents-cl, commands, skills, hooks DOC_START = "{/* benchmark_manifest_ops:start */}" DOC_END = "{/* benchmark_manifest_ops:end */}" +HELP_DESCRIPTION = "Benchmark manifest-based collision detection and sync operations." SCALES = [ ("Current (10 pkgs x 5 files = 50 paths)", "Current: 10 pkgs, 50 paths", 10, 5), ("Growing (50 pkgs x 5 files = 250 paths)", "Growing: 50 pkgs, 250 paths", 50, 5), @@ -365,18 +366,18 @@ def render_markdown(results: list[dict[str, str]]) -> str: def update_doc(path: Path, markdown: str) -> None: - content = path.read_text() + content = path.read_text(encoding="utf-8") if DOC_START not in content: raise ValueError(f"Could not update {path}: missing {DOC_START} marker") start = content.index(DOC_START) if DOC_END not in content[start:]: raise ValueError(f"Could not update {path}: missing {DOC_END} marker") end = content.index(DOC_END, start) + len(DOC_END) - path.write_text(f"{content[:start]}{markdown}{content[end:]}") + path.write_text(f"{content[:start]}{markdown}{content[end:]}", encoding="utf-8") def parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser(description=__doc__) + parser = argparse.ArgumentParser(description=HELP_DESCRIPTION) parser.add_argument( "--work-dir", type=Path, diff --git a/tests/scripts/test_benchmark_manifest_ops.py b/tests/scripts/test_benchmark_manifest_ops.py index d2c7519f..3ee32d43 100644 --- a/tests/scripts/test_benchmark_manifest_ops.py +++ b/tests/scripts/test_benchmark_manifest_ops.py @@ -30,7 +30,8 @@ def recording_mkdtemp(*args, **kwargs): "Large monorepo: 100 pkgs, 2,000 paths", ] assert created_paths - assert all(path.is_relative_to(tmp_path) for path in created_paths) + paths_outside_work_dir = [path for path in created_paths if not path.is_relative_to(tmp_path)] + assert not paths_outside_work_dir, f"Paths outside work dir: {paths_outside_work_dir}" assert not any(tmp_path.iterdir()) markdown = benchmark.render_markdown(results)