diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 71e7e082..0a19f532 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -21,8 +21,8 @@ jobs: - "ReleaseSmall" options: - "" - - "-Djit_always_on" - - "-Djit_hotspot_always_on" + - "-Djit_always_on -Djit_asynchronous=false" + - "-Djit_hotspot_always_on -Djit_asynchronous=false" steps: - name: Checkout project diff --git a/AGENTS.md b/AGENTS.md index 62fbbf9a..8fad3d26 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -32,7 +32,8 @@ FFI, Debugger, and LSP are immature. Treat changes there as higher risk. - Before editing, check the working tree. - If the working tree is clean, make the requested changes normally. - If the working tree only has untracked files, edits are allowed. Do not overwrite or delete unrelated untracked files. -- If the working tree has tracked modifications or staged changes, continue investigation read-only, then show the diff you would have applied and say that no files were modified because the working copy was not clean. +- If the working tree has tracked modifications or staged changes that were not initiated by the current agent/session, continue investigation read-only, then show the diff you would have applied and say that no files were modified because the working copy was not clean. +- If tracked modifications were initiated by the current agent/session for the active task, the agent may continue editing those files and related files needed to finish the same task. - Do not overwrite or revert user changes. Agents may delete files they generated during their own work. Do not delete files that were already tracked by git. @@ -146,9 +147,23 @@ Useful build flags include: - `-Dgc_debug=true`, `-Dgc_debug_light=true`, and `-Dgc_debug_access=true` for GC debugging. - `-Djit_debug=true` for JIT debugging. - `-Djit=false` to disable JIT while isolating runtime issues. +- `-Djit_asynchronous=` controls whether JIT jobs run on the worker thread. 
Keep it enabled by default unless isolating an async publication issue. +- `-Djit_call_threshold=` is the function call count before a function is considered for JIT compilation. +- `-Djit_score_threshold=` is the function score gate. Function score is call count multiplied by chunk complexity. +- `-Djit_hotspot_threshold=` is the loop/hotspot execution count before a hotspot is considered for JIT compilation. +- `-Djit_hotspot_score_threshold=` is the hotspot score gate. Hotspot score is execution count multiplied by AST hotspot complexity. - `-Dcycle_limit=` to limit bytecode execution, noting that it disables JIT compilation. - `-Dmemory_limit=` to reproduce or bound memory behavior. +Current default JIT thresholds are intentionally conservative: call threshold `1024`, function score threshold `65535`, hotspot threshold `256`, hotspot score threshold `65535`, async enabled. When tuning, compare against the full `tests/bench` matrix instead of optimizing a single benchmark: + +```sh +scripts/jit_bench_matrix.sh quick +scripts/jit_bench_matrix.sh final nojit current sync-current hotspot-only +``` + +The matrix writes timings and output-hash comparisons under `zig-cache/jit-bench/`. + ## Debugging Guidance - For parser/typechecker/codegen issues, prefer the smallest `.buzz` regression test that reproduces the behavior. diff --git a/CHANGELOG.md b/CHANGELOG.md index c7e76b27..5bd93a77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Unreleased -This release builds with zig 0.16.0. We will only use tagged version of zig from now on. +> [!NOTE] +> This release builds with zig 0.16.0. We will only use tagged version of zig from now on. ## Added @@ -22,7 +23,10 @@ This release builds with zig 0.16.0. 
We will only use tagged version of zig from ## Internal +- JIT compiler works in a separate thread +- Better JIT thresholds based on function/hotspot complexity scores - The standard libraries are now statically loaded which gives a small speed boost +- `-Dshow_perf` now shows a detailed rundown of the time spent in each component of buzz # 0.5.0 (01-24-2025) diff --git a/README.md b/README.md index fad7dbc7..231341d6 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,8 @@ A small/lightweight statically typed scripting language written in Zig HomepageDiscord

-_buzz is in alpha and is **not** ready any professional or production use_ +> [!WARNING] +> buzz is in alpha and is **not** ready for any professional or production use ## Features diff --git a/build.zig b/build.zig index 567b58d2..3cab294d 100644 --- a/build.zig +++ b/build.zig @@ -904,9 +904,9 @@ const BuildOptions = struct { "jit_hotspot_always_on", "JIT compiler will compile any hotspot encountered", ) orelse false, - .hotspot_on = !is_wasm and b.option( + .hotspot = !is_wasm and b.option( bool, - "jit_hotspot_on", + "jit_hotspot", "JIT compiler will compile hotspot when threshold reached", ) orelse true, .on = !is_wasm and b.option( @@ -918,12 +918,27 @@ const BuildOptions = struct { bool, "jit_asynchronous", "JIT will work in a dedicated thread", - ) orelse false, - .prof_threshold = b.option( - f32, - "jit_prof_threshold", - "Threshold to determine if a function is hot. If the numbers of calls to it makes this percentage of all calls, it's considered hot and will be JIT compiled.", - ) orelse 0.05, + ) orelse true, + .call_threshold = b.option( + u16, + "jit_call_threshold", + "Call count threshold above which the function is being considered for JIT compilation.", + ) orelse 1024, + .score_threshold = b.option( + u16, + "jit_score_threshold", + "Complexity score threshold above which the function will be JIT compiled.", + ) orelse 65535, + .hotspot_threshold = b.option( + u16, + "jit_hotspot_threshold", + "Loop count threshold above which a loop is being considered for JIT compilation.", + ) orelse 256, + .hotspot_score_threshold = b.option( + u16, + "jit_hotspot_score_threshold", + "Complexity score threshold above which a loop node will be JIT compiled.", + ) orelse 65535, }, }; } @@ -971,9 +986,12 @@ const BuildOptions = struct { on: bool, always_on: bool, hotspot_always_on: bool, - hotspot_on: bool, + hotspot: bool, debug: bool, - prof_threshold: f32 = 0.05, + call_threshold: u16 = 1024, + score_threshold: u16 = 65535, + hotspot_threshold: u16 = 256, + 
hotspot_score_threshold: u16 = 65535, asynchronous: bool, pub fn step(self: JITOptions, options: *Build.Step.Options) void { @@ -981,9 +999,12 @@ const BuildOptions = struct { options.addOption(@TypeOf(self.always_on), "jit_always_on", self.always_on); options.addOption(@TypeOf(self.hotspot_always_on), "jit_hotspot_always_on", self.hotspot_always_on); options.addOption(@TypeOf(self.on), "jit", self.on); - options.addOption(@TypeOf(self.prof_threshold), "jit_prof_threshold", self.prof_threshold); - options.addOption(@TypeOf(self.hotspot_on), "jit_hotspot_on", self.hotspot_on); + options.addOption(@TypeOf(self.call_threshold), "jit_call_threshold", self.call_threshold); + options.addOption(@TypeOf(self.score_threshold), "jit_score_threshold", self.score_threshold); + options.addOption(@TypeOf(self.hotspot), "jit_hotspot", self.hotspot); options.addOption(@TypeOf(self.asynchronous), "jit_asynchronous", self.asynchronous); + options.addOption(@TypeOf(self.hotspot_threshold), "jit_hotspot_threshold", self.hotspot_threshold); + options.addOption(@TypeOf(self.hotspot_score_threshold), "jit_hotspot_score_threshold", self.hotspot_score_threshold); } }; diff --git a/scripts/perf_compare_commits.py b/scripts/perf_compare_commits.py deleted file mode 100755 index 4d3f1968..00000000 --- a/scripts/perf_compare_commits.py +++ /dev/null @@ -1,702 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import csv -import datetime as dt -import json -import math -import os -import pathlib -import shutil -import subprocess -import sys -import tempfile - - -FIRST_RUN_SLOW_WARNING = "first benchmarking run for this command was significantly slower" - - -BENCHMARKS = [ - { - "name": "006_vm_arithmetic_dispatch", - "path": "tests/perf/006_vm_arithmetic_dispatch.buzz", - }, - { - "name": "009_vm_object_properties", - "path": "tests/perf/009_vm_object_properties.buzz", - }, - { - "name": "013_vm_concat_clone", - "path": "tests/perf/013_vm_concat_clone.buzz", - }, - { - "name": 
"014_jit_cheap_hotspot", - "path": "tests/perf/014_jit_cheap_hotspot.buzz", - }, - { - "name": "015_jit_object_heavy_hotspot", - "path": "tests/perf/015_jit_object_heavy_hotspot.buzz", - }, - { - "name": "bench_001_btree_depth14", - "path": "tests/bench/btree.buzz", - "args": ["14"], - }, - { - "name": "bench_002_merkle_depth12", - "path": "tests/bench/merkle.buzz", - "args": ["12"], - }, - { - "name": "bench_005_k_nucleoide", - "path": "tests/bench/k-nucleoide.buzz", - "stdin": "tests/bench/reference/knucleotide-input.txt", - }, - { - "name": "bench_007_fib", - "path": "tests/bench/fib.buzz", - }, - { - "name": "bench_008_for", - "path": "tests/bench/for.buzz", - }, - { - "name": "bench_009_grid_1000x800", - "path": "tests/bench/grid.buzz", - "args": ["1000", "800"], - }, - { - "name": "bench_010_ackermann_3_9", - "path": "tests/bench/ackermann.buzz", - "args": ["3", "9"], - }, - { - "name": "bench_011_bubble_sort_3000", - "path": "tests/bench/bubble-sort.buzz", - "args": ["3000"], - }, -] - - -def run(args, cwd=None, capture=False, check=True): - result = subprocess.run( - args, - cwd=cwd, - check=check, - text=True, - stdout=subprocess.PIPE if capture else None, - stderr=subprocess.PIPE if capture else None, - ) - return result.stdout.strip() if capture else "" - - -def require_cmd(name): - if shutil.which(name) is None: - raise SystemExit(f"error: missing required command: {name}") - - -def split_env(name, default): - value = os.environ.get(name, default) - return value.split() if value else [] - - -def commit_subject(repo, commit): - return run(["git", "log", "-1", "--format=%s", commit], cwd=repo, capture=True) - - -def short_sha(repo, commit): - return run(["git", "rev-parse", "--short=12", commit], cwd=repo, capture=True) - - -def commit_range(repo, start_sha, head_sha): - has_parent = subprocess.run( - ["git", "rev-parse", "--verify", f"{start_sha}^"], - cwd=repo, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ).returncode == 0 - - if 
has_parent: - out = run( - ["git", "rev-list", "--reverse", "--ancestry-path", f"{start_sha}^..{head_sha}"], - cwd=repo, - capture=True, - ) - return [line for line in out.splitlines() if line] - - out = run( - ["git", "rev-list", "--reverse", "--ancestry-path", f"{start_sha}..{head_sha}"], - cwd=repo, - capture=True, - ) - return [start_sha] + [line for line in out.splitlines() if line] - - -def benchmark_command_for(benchmark): - args = " ".join(benchmark.get("args", [])) - if args: - args = f" {args}" - - stdin = benchmark.get("stdin") - stdin_redirect = f" < {stdin}" if stdin else "" - - return f"./zig-out/bin/buzz {benchmark['path']}{args}{stdin_redirect} >/dev/null" - - -def benchmark_source_paths(): - paths = [] - seen = set() - for benchmark in BENCHMARKS: - for key in ("path", "stdin"): - path = benchmark.get(key) - if path and path not in seen: - seen.add(path) - paths.append(path) - return paths - - -def copy_benchmark_sources(repo, destination): - for relative in benchmark_source_paths(): - source = repo / relative - target = destination / relative - if not source.is_file(): - raise SystemExit(f"error: missing benchmark source: {relative}") - - target.parent.mkdir(parents=True, exist_ok=True) - shutil.copy2(source, target) - - -def hyperfine_warnings(output): - warnings = [] - - for line in output.splitlines(): - stripped = line.strip() - if stripped.startswith("Warning:"): - warnings.append(stripped.removeprefix("Warning:").strip()) - - return warnings - - -def has_first_run_slow_warning(output): - return FIRST_RUN_SLOW_WARNING in output.lower() - - -def run_hyperfine(hyperfine_cmd, cwd, name): - attempts = [] - - for attempt in range(1, 3): - result = subprocess.run( - hyperfine_cmd, - cwd=cwd, - text=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - output = result.stdout + result.stderr - print(output, end="") - - warnings = hyperfine_warnings(output) - attempts.append( - { - "returncode": result.returncode, - "warnings": warnings, - } 
- ) - - if result.returncode != 0: - return result.returncode, attempts - - if attempt == 1 and has_first_run_slow_warning(output): - print( - f"warning: benchmark `{name}` had a slow first run; rerunning once", - file=sys.stderr, - ) - continue - - return 0, attempts - - return 0, attempts - - -def merge_hyperfine_result(source_json, combined_results, attempts): - with source_json.open() as f: - payload = json.load(f) - - reran = len(attempts) > 1 - warnings = attempts[-1]["warnings"] if attempts else [] - - for result in payload.get("results", []): - result["hyperfine_warnings"] = warnings - result["reran_after_first_run_warning"] = reran - combined_results.append(result) - - -def update_submodules(worktree): - run(["git", "-C", str(worktree), "submodule", "sync", "--recursive"]) - run(["git", "-C", str(worktree), "submodule", "update", "--init", "--recursive"]) - - -def fmt_seconds(value): - if value is None or math.isnan(value): - return "" - if value >= 1: - return f"{value:.3f}s" - if value >= 0.001: - return f"{value * 1000:.2f}ms" - return f"{value * 1_000_000:.2f}us" - - -def gain(base, current): - if base is None or current is None or base == 0: - return None - return (base - current) / base * 100.0 - - -def coefficient_of_variation(result): - if result is None: - return None - - mean = result.get("mean") - stddev = result.get("stddev") - if mean is None or stddev is None or mean == 0: - return None - - return stddev / mean - - -def fmt_percent(value): - return "" if value is None else f"{value:+.2f}%" - - -def fmt_cv(result): - cv = coefficient_of_variation(result) - return "" if cv is None else f"{cv * 100:.1f}%" - - -def unstable_result(result): - if result is None: - return False - - cv = coefficient_of_variation(result) - has_warning = len(result.get("hyperfine_warnings", [])) > 0 - - return has_warning or (cv is not None and cv > 0.20) - - -def fmt_warnings(result): - if result is None: - return "" - - warnings = result.get("hyperfine_warnings", []) 
- if not warnings: - return "" - - return "; ".join(warnings) - - -def load_results(commits): - data = {} - failures = {} - tests = [benchmark["name"] for benchmark in BENCHMARKS] - for commit in commits: - with commit["json"].open() as f: - payload = json.load(f) - - per_test = {} - for result in payload.get("results", []): - name = result["command"] - per_test[name] = result - - data[commit["short"]] = per_test - failures[commit["short"]] = { - failure["name"]: failure - for failure in payload.get("failures", []) - } - - return data, failures, tests - - -def write_reports(results_dir, commits): - data, failures, tests = load_results(commits) - - csv_path = results_dir / "all-results.csv" - with csv_path.open("w", newline="") as f: - writer = csv.writer(f) - writer.writerow( - [ - "commit", - "short", - "subject", - "test", - "mean_seconds", - "stddev_seconds", - "median_seconds", - "gain_vs_start_percent", - "gain_vs_previous_percent", - "median_gain_vs_start_percent", - "median_gain_vs_previous_percent", - "user_seconds", - "user_gain_vs_start_percent", - "user_gain_vs_previous_percent", - "system_seconds", - "system_gain_vs_start_percent", - "system_gain_vs_previous_percent", - "coefficient_of_variation", - "unstable", - "hyperfine_warnings", - "reran_after_first_run_warning", - ] - ) - - previous_short = None - base_short = commits[0]["short"] - for commit in commits: - short = commit["short"] - for test in tests: - result = data.get(short, {}).get(test) - base = data.get(base_short, {}).get(test) - previous = data.get(previous_short, {}).get(test) if previous_short else None - - mean = result.get("mean") if result else None - base_mean = base.get("mean") if base else None - previous_mean = previous.get("mean") if previous else None - median = result.get("median") if result else None - base_median = base.get("median") if base else None - previous_median = previous.get("median") if previous else None - user = result.get("user") if result else None - base_user = 
base.get("user") if base else None - previous_user = previous.get("user") if previous else None - system = result.get("system") if result else None - base_system = base.get("system") if base else None - previous_system = previous.get("system") if previous else None - - writer.writerow( - [ - commit["sha"], - short, - commit["subject"], - test, - mean, - result.get("stddev") if result else None, - result.get("median") if result else None, - gain(base_mean, mean), - gain(previous_mean, mean), - gain(base_median, median), - gain(previous_median, median), - user, - gain(base_user, user), - gain(previous_user, user), - system, - gain(base_system, system), - gain(previous_system, system), - coefficient_of_variation(result), - unstable_result(result), - fmt_warnings(result), - result.get("reran_after_first_run_warning") if result else None, - ] - ) - previous_short = short - - summary_path = results_dir / "summary.md" - base = commits[0] - head = commits[-1] - with summary_path.open("w") as f: - f.write("# Buzz perf comparison\n\n") - f.write(f"Start: `{base['short']}` {base['subject']}\n\n") - f.write(f"Head: `{head['short']}` {head['subject']}\n\n") - f.write("Positive gain means the later commit is faster. Summary gains use medians; CV above 20% or any hyperfine warning is flagged as unstable. 
If hyperfine reports a significantly slower first run, that benchmark is rerun once and the rerun is recorded.\n\n") - - f.write("## HEAD vs start\n\n") - f.write("| Test | Start median | HEAD median | Median gain | Start user | HEAD user | User gain | HEAD CV | HEAD warnings |\n") - f.write("| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | --- |\n") - for test in tests: - base_result = data.get(base["short"], {}).get(test) - head_result = data.get(head["short"], {}).get(test) - base_median = base_result.get("median") if base_result else None - head_median = head_result.get("median") if head_result else None - g = gain(base_median, head_median) - base_user = base_result.get("user") if base_result else None - head_user = head_result.get("user") if head_result else None - user_gain = gain(base_user, head_user) - cv_text = fmt_cv(head_result) - if unstable_result(head_result): - cv_text += " unstable" - warning_text = fmt_warnings(head_result) - f.write( - f"| `{test}` | {fmt_seconds(base_median)} | {fmt_seconds(head_median)} | {fmt_percent(g)} | {fmt_seconds(base_user)} | {fmt_seconds(head_user)} | {fmt_percent(user_gain)} | {cv_text} | {warning_text} |\n" - ) - - f.write("\n## Per-commit rundown\n\n") - for index, commit in enumerate(commits): - short = commit["short"] - previous_short = commits[index - 1]["short"] if index > 0 else None - - f.write(f"### `{short}` {commit['subject']}\n\n") - - for test in tests: - result = data.get(short, {}).get(test) - if result is None: - failure = failures.get(short, {}).get(test) - if failure is not None: - f.write(f"- `{test}`: failed, exit {failure['returncode']}\n") - else: - f.write(f"- `{test}`: no result\n") - continue - - mean = result.get("mean") - median = result.get("median") - stddev = result.get("stddev") - user = result.get("user") - system = result.get("system") - base_result = data.get(base["short"], {}).get(test) - previous_result = data.get(previous_short, {}).get(test) if previous_short else None - - 
base_gain = gain(base_result.get("median") if base_result else None, median) - previous_gain = gain(previous_result.get("median") if previous_result else None, median) - base_user_gain = gain(base_result.get("user") if base_result else None, user) - previous_user_gain = gain(previous_result.get("user") if previous_result else None, user) - - parts = [ - f"mean {fmt_seconds(mean)}", - f"median {fmt_seconds(median)}", - f"stddev {fmt_seconds(stddev)}", - f"user {fmt_seconds(user)}", - f"system {fmt_seconds(system)}", - ] - - cv_text = fmt_cv(result) - if cv_text: - if unstable_result(result): - cv_text += " unstable" - parts.append(f"CV {cv_text}") - - if result.get("reran_after_first_run_warning"): - parts.append("reran after slow first run") - - warning_text = fmt_warnings(result) - if warning_text: - parts.append(f"hyperfine warning: {warning_text}") - - if previous_gain is not None: - parts.append(f"median vs previous {previous_gain:+.2f}%") - - if base_gain is not None: - parts.append(f"median vs start {base_gain:+.2f}%") - - if previous_user_gain is not None: - parts.append(f"user vs previous {previous_user_gain:+.2f}%") - - if base_user_gain is not None: - parts.append(f"user vs start {base_user_gain:+.2f}%") - - f.write(f"- `{test}`: " + ", ".join(parts) + "\n") - - f.write("\n") - - return summary_path, csv_path - - -def parse_args(): - parser = argparse.ArgumentParser( - description=( - "Check out each commit from START_COMMIT to HEAD, build Buzz, " - "run the selected Buzz perf benchmarks with hyperfine, and compare results." 
- ) - ) - parser.add_argument( - "start_commit", - nargs="?", - default="HEAD^", - help="first commit to benchmark, default: HEAD^", - ) - parser.add_argument( - "--results-dir", - default=os.environ.get("RESULTS_DIR"), - help="output directory, default: perf-results-", - ) - parser.add_argument( - "--build-args", - default=os.environ.get("BUZZ_BUILD_ARGS", "-Doptimize=ReleaseFast"), - help='arguments after "zig build", default: %(default)s', - ) - parser.add_argument( - "--warmup", - type=int, - default=int(os.environ.get("HYPERFINE_WARMUP", "5")), - help="hyperfine warmup count", - ) - parser.add_argument( - "--runs", - type=int, - default=int(os.environ.get("HYPERFINE_RUNS", "10")), - help="hyperfine run count", - ) - parser.add_argument( - "--hyperfine-extra-args", - default=os.environ.get("HYPERFINE_EXTRA_ARGS", ""), - help="extra arguments passed to hyperfine", - ) - return parser.parse_args() - - -def main(): - args = parse_args() - - for cmd in ("git", "zig", "hyperfine"): - require_cmd(cmd) - - repo = pathlib.Path(run(["git", "rev-parse", "--show-toplevel"], capture=True)) - os.chdir(repo) - - for relative in benchmark_source_paths(): - if not (repo / relative).is_file(): - raise SystemExit(f"error: benchmark source does not exist: {relative}") - - start_sha = run(["git", "rev-parse", "--verify", f"{args.start_commit}^{{commit}}"], cwd=repo, capture=True) - head_sha = run(["git", "rev-parse", "--verify", "HEAD"], cwd=repo, capture=True) - - ancestor = subprocess.run( - ["git", "merge-base", "--is-ancestor", start_sha, head_sha], - cwd=repo, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - if ancestor.returncode != 0: - raise SystemExit(f"error: {start_sha} is not an ancestor of HEAD") - - commits_to_run = commit_range(repo, start_sha, head_sha) - if not commits_to_run: - raise SystemExit("error: empty commit range") - - timestamp = dt.datetime.now().strftime("%Y%m%d-%H%M%S") - results_dir = pathlib.Path(args.results_dir) if args.results_dir 
else repo / f"perf-results-{timestamp}" - json_dir = results_dir / "json" - json_dir.mkdir(parents=True, exist_ok=True) - - build_args = args.build_args.split() if args.build_args else [] - hyperfine_extra_args = args.hyperfine_extra_args.split() if args.hyperfine_extra_args else [] - - with tempfile.TemporaryDirectory(prefix="buzz-perf-commits.") as tmp: - tmp_path = pathlib.Path(tmp) - worktree = tmp_path / "worktree" - benchmark_snapshot = tmp_path / "benchmarks" - - copy_benchmark_sources(repo, benchmark_snapshot) - tests = BENCHMARKS - - run(["git", "worktree", "add", "--detach", "--quiet", str(worktree), head_sha], cwd=repo) - - commands_path = results_dir / "commands.tsv" - commits_path = results_dir / "commits.tsv" - completed_commits = [] - - try: - with commands_path.open("w", newline="") as commands_file, commits_path.open("w", newline="") as commits_file: - commands_writer = csv.writer(commands_file, delimiter="\t") - commits_writer = csv.writer(commits_file, delimiter="\t") - - print(f"Benchmarking {len(commits_to_run)} commits with {len(tests)} tests each") - print(f"Results: {results_dir}") - print(f"Build args: zig build {' '.join(build_args)}") - print("Benchmarks:") - for benchmark in tests: - print(f" - {benchmark['name']}: {benchmark_command_for(benchmark)}") - - for idx, commit in enumerate(commits_to_run, start=1): - short = short_sha(repo, commit) - subject = commit_subject(repo, commit) - json_file = json_dir / f"{idx:04d}-{short}.json" - - print(f"\n[{idx}/{len(commits_to_run)}] {short} {subject}") - - run(["git", "-C", str(worktree), "checkout", "--force", "--quiet", commit], cwd=repo) - update_submodules(worktree) - copy_benchmark_sources(benchmark_snapshot, worktree) - - run(["zig", "build", *build_args], cwd=worktree) - - commits_writer.writerow([idx, commit, short, subject, json_file]) - commits_file.flush() - - combined_results = [] - failures = [] - - for benchmark in tests: - name = benchmark["name"] - command = 
benchmark_command_for(benchmark) - benchmark_json = json_dir / f"{idx:04d}-{short}-{name}.json" - commands_writer.writerow([short, name, command]) - commands_file.flush() - - hyperfine_cmd = [ - "hyperfine", - "--warmup", - str(args.warmup), - "--runs", - str(args.runs), - "--export-json", - str(benchmark_json), - *hyperfine_extra_args, - "--command-name", - name, - command, - ] - - returncode, attempts = run_hyperfine(hyperfine_cmd, worktree, name) - if returncode == 0: - merge_hyperfine_result(benchmark_json, combined_results, attempts) - else: - failures.append( - { - "name": name, - "command": command, - "returncode": returncode, - "attempts": attempts, - } - ) - print( - f"warning: benchmark `{name}` failed with exit code {returncode}; continuing", - file=sys.stderr, - ) - - with json_file.open("w") as f: - json.dump( - { - "results": combined_results, - "failures": failures, - }, - f, - indent=2, - ) - - completed_commits.append( - { - "idx": idx, - "sha": commit, - "short": short, - "subject": subject, - "json": json_file, - } - ) - finally: - run(["git", "worktree", "remove", "--force", str(worktree)], cwd=repo, check=False) - - if not completed_commits: - raise SystemExit("error: no benchmark results were recorded") - - summary, csv_path = write_reports(results_dir, completed_commits) - print("\nDone.") - print(f"Summary: {summary}") - print(f"CSV: {csv_path}") - - -if __name__ == "__main__": - try: - main() - except subprocess.CalledProcessError as err: - cmd = " ".join(str(part) for part in err.cmd) - print(f"error: command failed: {cmd}", file=sys.stderr) - if err.stdout: - print(err.stdout, file=sys.stderr) - if err.stderr: - print(err.stderr, file=sys.stderr) - raise SystemExit(err.returncode) diff --git a/src/Ast.zig b/src/Ast.zig index 1a6e60a5..a30aaa76 100644 --- a/src/Ast.zig +++ b/src/Ast.zig @@ -347,7 +347,12 @@ pub const Slice = struct { const IsConstantContext = struct { result: ?bool = null, - pub fn processNode(self: *IsConstantContext, _: 
std.mem.Allocator, ast: Self.Slice, node: Self.Node.Index) (std.mem.Allocator.Error || std.fmt.BufPrintError)!bool { + pub fn processNode( + self: *IsConstantContext, + _: std.mem.Allocator, + ast: Self.Slice, + node: Self.Node.Index, + ) (std.mem.Allocator.Error || std.fmt.BufPrintError)!bool { switch (ast.nodes.items(.tag)[node]) { .AnonymousObjectType, .FiberType, @@ -496,6 +501,58 @@ pub const Slice = struct { return ctx.result orelse false; } + /// Mirrors Chunk.score (even though Chunk.score and Node.score won't be comparable) + /// Is used to compute complexity of a hotspot node (which doesn't have a Chunk available to evaluate) + const ComplexityContext = struct { + score: usize = 0, + + pub fn processNode( + ctx: *ComplexityContext, + _: std.mem.Allocator, + ast: Self.Slice, + node: Self.Node.Index, + ) (std.mem.Allocator.Error || std.fmt.BufPrintError)!bool { + if (ast.nodes.items(.complexity_score)[node]) |sc| { + ctx.score += sc; + return true; // Don't go deeper we already computed this node score + } + + ctx.score += switch (ast.nodes.items(.tag)[node]) { + .AsyncCall, + .Resolve, + .Resume, + => { // Blacklist because of fiber use + ctx.score = 0; + return true; + }, + .Call, + .DoUntil, + .For, + .ForEach, + .Throw, + .Try, + .While, + => @as(usize, @intCast(1)), + else => @as(usize, @intCast(0)), + } + 1; // At least 1 per node + + return false; + } + }; + + pub fn score(self: Self.Slice, allocator: std.mem.Allocator, node: Node.Index) !usize { + const complexity_score = &self.nodes.items(.complexity_score)[node]; + if (complexity_score.* == null) { + var ctx = ComplexityContext{}; + + try self.walk(allocator, &ctx, node); + + complexity_score.* = ctx.score; + } + + return complexity_score.* orelse 0; + } + fn binaryValue(self: Self.Slice, node: Node.Index, gc: *GC) !?Value { const components = self.nodes.items(.components)[node].Binary; @@ -990,26 +1047,25 @@ pub const Node = struct { end_location: TokenIndex, /// Docblock if any docblock: 
?TokenIndex = null, - /// If null, either its a statement or its a reference to something unknown that should ultimately raise a compile error type_def: ?*obj.ObjTypeDef = null, /// Wether optional jumps must be patch before generate this node bytecode patch_opt_jumps: bool = false, /// Does this node closes a scope ends_scope: ?[]const Close = null, - /// Data related to this node components: Components, - /// To avoid generating a node const value multiple times value: ?Value = null, + // JIT related metdata + /// How many time it was visited at runtime (used to decide wether its a hotspot that needs to be compiled) count: usize = 0, - - /// Wether its blacklisted + /// Complexity score computed once to help evaluate if the node is worth JIT compiling + complexity_score: ?usize = null, + /// Node status: blacklisted, queued/generated/compiled by the JIT, compilable jit_status: JitStatus = .compilable, - /// Once compiled compiled: ?*anyopaque = null, @@ -1026,6 +1082,8 @@ pub const Node = struct { compilable, /// Node is already queued in the jit compiler queued, + /// Node has generated native code waiting to be published by the VM thread + generated, /// Node can't be compiled (contains use of fiber) blacklisted, /// Already compiled diff --git a/src/Chunk.zig b/src/Chunk.zig index 95e555ec..a322db6c 100644 --- a/src/Chunk.zig +++ b/src/Chunk.zig @@ -13,6 +13,10 @@ code: std.ArrayList(u32) = .empty, locations: std.ArrayList(Ast.TokenIndex) = .empty, /// List of constants defined in this chunk constants: std.ArrayList(Value) = .empty, +/// Ranges of bytecode skipped by compiled hotspots +compiled_hotspot_ranges: std.ArrayList(InstructionRange) = .empty, +/// Complexity score computed once to help evaluate if the chunk is worth JIT compiling +complexity_score: ?u32 = null, pub fn init(allocator: std.mem.Allocator, ast: Ast.Slice) Self { return Self{ @@ -25,6 +29,7 @@ pub fn deinit(self: *Self) void { self.code.deinit(self.allocator); 
self.constants.deinit(self.allocator); self.locations.deinit(self.allocator); + self.compiled_hotspot_ranges.deinit(self.allocator); } pub fn write(self: *Self, code: u32, where: Ast.TokenIndex) !void { @@ -40,6 +45,94 @@ pub fn addConstant(self: *Self, vm: ?*VM, value: Value) !u24 { return @intCast(self.constants.items.len - 1); } +/// Compute a basic complexity score based on size and presence "costly" opcodes +pub fn score(self: *Self) u32 { + if (self.complexity_score) |sc| return sc; + + var complexity_score: u32 = 0; + + for (self.code.items, 0..) |op, index| { + if (self.isInCompiledHotspotRange(index)) { + continue; + } + + complexity_score += 1; + + switch (VM.getCode(op)) { + .OP_HOTSPOT, // Those cover any loop + .OP_CALL, + .OP_TAIL_CALL, + .OP_CALL_INSTANCE_PROPERTY, + .OP_TAIL_CALL_INSTANCE_PROPERTY, + .OP_INSTANCE_INVOKE, + .OP_INSTANCE_TAIL_INVOKE, + .OP_PROTOCOL_INVOKE, + .OP_PROTOCOL_TAIL_INVOKE, + .OP_TRY, + .OP_TRY_END, + .OP_THROW, + => complexity_score += 1, + .OP_FIBER_FOREACH, + .OP_RESUME, + .OP_RESOLVE, + => return 0, // A chunk with fiber op codes will not be compiled so the score is 0 + else => {}, + } + } + + self.complexity_score = complexity_score; + + return complexity_score; +} + +pub fn addCompiledHotspotRange(self: *Self, start: usize, end: usize) !void { + if (start >= end) { + return; + } + + var merged = InstructionRange{ + .start = start, + .end = end, + }; + + var index: usize = 0; + while (index < self.compiled_hotspot_ranges.items.len) { + const range = self.compiled_hotspot_ranges.items[index]; + + if (merged.end < range.start) { + try self.compiled_hotspot_ranges.insert(self.allocator, index, merged); + self.complexity_score = null; + return; + } + + if (merged.start > range.end) { + index += 1; + continue; + } + + merged.start = @min(merged.start, range.start); + merged.end = @max(merged.end, range.end); + _ = self.compiled_hotspot_ranges.orderedRemove(index); + } + + try 
self.compiled_hotspot_ranges.append(self.allocator, merged); + self.complexity_score = null; +} + +fn isInCompiledHotspotRange(self: *const Self, index: usize) bool { + for (self.compiled_hotspot_ranges.items) |range| { + if (index < range.start) { + return false; + } + + if (index >= range.start and index < range.end) { + return true; + } + } + + return false; +} + pub const OpCode = enum(u8) { OP_CONSTANT, OP_NULL, @@ -185,6 +278,11 @@ const Self = @This(); pub const max_constants = std.math.maxInt(u24); +const InstructionRange = struct { + start: usize, + end: usize, +}; + const RegistryContext = struct { pub fn hash(_: RegistryContext, key: Self) u64 { return std.hash.Wyhash.hash( diff --git a/src/Codegen.zig b/src/Codegen.zig index 9cf08372..a2b912b9 100644 --- a/src/Codegen.zig +++ b/src/Codegen.zig @@ -17,6 +17,7 @@ const JIT = if (!is_wasm) @import("Jit.zig") else void; const disassembler = @import("disassembler.zig"); const TypeChecker = @import("TypeChecker.zig"); const Init = @import("vm.zig").Init; +const Perf = @import("Perf.zig"); const Self = @This(); @@ -72,6 +73,7 @@ opt_jumps: std.ArrayList(std.ArrayList(usize)) = .empty, /// Used to generate error messages parser: *Parser, jit: ?*JIT, +perf: ?*Perf = null, /// Wether we are debugging the program debugging: bool, @@ -175,6 +177,9 @@ pub inline fn currentCode(self: *Self) usize { } pub fn generate(self: *Self, ast: Ast.Slice) Error!?*obj.ObjFunction { + var perf_scope = Perf.start(self.perf, .codegen); + defer perf_scope.end(); + self.ast = ast; self.reporter.last_error = null; self.reporter.panic_mode = false; diff --git a/src/Debugger.zig b/src/Debugger.zig index c55c69ec..5bc3a03f 100644 --- a/src/Debugger.zig +++ b/src/Debugger.zig @@ -450,6 +450,7 @@ pub fn launch(self: *Debugger, arguments: Arguments(.launch)) Error!Response(.la self.allocator, .Run, self, + null, ) catch return error.LaunchFailed; try self.session.?.variables.append( diff --git a/src/GC.zig b/src/GC.zig index 
988a0f2c..56d0a599 100644 --- a/src/GC.zig +++ b/src/GC.zig @@ -10,6 +10,7 @@ const buzz_api = @import("buzz_api.zig"); const Reporter = @import("Reporter.zig"); const is_wasm = builtin.cpu.arch.isWasm(); const TypeRegistry = @import("TypeRegistry.zig"); +const Perf = @import("Perf.zig"); const log = std.log.scoped(.gc); @@ -35,6 +36,7 @@ const Mode = enum { }; allocator: std.mem.Allocator, +perf: ?*Perf = null, strings: std.StringHashMapUnmanaged(*o.ObjString) = .empty, type_registry: TypeRegistry, bytes_allocated: usize = 0, @@ -168,7 +170,7 @@ pub fn allocate(self: *GC, comptime T: type) !*T { } pub fn allocateMany(self: *GC, comptime T: type, count: usize) ![]T { - self.bytes_allocated += (@sizeOf(T) * count); + self.bytes_allocated += @sizeOf(T) * count; if (self.bytes_allocated > self.max_allocated) { self.max_allocated = self.bytes_allocated; @@ -278,11 +280,11 @@ fn free(self: *GC, comptime T: type, pointer: *T) void { } fn freeMany(self: *GC, comptime T: type, pointer: []const T) void { + const n: usize = (@sizeOf(T) * pointer.len); if (BuildOptions.gc_debug) { log.info("Going to free slice {*} `{s}`", .{ pointer, pointer }); } - const n: usize = (@sizeOf(T) * pointer.len); self.bytes_allocated -= n; self.allocator.free(pointer); @@ -778,6 +780,9 @@ pub fn collectGarbage(self: *GC) !void { return; } + var perf_scope = Perf.start(self.perf, .gc); + defer perf_scope.end(); + const mode: Mode = if (self.bytes_allocated > self.next_full_gc and self.last_gc != null) .Full else .Young; if (BuildOptions.gc_debug or BuildOptions.gc_debug_light) { diff --git a/src/Jit.zig b/src/Jit.zig index 29714d58..333a820f 100644 --- a/src/Jit.zig +++ b/src/Jit.zig @@ -18,6 +18,7 @@ const Double = _v.Double; const Token = @import("Token.zig"); const ZigType = @import("zigtypes.zig").Type; const api = @import("buzz_api.zig"); +const Perf = @import("Perf.zig"); const log = std.log.scoped(.jit); @@ -111,6 +112,7 @@ const State = struct { }; process: Init, +perf: ?*Perf = null, /// 
We only read the interned strings map, the worker thread can allocate buzz objects since it's not thread safe. /// But it does not make sense for the Jit to have to allocate any buzz constant since it must have been done by CodeGen first. gc: *GC, @@ -122,6 +124,8 @@ ctx: m.MIR_context_t, call_count: usize = 0, /// Queue of things to compile jobs: SpscQueue(Job), +/// Completed jobs ready to be published by the VM thread +completed_jobs: SpscQueue(CompletedJob), /// Worker thread worker: ?std.Thread = null, /// To stop the worker, @@ -141,6 +145,8 @@ objclosures_queue: std.AutoHashMapUnmanaged(Ast.Node.Index, *o.ObjClosure) = .em required_ext_api: std.AutoHashMapUnmanaged(ExternApi, void) = .empty, /// Modules to load when linking/generating modules: std.ArrayList(m.MIR_module_t) = .empty, +/// Amount of time passed in JIT +duration: std.Io.Duration = .fromMilliseconds(0), pub fn init(process: Init, gc: *GC) Error!Self { return .{ @@ -148,6 +154,7 @@ pub fn init(process: Init, gc: *GC) Error!Self { .ctx = m.MIR_init(), .process = process, .jobs = try .initCapacity(gc.allocator, 256), + .completed_jobs = try .initCapacity(gc.allocator, 256), }; } @@ -164,10 +171,17 @@ fn reset(self: *Self) void { } pub fn deinit(self: *Self) void { - if (self.worker != null) { - self.worker.?.detach(); + if (self.worker) |worker| { + while (!self.worker_stopped.load(.acquire)) { + self.publishCompleted(); + std.atomic.spinLoopHint(); + } + worker.join(); } + self.publishCompleted(); + self.jobs.deinit(self.gc.allocator); + self.completed_jobs.deinit(self.gc.allocator); self.functions_queue.deinit(self.gc.allocator); self.objclosures_queue.deinit(self.gc.allocator); self.required_ext_api.deinit(self.gc.allocator); @@ -175,13 +189,108 @@ pub fn deinit(self: *Self) void { m.MIR_finish(self.ctx); } +pub fn compileFunctionIfNeeded(self: *Self, closure: *o.ObjClosure) StartError!bool { + self.publishCompleted(); + + self.call_count += 1; + + switch 
(closure.function.type_def.resolved_type.?.Function.function_type) { + .Extern, + .Script, + .ScriptEntryPoint, + .EntryPoint, + .Repl, + => return false, + else => {}, + } + + const function_ast = closure.function.chunk.ast; + + if (function_ast.nodes.items(.jit_status)[closure.function.node] != .compilable or + function_ast.nodes.items(.compiled)[closure.function.node] != null) + { + return false; + } + + if (BuildOptions.jit_always_on or closure.function.call_count > BuildOptions.jit_call_threshold) { + const score = closure.function.call_count * closure.function.chunk.score(); + if (score == 0) { + function_ast.nodes.items(.jit_status)[closure.function.node] = .blacklisted; + + if (BuildOptions.jit_debug) { + log.info( + "Blacklisted function `{s}` for JIT compilation", + .{ + closure.function.type_def.resolved_type.?.Function.name.string, + }, + ); + } + + return false; + } + + if (BuildOptions.jit_always_on or score > BuildOptions.jit_score_threshold) { + self.compile(function_ast, closure, null) catch |err| { + if (err == Error.CantCompile) { + return false; + } else { + return err; + } + }; + + return true; + } + } + + return false; +} + +pub fn compileHotspotIfNeeded(self: *Self, ast: Ast.Slice, frame_closure: *o.ObjClosure, node: Ast.Node.Index) StartError!void { + self.publishCompleted(); + + if (ast.nodes.items(.jit_status)[node] != .compilable or + ast.nodes.items(.compiled)[node] != null) + { + return; + } + + if (BuildOptions.jit_hotspot_always_on or ast.nodes.items(.count)[node] > BuildOptions.jit_hotspot_threshold) { + const score = ast.nodes.items(.count)[node] * try ast.score(self.gc.allocator, node); + if (score == 0) { + ast.nodes.items(.jit_status)[node] = .blacklisted; + + if (BuildOptions.jit_debug) { + log.info( + "Blacklisted hotspot {} ({s}) for JIT compilation", + .{ + node, + @tagName(ast.nodes.items(.tag)[node]), + }, + ); + } + + return; + } + + if (BuildOptions.jit_hotspot_always_on or score > 
BuildOptions.jit_hotspot_score_threshold) { + self.compile(ast, frame_closure, node) catch |err| { + if (err == Error.CantCompile) { + return; + } else { + return err; + } + }; + } + } +} + pub fn compile(self: *Self, ast: Ast.Slice, closure: *o.ObjClosure, hotspot_node: ?Ast.Node.Index) StartError!void { const ast_node = hotspot_node orelse closure.function.node; // Is the node already compiled or blacklisted switch (ast.nodes.items(.jit_status)[ast_node]) { .blacklisted => return error.CantCompile, - .queued, .compiled => return, + .queued, .generated, .compiled => return, .compilable => {}, } @@ -230,11 +339,59 @@ pub fn compile(self: *Self, ast: Ast.Slice, closure: *o.ObjClosure, hotspot_node // If the worker is not working, start it again try self.start(); } else { - try self.doJob(&job); + var completed_job = try self.doJob(&job); + defer completed_job.deinit(self.gc.allocator); + + self.publishCompletedJob(&completed_job); self.reset(); } } +pub fn compileFunctionSynchronously(self: *Self, closure: *o.ObjClosure) Error!void { + self.publishCompleted(); + + if (closure.function.native_raw != null) { + return; + } + + if (BuildOptions.jit_asynchronous and !self.worker_stopped.load(.acquire)) { + return error.CantCompile; + } + + const function_ast = closure.function.chunk.ast; + const function_node = closure.function.node; + _ = closure.function.chunk.score(); + + switch (function_ast.nodes.items(.jit_status)[function_node]) { + .blacklisted, .generated, .queued => return error.CantCompile, + .compiled => return, + .compilable => {}, + } + + if (try function_ast.usesFiber( + self.gc.allocator, + function_node, + )) { + function_ast.nodes.items(.jit_status)[function_node] = .blacklisted; + + return error.CantCompile; + } + + const job = Job{ + .ast = function_ast, + .closure = closure, + .node = function_node, + }; + + function_ast.nodes.items(.jit_status)[function_node] = .queued; + + var completed_job = try self.doJob(&job); + defer 
completed_job.deinit(self.gc.allocator); + + self.publishCompletedJob(&completed_job); + self.reset(); +} + pub fn start(self: *Self) StartError!void { if (BuildOptions.jit_asynchronous and (self.worker == null or self.worker_stopped.load(.acquire))) { if (self.worker) |*worker| { @@ -256,19 +413,50 @@ pub fn start(self: *Self) StartError!void { } fn work(self: *Self) Error!void { + defer self.worker_stopped.store(true, .release); + while (self.jobs.front()) |job| { + switch (job.ast.nodes.items(.jit_status)[job.node]) { + .blacklisted, .generated, .compiled => { + self.jobs.pop(); + self.reset(); + continue; + }, + .queued, .compilable => {}, + } + if (BuildOptions.jit_debug) { - log.debug( - "Worker starting job for node {} and closure {*}", - .{ - job.node, - job.closure, - }, - ); + if (job.node == job.closure.function.node) + log.info( + "Worker starting for compiling function `{s}` with score {}", + .{ + job.closure.function.type_def.resolved_type.?.Function.name.string, + job.closure.function.call_count * job.closure.function.chunk.complexity_score.?, + }, + ) + else + log.info( + "Worker starting for hostpot node {} ({s}) witch score {} in function `{s}`", + .{ + job.node, + @tagName(job.ast.nodes.items(.tag)[job.node]), + job.ast.nodes.items(.count)[job.node] * job.ast.nodes.items(.complexity_score)[job.node].?, + job.closure.function.type_def.resolved_type.?.Function.name.string, + }, + ); } - try self.doJob(job); + const completed_job = self.doJob(job) catch |err| { + if (err != Error.CantCompile) { + return err; + } + + self.jobs.pop(); + self.reset(); + continue; + }; + self.completed_jobs.push(completed_job); self.jobs.pop(); self.reset(); } @@ -276,11 +464,126 @@ fn work(self: *Self) Error!void { if (BuildOptions.jit_debug) { log.debug("Worker done", .{}); } +} + +const GeneratedFunction = struct { + node: Ast.Node.Index, + closure: ?*o.ObjClosure, + native: ?*anyopaque, + native_raw: ?*anyopaque, +}; + +const CompletedJob = struct { + root_node: 
Ast.Node.Index, + ast: Ast.Slice, + functions: []GeneratedFunction, + + fn deinit(self: *CompletedJob, allocator: std.mem.Allocator) void { + allocator.free(self.functions); + } +}; + +fn publishCompleted(self: *Self) void { + while (self.completed_jobs.front()) |completed_job| { + self.publishCompletedJob(completed_job); + completed_job.deinit(self.gc.allocator); + self.completed_jobs.pop(); + } +} + +fn publishCompletedJob(_: *Self, completed_job: *CompletedJob) void { + for (completed_job.functions) |generated| { + if (generated.closure) |closure| { + closure.function.native_raw = generated.native_raw; + closure.function.native = generated.native; + } else if (generated.node == completed_job.root_node) { + completed_job.ast.nodes.items(.compiled)[generated.node] = generated.native_raw; + } + + completed_job.ast.nodes.items(.jit_status)[generated.node] = .compiled; + } +} + +fn queueCollateralFunction(self: *Self, node: Ast.Node.Index, closure: ?*o.ObjClosure) Error!void { + if (self.state.?.ast.nodes.items(.compiled)[node] != null) { + return; + } + + switch (self.state.?.ast.nodes.items(.jit_status)[node]) { + .blacklisted => return error.CantCompile, + .generated, .compiled => return, + .compilable, .queued => {}, + } + + if (closure) |uclosure| { + try self.objclosures_queue.put(self.gc.allocator, node, uclosure); + } - self.worker_stopped.store(true, .release); + if (!self.functions_queue.contains(node)) { + try self.functions_queue.put(self.gc.allocator, node, null); + } + + self.state.?.ast.nodes.items(.jit_status)[node] = .queued; +} + +fn queueObjectMethodCollateral(self: *Self, object_type: *o.ObjTypeDef, method_index: usize) Error!void { + for (self.state.?.closure.globals.items) |global| { + if (!global.isObj()) { + continue; + } + + const object = o.ObjObject.cast(global.obj()) orelse continue; + if (object.type_def != object_type) { + continue; + } + + const method_value = object.fields[method_index]; + if (!method_value.isObj()) { + return; + } + + 
const closure = o.ObjClosure.cast(method_value.obj()) orelse return; + try self.queueCollateralFunction(closure.function.node, closure); + + return; + } } -fn doJob(self: *Self, job: *const Job) Error!void { +fn doJob(self: *Self, job: *const Job) Error!CompletedJob { + var perf_scope = Perf.start(self.perf, .jit); + defer perf_scope.end(); + + const start_timestamp = std.Io.Clock.Timestamp.now(self.process.io, .awake); + defer { + const duration = start_timestamp.untilNow(self.process.io).raw; + + if (BuildOptions.jit_debug) { + const time = duration.toMilliseconds(); + + if (job.node == job.closure.function.node) { + log.info( + "Finished job function `{s}` with score {} in {}ms", + .{ + job.closure.function.type_def.resolved_type.?.Function.name.string, + job.closure.function.call_count * job.closure.function.chunk.complexity_score.?, + time, + }, + ); + } else { + log.info( + "Finished job for hostpot node {} ({s}) witch score {} in function `{s}` in {}ms", + .{ + job.node, + @tagName(job.ast.nodes.items(.tag)[job.node]), + job.ast.nodes.items(.count)[job.node] * job.ast.nodes.items(.complexity_score)[job.node].?, + job.closure.function.type_def.resolved_type.?.Function.name.string, + time, + }, + ); + } + } + } + // Remember we need to set this function's fields. Hotspot jobs are tied to // a closure for context, but their native code belongs to the AST node, not // to the enclosing function. @@ -333,24 +636,26 @@ fn doJob(self: *Self, job: *const Job) Error!void { m.MIR_gen_init(self.ctx); defer m.MIR_gen_finish(self.ctx); - // Generate all needed functions and set them in corresponding ObjFunctions + var generated_functions = std.ArrayList(GeneratedFunction).empty; + errdefer generated_functions.deinit(self.gc.allocator); + + // Generate all needed functions before publishing them on the VM thread. 
var it2 = self.functions_queue.iterator(); while (it2.next()) |kv| { const node = kv.key_ptr.*; const items = kv.value_ptr.*.?; - const native = if (items.native) |item| m.MIR_gen(self.ctx, item) else null; - const native_raw = if (items.native_raw) |item| m.MIR_gen(self.ctx, item) else null; - - // Find out if we need to set it in closure or hostpot node - if (self.objclosures_queue.get(node)) |closure| { - closure.function.native = native; - closure.function.native_raw = native_raw; - } else if (node == job.node) { - job.ast.nodes.items(.compiled)[node] = native_raw; - } + try generated_functions.append( + self.gc.allocator, + .{ + .node = node, + .closure = self.objclosures_queue.get(node), + .native = if (items.native) |item| m.MIR_gen(self.ctx, item) else null, + .native_raw = if (items.native_raw) |item| m.MIR_gen(self.ctx, item) else null, + }, + ); - job.ast.nodes.items(.jit_status)[node] = .compiled; + job.ast.nodes.items(.jit_status)[node] = .generated; } if (BuildOptions.jit_debug) { @@ -362,13 +667,19 @@ fn doJob(self: *Self, job: *const Job) Error!void { }, ); } + + return .{ + .root_node = job.node, + .ast = job.ast, + .functions = try generated_functions.toOwnedSlice(self.gc.allocator), + }; } fn getString(self: *Self, string: []const u8) Error!*o.ObjString { - return if (BuildOptions.jit_always_on) + return self.gc.strings.get(string) orelse if (BuildOptions.jit_always_on) try self.gc.copyString(string) // In this case, we did not run bytecode even once so strings are likely not interned else - self.gc.strings.get(string).?; + error.CantCompile; } fn loadRequiredExternalApi(self: *Self) Error!void { @@ -481,35 +792,6 @@ fn buildFunction(self: *Self, ast: Ast.Slice, closure: ?*o.ObjClosure, ast_node: try self.modules.append(self.gc.allocator, module); self.state.?.module = module; - if (BuildOptions.jit_debug) { - if (closure) |uclosure| { - log.debug( - "Compiling function `{s}` because it was called {}/{} times\n", - .{ - qualified_name, - 
uclosure.function.call_count, - self.call_count, - }, - ); - } else { - if (tag.isHotspot()) { - log.debug( - "Compiling hotspot for node {s} {}\n", - .{ - @tagName(self.state.?.ast.nodes.items(.tag)[ast_node]), - ast_node, - }, - ); - } else { - log.debug( - "Compiling closure `{s}`\n", - .{ - qualified_name, - }, - ); - } - } - } _ = (if (tag.isHotspot()) self.generateHotspotFunction(ast_node) @@ -523,10 +805,8 @@ fn buildFunction(self: *Self, ast: Ast.Slice, closure: ?*o.ObjClosure, ast_node: m.MIR_finish_func(self.ctx); _ = self.functions_queue.remove(ast_node); - if (closure) |uclosure| { - _ = self.objclosures_queue.remove(uclosure.function.node); - ast.nodes.items(.jit_status)[uclosure.function.node] = .blacklisted; - } + _ = self.objclosures_queue.remove(ast_node); + ast.nodes.items(.jit_status)[ast_node] = .blacklisted; } return err; @@ -1912,20 +2192,7 @@ fn generateNamedVariable(self: *Self, node: Ast.Node.Index) Error!?m.MIR_op_t { // Get the actual Value as it is right now (which is correct since a function doesn't change) const closure = o.ObjClosure.cast(self.state.?.closure.globals.items[components.slot].obj()).?; - // Does it need to be compiled? 
- switch (self.state.?.ast.nodes.items(.jit_status)[closure.function.node]) { - .compilable => if (self.state.?.ast.nodes.items(.compiled)[closure.function.node] == null) { - // Remember we need to set native fields of this ObjFunction later - try self.objclosures_queue.put(self.gc.allocator, closure.function.node, closure); - - // Remember that we need to compile this function later - try self.functions_queue.put(self.gc.allocator, closure.function.node, null); - - self.state.?.ast.nodes.items(.jit_status)[closure.function.node] = .queued; - }, - .blacklisted => return error.CantCompile, - .queued, .compiled => {}, - } + try self.queueCollateralFunction(closure.function.node, closure); return m.MIR_new_uint_op(self.ctx, closure.toValue().val); } else { @@ -2057,26 +2324,39 @@ fn generateCall(self: *Self, node: Ast.Node.Index) Error!?m.MIR_op_t { const member_lexeme = lexemes[node_components[components.callee].Dot.identifier]; switch (invoked_on.?) { - .Object => try self.buildExternApiCall( - .bz_getObjectField, - callee, - &.{ - subject.?, - m.MIR_new_uint_op( - self.ctx, - type_defs[node_components[components.callee].Dot.callee].? - .resolved_type.?.Object - .fields.get(member_lexeme).? - .index, - ), - }, - ), + .Object => object: { + const object_type = type_defs[node_components[components.callee].Dot.callee].?; + const field = object_type + .resolved_type.?.Object + .fields.get(member_lexeme).?; + + if (field.method) { + try self.queueObjectMethodCollateral(object_type, field.index); + } + + break :object try self.buildExternApiCall( + .bz_getObjectField, + callee, + &.{ + subject.?, + m.MIR_new_uint_op( + self.ctx, + field.index, + ), + }, + ); + }, .ObjectInstance => instance: { - const field = type_defs[node_components[components.callee].Dot.callee].? - .resolved_type.?.ObjectInstance.of + const object_type = type_defs[node_components[components.callee].Dot.callee].? 
+ .resolved_type.?.ObjectInstance.of; + const field = object_type .resolved_type.?.Object .fields.get(member_lexeme).?; + if (field.method) { + try self.queueObjectMethodCollateral(object_type, field.index); + } + break :instance try self.buildExternApiCall( if (field.method) .bz_getObjectInstanceMethod @@ -4991,13 +5271,7 @@ fn generateFunction(self: *Self, node: Ast.Node.Index) Error!?m.MIR_op_t { const nativefn_qualified_name = try self.getQualifiedName(node, false); defer self.gc.allocator.free(nativefn_qualified_name); - // Remember that we need to compile this function later - if (self.state.?.ast.nodes.items(.compiled)[node] == null and - self.state.?.ast.nodes.items(.jit_status)[node] == .compilable) - { - try self.functions_queue.put(self.gc.allocator, node, null); - self.state.?.ast.nodes.items(.jit_status)[node] = .queued; - } + try self.queueCollateralFunction(node, null); // For now declare it const native_raw = m.MIR_new_import(self.ctx, @ptrCast(qualified_name)); @@ -7293,6 +7567,11 @@ fn REG(self: *Self, name: [*:0]const u8, reg_type: m.MIR_type_t) !m.MIR_reg_t { } fn outputModule(self: *Self, name: []const u8, module: m.MIR_module_t) void { + std.Io.Dir.cwd().access(self.process.io, "./dist/gen", .{ .read = true }) catch { + std.Io.Dir.cwd().createDirPath(self.process.io, "./dist/gen") catch + @panic("Could not create debug path to output MIR modules"); + }; + // Output MIR code to .mir file var debug_path = std.Io.Writer.Allocating.init(self.gc.allocator); defer debug_path.deinit(); @@ -7302,7 +7581,7 @@ fn outputModule(self: *Self, name: []const u8, module: m.MIR_module_t) void { .{ name, }, - ) catch unreachable; + ) catch @panic("Out of memory"); const debug_file = std.c.fopen( @ptrCast(debug_path.written().ptr), diff --git a/src/Parser.zig b/src/Parser.zig index 123e8f43..85764fba 100644 --- a/src/Parser.zig +++ b/src/Parser.zig @@ -16,6 +16,7 @@ const Scanner = @import("Scanner.zig"); const RunFlavor = @import("vm.zig").RunFlavor; const 
Reporter = @import("Reporter.zig"); const StringParser = @import("StringParser.zig"); +const Perf = @import("Perf.zig"); const pcre = if (!is_wasm) @import("pcre.zig") else void; const buzz_api = @import("lib/buzz_api.zig"); const print = @import("io.zig").print; @@ -93,6 +94,7 @@ const Self = @This(); process: Init, ast: Ast, gc: *GC, +perf: ?*Perf = null, scanner: ?Scanner = null, current_token: ?Ast.TokenIndex = null, script_name: []const u8 = undefined, @@ -885,6 +887,9 @@ fn synchronize(self: *Self) !void { } pub fn parse(self: *Self, source: []const u8, file_name: ?[]const u8, name: []const u8) !?Ast { + var perf_scope = Perf.start(self.perf, .parser); + defer perf_scope.end(); + if (self.scanner != null) { self.scanner = null; } @@ -914,6 +919,7 @@ pub fn parse(self: *Self, source: []const u8, file_name: ?[]const u8, name: []co file_name orelse name, source, ); + self.scanner.?.perf = self.perf; const function_type: obj.ObjFunction.FunctionType = if (!self.imported and self.flavor == .Repl) .Repl @@ -2699,7 +2705,8 @@ fn declarePlaceholder(self: *Self, name: Ast.TokenIndex, placeholder: ?*obj.ObjT } pub fn parseTypeDefFrom(self: *Self, source: []const u8) Error!*obj.ObjTypeDef { - const type_scanner = Scanner.init(self.gc.allocator, self.script_name, source); + var type_scanner = Scanner.init(self.gc.allocator, self.script_name, source); + type_scanner.perf = self.perf; // Replace parser scanner with one that only looks at that substring const scanner = self.scanner; self.scanner = type_scanner; diff --git a/src/Perf.zig b/src/Perf.zig new file mode 100644 index 00000000..f2bb574b --- /dev/null +++ b/src/Perf.zig @@ -0,0 +1,279 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const BuildOptions = @import("build_options"); +const bz_io = @import("io.zig"); + +const Self = @This(); + +pub const Component = enum { + file_io, + scanner, + parser, + codegen, + vm, + gc, + jit, + native, +}; + +pub const Scope = struct { + perf: ?*Self, + 
component: Component, + start: std.Io.Clock.Timestamp = undefined, + + pub inline fn end(self: *Scope) void { + if (!BuildOptions.show_perf or builtin.cpu.arch.isWasm()) { + return; + } + + const perf = self.perf orelse return; + const duration = self.start.untilNow(perf.io).raw.toNanoseconds(); + const component = self.component; + + if (StackState.depth > 0 and StackState.stack[StackState.depth - 1].component == component) { + StackState.depth -= 1; + } + + const parent = if (StackState.depth > 0) + StackState.stack[StackState.depth - 1].component + else + null; + + perf.recordDuration(component, parent, @intCast(@max(0, duration))); + self.perf = null; + } +}; + +const component_count = @typeInfo(Component).@"enum".fields.len; +const default_order = defaultOrder(); +const max_stack = 64; + +const Active = struct { + component: Component, +}; + +const StackState = if (builtin.cpu.arch.isWasm()) struct { + var stack: [max_stack]Active = undefined; + var depth: usize = 0; +} else struct { + threadlocal var stack: [max_stack]Active = undefined; + threadlocal var depth: usize = 0; +}; + +io: bz_io.Io, +started_at: std.Io.Clock.Timestamp, +mutex: std.Io.Mutex = .init, +totals: [component_count]i128 = [_]i128{0} ** component_count, +children: [component_count][component_count]i128 = [_][component_count]i128{[_]i128{0} ** component_count} ** component_count, + +pub fn init(io: bz_io.Io) Self { + return .{ + .io = io, + .started_at = std.Io.Clock.Timestamp.now(io, .awake), + }; +} + +pub inline fn begin(self: *Self, component: Component) Scope { + if (!BuildOptions.show_perf or builtin.cpu.arch.isWasm()) { + return .{ .perf = null, .component = component }; + } + + var i: usize = 0; + while (i < StackState.depth) : (i += 1) { + if (StackState.stack[i].component == component) { + return .{ .perf = null, .component = component }; + } + } + + if (StackState.depth >= max_stack) { + return .{ .perf = null, .component = component }; + } + + StackState.stack[StackState.depth] = 
.{ .component = component }; + StackState.depth += 1; + + return .{ + .perf = self, + .component = component, + .start = std.Io.Clock.Timestamp.now(self.io, .awake), + }; +} + +pub inline fn start(perf: ?*Self, component: Component) Scope { + if (!BuildOptions.show_perf or builtin.cpu.arch.isWasm()) { + return .{ .perf = null, .component = component }; + } + + return if (perf) |p| p.begin(component) else .{ .perf = null, .component = component }; +} + +pub fn report(self: *Self) void { + if (!BuildOptions.show_perf or builtin.cpu.arch.isWasm()) { + return; + } + + var totals: [component_count]i128 = undefined; + var children: [component_count][component_count]i128 = undefined; + self.mutex.lockUncancelable(self.io); + totals = self.totals; + children = self.children; + self.mutex.unlock(self.io); + + const elapsed = @as(i128, @intCast(@max(0, self.started_at.untilNow(self.io).raw.toNanoseconds()))); + if (elapsed == 0) { + return; + } + + var order = default_order; + std.mem.sort(usize, &order, &totals, durationGreaterThan); + + var stderr = bz_io.stderrWriter(self.io); + const out = &stderr.interface; + + out.print("\n\x1b[36mPerformance\x1b[0m\n", .{}) catch return; + out.print("Total elapsed: ", .{}) catch return; + printDuration(out, elapsed) catch return; + out.print("\n\n", .{}) catch return; + + for (order) |component_index| { + const duration = totals[component_index]; + if (duration <= 0) { + continue; + } + + const component: Component = @enumFromInt(component_index); + const percent: u128 = @intCast(@divTrunc(duration * 100, elapsed)); + var duration_buffer: [32]u8 = undefined; + const duration_string = formatDuration(&duration_buffer, duration) catch return; + out.print("{s: <9} {s: >10} {d: >3}% ", .{ + label(component), + duration_string, + percent, + }) catch return; + printBar(out, duration, elapsed, color(component)) catch return; + tryPrintChildren(out, children[component_index]) catch return; + out.print("\n", .{}) catch return; + } +} + +fn 
defaultOrder() [component_count]usize { + var order: [component_count]usize = undefined; + for (&order, 0..) |*item, index| { + item.* = index; + } + + return order; +} + +fn durationGreaterThan(totals: *const [component_count]i128, lhs: usize, rhs: usize) bool { + return totals.*[lhs] > totals.*[rhs]; +} + +fn recordDuration(self: *Self, component: Component, parent: ?Component, duration: i128) void { + if (duration <= 0) { + return; + } + + const component_index = @intFromEnum(component); + + self.mutex.lockUncancelable(self.io); + defer self.mutex.unlock(self.io); + + self.totals[component_index] += duration; + + if (parent) |p| { + self.children[@intFromEnum(p)][component_index] += duration; + } +} + +fn label(component: Component) []const u8 { + return switch (component) { + .file_io => "File I/O", + .scanner => "Scanner", + .parser => "Parser", + .codegen => "Codegen", + .vm => "VM", + .gc => "GC", + .jit => "JIT", + .native => "Native", + }; +} + +fn color(component: Component) []const u8 { + return switch (component) { + .file_io => "\x1b[34m", + .scanner => "\x1b[35m", + .parser => "\x1b[36m", + .codegen => "\x1b[33m", + .vm => "\x1b[32m", + .gc => "\x1b[31m", + .jit => "\x1b[95m", + .native => "\x1b[94m", + }; +} + +fn printDuration(out: *std.Io.Writer, duration: i128) !void { + const ns_per_us = std.time.ns_per_us; + const ns_per_ms = std.time.ns_per_ms; + const ns_per_s = std.time.ns_per_s; + const value: u128 = @intCast(@max(0, duration)); + + if (value >= ns_per_s) { + try out.print("{d}.{d:0>3}s", .{ + @divTrunc(value, ns_per_s), + @divTrunc(@mod(value, ns_per_s), ns_per_ms), + }); + } else if (value >= ns_per_ms) { + try out.print("{d}.{d:0>3}ms", .{ + @divTrunc(value, ns_per_ms), + @divTrunc(@mod(value, ns_per_ms), ns_per_us), + }); + } else if (value >= ns_per_us) { + try out.print("{d}us", .{@divTrunc(value, ns_per_us)}); + } else { + try out.print("{d}ns", .{value}); + } +} + +fn formatDuration(buffer: *[32]u8, duration: i128) ![]const u8 { + 
var writer: std.Io.Writer = .fixed(buffer); + try printDuration(&writer, duration); + return writer.buffered(); +} + +fn printBar(out: *std.Io.Writer, duration: i128, total: i128, bar_color: []const u8) !void { + const width = 24; + const filled: usize = @intCast(@min(width, @divTrunc(duration * width, total))); + + try out.print("{s}", .{bar_color}); + for (0..filled) |_| { + try out.writeAll("━"); + } + + try out.writeAll("\x1b[2m"); + for (filled..width) |_| { + try out.writeAll("─"); + } + + try out.writeAll("\x1b[0m"); +} + +fn tryPrintChildren(out: *std.Io.Writer, component_children: [component_count]i128) !void { + var first = true; + + for (component_children, 0..) |duration, index| { + if (duration <= 0) { + continue; + } + + if (first) { + try out.writeAll(" includes: "); + first = false; + } else { + try out.writeAll(", "); + } + + try out.print("{s} ", .{label(@enumFromInt(index))}); + try printDuration(out, duration); + } +} diff --git a/src/Runner.zig b/src/Runner.zig index d2cae5b7..6860fd38 100644 --- a/src/Runner.zig +++ b/src/Runner.zig @@ -21,6 +21,7 @@ const Renderer = @import("renderer.zig").Renderer; const Value = @import("value.zig").Value; const o = @import("obj.zig"); const disassembler = @import("disassembler.zig"); +const Perf = @import("Perf.zig"); const Runner = @This(); @@ -29,12 +30,17 @@ vm: VM, gc: GC, parser: Parser, codegen: CodeGen, +perf: ?*Perf = null, import_registry: ImportRegistry = .empty, imports: std.StringHashMapUnmanaged(Parser.ScriptImport) = .empty, /// DynLib lookup cache dlib_symbols: std.StringHashMapUnmanaged(Parser.Dlib) = .empty, pub fn deinit(self: *Runner) void { + if (!is_wasm and self.vm.jit != null) { + self.vm.jit.?.deinit(); + self.vm.jit = null; + } self.codegen.deinit(); self.parser.deinit(); var it = self.dlib_symbols.valueIterator(); @@ -49,24 +55,22 @@ pub fn deinit(self: *Runner) void { } self.imports.deinit(self.gc.allocator); // TODO: free type_registry and its keys which are on the heap - if 
(!is_wasm and self.vm.jit != null) { - self.vm.jit.?.deinit(); - self.vm.jit = null; - } self.vm.deinit(); } /// Runner must, most of the time be on the stack, and it contains several circular references /// So the use provides the ptr to it and this function populates it -pub fn init(runner_ptr: *Runner, process: Init, allocator: std.mem.Allocator, flavor: RunFlavor, debugger: ?*Debugger) !void { +pub fn init(runner_ptr: *Runner, process: Init, allocator: std.mem.Allocator, flavor: RunFlavor, debugger: ?*Debugger, perf: ?*Perf) !void { runner_ptr.* = .{ .process = process, .gc = try GC.init(allocator), .vm = undefined, .parser = undefined, .codegen = undefined, + .perf = perf, }; + runner_ptr.gc.perf = perf; runner_ptr.gc.type_registry = try TypeRegistry.init(&runner_ptr.gc); runner_ptr.vm = try VM.init( process, @@ -75,11 +79,15 @@ pub fn init(runner_ptr: *Runner, process: Init, allocator: std.mem.Allocator, fl flavor, debugger, ); + runner_ptr.vm.perf = perf; runner_ptr.vm.jit = if (BuildOptions.jit and BuildOptions.cycle_limit == null and debugger == null) try JIT.init(process, &runner_ptr.gc) else null; + if (runner_ptr.vm.jit) |*jit| { + jit.perf = perf; + } runner_ptr.parser = Parser.init( process, @@ -89,6 +97,7 @@ pub fn init(runner_ptr: *Runner, process: Init, allocator: std.mem.Allocator, fl false, flavor, ); + runner_ptr.parser.perf = perf; runner_ptr.codegen = CodeGen.init( process, @@ -98,6 +107,7 @@ pub fn init(runner_ptr: *Runner, process: Init, allocator: std.mem.Allocator, fl if (runner_ptr.vm.jit) |*jit| jit else null, debugger != null, ); + runner_ptr.codegen.perf = perf; } pub fn runFile( @@ -105,6 +115,9 @@ pub fn runFile( file_name: []const u8, args: []const []const u8, ) !u8 { + var file_io_scope = Perf.start(runner.perf, .file_io); + defer file_io_scope.end(); + var file = (if (std.fs.path.isAbsolute(file_name)) std.Io.Dir.openFileAbsolute(runner.process.io, file_name, .{}) else @@ -118,6 +131,7 @@ pub fn runFile( defer if 
(runner.vm.debugger == null) runner.gc.allocator.free(source); _ = try file.readPositionalAll(runner.process.io, source, 0); + file_io_scope.end(); if (try runner.parser.parse(source, null, file_name)) |ast| { if (runner.vm.flavor != .Fmt) { diff --git a/src/Scanner.zig b/src/Scanner.zig index 4800b1a0..f3cfe572 100644 --- a/src/Scanner.zig +++ b/src/Scanner.zig @@ -3,6 +3,7 @@ const mem = std.mem; const Allocator = mem.Allocator; const Token = @import("Token.zig"); const v = @import("value.zig"); +const Perf = @import("Perf.zig"); pub const SourceLocation = struct { start: usize, @@ -30,6 +31,7 @@ line_offset: usize = 0, column_offset: usize = 0, script_name: []const u8, token_index: usize = 0, +perf: ?*Perf = null, pub fn init(allocator: Allocator, script_name: []const u8, source: []const u8) Self { return Self{ @@ -40,6 +42,9 @@ pub fn init(allocator: Allocator, script_name: []const u8, source: []const u8) S } pub fn scanToken(self: *Self) Allocator.Error!Token { + var perf_scope = Perf.start(self.perf, .scanner); + defer perf_scope.end(); + self.skipWhitespaces(); self.current.start = self.current.offset; diff --git a/src/behavior.zig b/src/behavior.zig index c0959343..2b9c61a8 100644 --- a/src/behavior.zig +++ b/src/behavior.zig @@ -8,9 +8,12 @@ const bz_io = @import("io.zig"); const Parser = @import("Parser.zig"); const BuildOptions = @import("build_options"); const clap = @import("clap"); +const Perf = @import("Perf.zig"); const black_listed_tests = std.StaticStringMap(void).initComptime( - .{}, + .{ + .{ "tests/fuzzed/id:000434,sig:06,src:000723,time:202384530,execs:828228,op:arith8,pos:276,val:-1.buzz", {} }, + }, ); const Result = struct { @@ -45,7 +48,7 @@ const Result = struct { } }; -fn testBehaviors(process: std.process.Init, allocator: std.mem.Allocator, fail_fast: bool) !Result { +fn testBehaviors(process: std.process.Init, allocator: std.mem.Allocator, fail_fast: bool, perf: ?*Perf) !Result { var result = Result{}; const dirs = [_][]const u8{ 
"tests/behavior", "tests" }; @@ -77,7 +80,8 @@ fn testBehaviors(process: std.process.Init, allocator: std.mem.Allocator, fail_f var had_error: bool = false; var runner: Runner = undefined; - try runner.init(process, allocator, .Test, null); + try runner.init(process, allocator, .Test, null, perf); + defer runner.deinit(); var failed = false; _ = runner.runFile( @@ -358,11 +362,14 @@ pub fn main(init: std.process.Init) !u8 { var result: Result = .{}; defer result.deinit(allocator); + var perf: ?Perf = if (BuildOptions.show_perf) Perf.init(init.io) else null; + defer if (perf) |*p| p.report(); + const do_all = res.args.all == 1 or (res.args.behavior != 1 and res.args.@"compile-error" != 1 and res.args.fuzz != 1); if (do_all or res.args.behavior == 1) { bz_io.print(init.io, "\n\x1b[34m■ Behavior tests\x1b[0m...\n", .{}); - var tests_result = try testBehaviors(init, allocator, res.args.fast == 1); + var tests_result = try testBehaviors(init, allocator, res.args.fast == 1, if (perf) |*p| p else null); try result.merge( allocator, &tests_result, diff --git a/src/buzz_api.zig b/src/buzz_api.zig index 4a6e5637..1fcccb01 100644 --- a/src/buzz_api.zig +++ b/src/buzz_api.zig @@ -1131,6 +1131,27 @@ export fn bz_setUpValue(ctx: *o.NativeCtx, slot: usize, value: v.Value) callconv ctx.upvalues[slot].location.* = value; } +export fn bz_callFromJit(ctx: *o.NativeCtx) callconv(.c) v.Value { + const vm = ctx.vm; + + vm.callValue( + ctx.callee, + @intCast(ctx.arg_count), + null, + ) catch @panic("Failed calling function from JIT"); + + // If the callee is interpreted, run it until its return reaches the native + // caller frame. The VM leaves the result on the stack; RawFn returns it. 
+ if (!calleeIsCompiled(ctx.callee)) { + vm.run() catch @panic("Failed running function from JIT"); + } + + const result = vm.pop(); + vm.current_fiber.stack_top = ctx.base; + + return result; +} + export fn bz_context(ctx: *o.NativeCtx, closure_value: v.Value, new_ctx: *o.NativeCtx, arg_count: usize) callconv(.c) *anyopaque { if (is_wasm) { unreachable; @@ -1182,16 +1203,21 @@ export fn bz_context(ctx: *o.NativeCtx, closure_value: v.Value, new_ctx: *o.Nati .upvalues = if (closure) |cls| cls.upvalues.ptr else ctx.upvalues, .base = ctx.vm.current_fiber.stack_top - arg_count - 1, .stack_top = &ctx.vm.current_fiber.stack_top, + .callee = closure_value, + .arg_count = arg_count, }; if (closure) |cls| { - if (cls.function.native_raw == null and cls.function.native == null) { - ctx.vm.jit.?.compile(cls.function.chunk.ast, cls, null) catch @panic("Failed compiling function"); + if (cls.function.native_raw == null) { + ctx.vm.jit.?.compileFunctionSynchronously(cls) catch |err| switch (err) { + error.CantCompile => return @as(*anyopaque, @ptrFromInt(@intFromPtr(&bz_callFromJit))), + else => @panic("Failed compiling function"), + }; } ctx.vm.current_fiber.current_compiled_function = cls.function; - return cls.function.native_raw.?; + return cls.function.native_raw orelse @as(*anyopaque, @ptrFromInt(@intFromPtr(&bz_callFromJit))); } return native.?.native; diff --git a/src/lib/buzz_api.zig b/src/lib/buzz_api.zig index b0ff9bd1..29e465e4 100644 --- a/src/lib/buzz_api.zig +++ b/src/lib/buzz_api.zig @@ -196,6 +196,8 @@ pub const NativeCtx = extern struct { // Pointer to the stack_top field of the current fiber // !! Needs to change when current fiber changes !! 
stack_top: *[*]Value, + callee: Value, + arg_count: usize, pub fn getIo(self: *@This()) Io { if (is_wasm) return {}; diff --git a/src/main.zig b/src/main.zig index a1e63156..e0c4fc78 100644 --- a/src/main.zig +++ b/src/main.zig @@ -23,6 +23,7 @@ const wasm_repl = @import("wasm_repl.zig"); const Renderer = @import("renderer.zig").Renderer; const io = @import("io.zig"); const Runner = @import("Runner.zig"); +const Perf = @import("Perf.zig"); pub export const initRepl_export = wasm_repl.initRepl; pub export const runLine_export = wasm_repl.runLine; @@ -135,15 +136,20 @@ pub fn main(provided_init: Init) u8 { return 1; }; } else if (!is_wasm and res.positionals[0].len > 0) { + var perf: ?Perf = if (BuildOptions.show_perf) Perf.init(init.io) else null; + defer if (perf) |*p| p.report(); + var runner: Runner = undefined; runner.init( init, allocator, flavor, null, + if (perf) |*p| p else null, ) catch { return 1; }; + defer runner.deinit(); return runner.runFile( res.positionals[0][0], diff --git a/src/obj.zig b/src/obj.zig index bd34a364..3f8a58c7 100644 --- a/src/obj.zig +++ b/src/obj.zig @@ -1242,6 +1242,8 @@ pub const NativeCtx = extern struct { // Pointer to the stack_top field of the current fiber // !! Needs to change when current fiber changes !! 
stack_top: *[*]Value, + callee: Value, + arg_count: usize, }; // 1 = return value on stack, 0 = no return value, -1 = error @@ -1316,21 +1318,21 @@ pub const ObjFunction = struct { chunk: Chunk, upvalue_count: u8 = 0, - // So we can JIT the function at runtime + // Jit related stuff + + /// So we can JIT the function at runtime node: Ast.Node.Index, - // How many time the function was called + /// How many time the function was called call_count: u32 = 0, - - // JIT compiled function + /// JIT compiled function native_raw: ?*anyopaque = null, - - // JIT compiled function callable by buzz VM + /// JIT compiled function callable by buzz VM native: ?*anyopaque = null, pub fn init(allocator: Allocator, ast: Ast.Slice, node: Ast.Node.Index) !Self { - return Self{ + return .{ .node = node, - .chunk = Chunk.init(allocator, ast), + .chunk = .init(allocator, ast), }; } diff --git a/src/repl.zig b/src/repl.zig index a807181f..a11488fd 100644 --- a/src/repl.zig +++ b/src/repl.zig @@ -17,6 +17,7 @@ const GC = @import("GC.zig"); const TypeRegistry = @import("TypeRegistry.zig"); const Runner = @import("Runner.zig"); const QualifiedNameContext = @import("Ast.zig").QualifiedName.Context; +const Perf = @import("Perf.zig"); pub const PROMPT = ">>> "; pub const MULTILINE_PROMPT = "... 
"; @@ -68,7 +69,10 @@ pub fn repl(process: std.process.Init, allocator: std.mem.Allocator) !void { false; var runner: Runner = undefined; - try runner.init(process, allocator, .Repl, null); + var perf: ?Perf = if (BuildOptions.show_perf) Perf.init(process.io) else null; + defer if (perf) |*p| p.report(); + + try runner.init(process, allocator, .Repl, null, if (perf) |*p| p else null); defer runner.deinit(); var stdout = io.stdoutWriter(process.io); diff --git a/src/vm.zig b/src/vm.zig index 9a2294c2..2d40ae0c 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -17,6 +17,7 @@ const dispatch_call_modifier: std.builtin.CallModifier = if (!is_wasm) .always_t const print = @import("io.zig").print; const Debugger = if (!is_wasm) @import("Debugger.zig") else void; const TypeRegistry = @import("TypeRegistry.zig"); +const Perf = @import("Perf.zig"); pub const Init = if (is_wasm) std.process.Init.Minimal else std.process.Init; @@ -453,6 +454,7 @@ pub const VM = struct { globals_dbg: std.ArrayList(Value) = .empty, import_registry: *ImportRegistry, jit: ?JIT = null, + perf: ?*Perf = null, debugger: ?*Debugger = null, paused: bool = false, hotspots_count: u128 = 0, @@ -498,6 +500,7 @@ pub const VM = struct { var gc = try other.gc.allocator.create(GC); // FIXME: should share strings between gc gc.* = try GC.init(other.gc.allocator); + gc.perf = other.gc.perf; gc.type_registry = try TypeRegistry.init(gc); const import_registry = try other.gc.allocator.create(ImportRegistry); import_registry.* = .{}; @@ -509,6 +512,7 @@ pub const VM = struct { .Run, null, ); + vm.perf = other.perf; return vm; } @@ -593,6 +597,9 @@ pub const VM = struct { } pub fn interpret(self: *Self, ast: Ast.Slice, function: *obj.ObjFunction, args: ?[]const []const u8) Error!void { + var perf_scope = Perf.start(self.perf, .vm); + defer perf_scope.end(); + self.current_ast = ast; self.current_fiber = try self.gc.allocator.create(Fiber); @@ -2292,6 +2299,7 @@ pub const VM = struct { self.panic("Out of memory"); 
unreachable; }; + vm.perf = self.perf; // TODO: how to free this since we copy things to new vm, also fails anyway // { // defer vm.deinit(); @@ -4545,12 +4553,10 @@ pub const VM = struct { function_ast.nodes.items(.count)[node] += 1; - if (self.shouldCompileHotspot(function_ast, node)) { - self.jit.?.compile( - function_ast, - frame.closure, - node, - ) catch {}; + if (BuildOptions.jit and BuildOptions.jit_hotspot and self.jit != null) { + self.jit.?.compileHotspotIfNeeded(function_ast, frame.closure, node) catch { + // FIXME: what to do? + }; } if (function_ast.nodes.items(.compiled)[node]) |native| { @@ -4572,17 +4578,6 @@ pub const VM = struct { }; obj_native.mark(self.gc); - if (BuildOptions.jit_debug) { - print( - self.process.io, - "Compiled hotspot {s} in function `{s}`\n", - .{ - @tagName(function_ast.nodes.items(.tag)[node]), - frame.closure.function.type_def.resolved_type.?.Function.name.string, - }, - ); - } - // The now compile hotspot must be a new constant for the current function frame.closure.function.chunk.constants.append( frame.closure.function.chunk.allocator, @@ -4759,6 +4754,9 @@ pub const VM = struct { } pub fn run(self: *Self) error{RuntimeError}!void { + var perf_scope = Perf.start(self.perf, .vm); + defer perf_scope.end(); + const next_current_frame = self.currentFrame().?; const next_full_instruction = self.readInstruction(next_current_frame); const next_instruction: Chunk.OpCode = getCode(next_full_instruction); @@ -4822,6 +4820,18 @@ pub const VM = struct { const frame_ptr = self.currentFrame(); const frame_val = if (frame_ptr) |ptr| ptr.* else null; if (self.current_fiber.frame_count > 0) { + if (!is_wasm and frame_ptr.?.in_native_call and self.current_fiber.try_context != null) { + self.push(payload); + + if (builtin.os.tag == .macos or builtin.os.tag == .linux) { + jmp._longjmp(&self.current_fiber.try_context.?.env, 1); + } else { + jmp.longjmp(&self.current_fiber.try_context.?.env, 1); + } + + unreachable; + } + const function_type 
= frame_ptr.?.closure.function.type_def.resolved_type.?.Function.function_type; if (function_type != .ScriptEntryPoint and function_type != .Repl) { try stack.append(self.gc.allocator, frame_val.?); @@ -5013,54 +5023,15 @@ pub const VM = struct { } fn compileAndCall(self: *Self, closure: *obj.ObjClosure, arg_count: u8, catch_value: ?Value) Error!bool { - var native = closure.function.native; - if (self.jit) |*jit| { - jit.call_count += 1; - // Do we need to jit the function? - // TODO: figure out threshold strategy - if (self.shouldCompileFunction(closure)) { - var success = true; - jit.compile(closure.function.chunk.ast, closure, null) catch |err| { - if (err == Error.CantCompile) { - success = false; - } else { - return err; - } - }; - - if (BuildOptions.jit_debug and success) { - print( - self.process.io, - "Compiled function `{s}`\n", - .{ - closure.function.type_def.resolved_type.?.Function.name.string, - }, - ); - } - - if (success) { - native = closure.function.native; - } - } - } - - // Is there a compiled version of it? 
- if (native != null) { - // if (BuildOptions.jit_debug) { - // print( - // self.process.io, - // "Calling compiled version of function `{s}.{}.n{}`\n", - // .{ - // closure.function.type_def.resolved_type.?.Function.name.string, - // self.current_ast.nodes.items(.components)[closure.function.node].Function.id, - // closure.function.node, - // }, - // ); - // } - + if (closure.function.native orelse + if (BuildOptions.jit and self.jit != null and try self.jit.?.compileFunctionIfNeeded(closure)) + closure.function.native + else + closure.function.native) |native| + { try self.callCompiled( closure, - @ptrCast(@alignCast(native.?)), + @ptrCast(@alignCast(native)), arg_count, catch_value, ); @@ -5072,7 +5043,7 @@ pub const VM = struct { } fn call(self: *Self, closure: *obj.ObjClosure, arg_count: u8, catch_value: ?Value) Error!void { - closure.function.call_count += 1; + if (closure.function.native == null) closure.function.call_count += 1; if (BuildOptions.recursive_call_limit) |recursive_call_limit| { // If recursive call, update counter @@ -5187,6 +5158,8 @@ pub const VM = struct { .upvalues = frame.closure.upvalues.ptr, .base = frame.slots, .stack_top = &self.current_fiber.stack_top, + .callee = Value.Void, + .arg_count = 0, }; // If native returns 1 here, we know there was an early return in the hotspot @@ -5211,6 +5184,9 @@ pub const VM = struct { } fn callNative(self: *Self, native: obj.NativeFn, arg_count: u8, catch_value: ?Value) !void { + var perf_scope = Perf.start(self.perf, .native); + defer perf_scope.end(); + var frame = self.currentFrame().?; const was_in_native_call = frame.in_native_call; frame.in_native_call = true; @@ -5224,6 +5200,8 @@ pub const VM = struct { .upvalues = &[_]*obj.ObjUpValue{}, .base = self.current_fiber.stack_top - arg_count - 1, .stack_top = &self.current_fiber.stack_top, + .callee = Value.Void, + .arg_count = arg_count, }; const native_return = native(&ctx); @@ -5284,6 +5262,8 @@ pub const VM = struct { .upvalues = 
closure.upvalues.ptr, .base = self.current_fiber.stack_top - arg_count - 1, .stack_top = &self.current_fiber.stack_top, + .callee = closure.toValue(), + .arg_count = arg_count, }; const native_return = native(&ctx); @@ -5547,48 +5527,6 @@ pub const VM = struct { return created_upvalue; } - fn shouldCompileFunction(self: *Self, closure: *obj.ObjClosure) bool { - const function_type = closure.function.type_def.resolved_type.?.Function.function_type; - const function_ast = closure.function.chunk.ast; - - switch (function_type) { - .Extern, - .Script, - .ScriptEntryPoint, - .EntryPoint, - .Repl, - => return false, - else => {}, - } - - return function_ast.nodes.items(.jit_status)[closure.function.node] == .compilable and - function_ast.nodes.items(.compiled)[closure.function.node] == null and - self.jit != null and - ( - // Always on - BuildOptions.jit_always_on or - // Threshold reached - (closure.function.call_count > 10 and - (@as(f64, @floatFromInt(closure.function.call_count)) / @as(f64, @floatFromInt(self.jit.?.call_count))) > BuildOptions.jit_prof_threshold)); - } - - fn shouldCompileHotspot(self: *Self, ast: Ast.Slice, node: Ast.Node.Index) bool { - const count = ast.nodes.items(.count)[node]; - - return BuildOptions.jit_hotspot_on and - // Marked as compilable - ast.nodes.items(.jit_status)[node] == .compilable and - ast.nodes.items(.compiled)[node] == null and - self.jit != null and - // JIT compile all the thing? 
- ( - // Always compile - BuildOptions.jit_always_on or BuildOptions.jit_hotspot_always_on or - // Threshold reached - (count > 10 and - (@as(f64, @floatFromInt(count)) / @as(f64, @floatFromInt(self.hotspots_count))) > BuildOptions.jit_prof_threshold)); - } - fn patchHotspot( self: *Self, location: Ast.TokenIndex, @@ -5623,6 +5561,7 @@ pub const VM = struct { ); const hotspot_call_start = to - hotspot_call.len; + try chunk.addCompiledHotspotRange(frame.ip - 1, hotspot_call_start); // In the event that we are in a nested loop, we put a jump instruction in place of OP_HOTSPOT chunk.code.items[frame.ip - 2] = (@as(u32, @intCast(@intFromEnum(Chunk.OpCode.OP_JUMP))) << 24) | @as( diff --git a/tests/behavior/jit-dynamic-call.buzz b/tests/behavior/jit-dynamic-call.buzz new file mode 100644 index 00000000..b60d7f84 --- /dev/null +++ b/tests/behavior/jit-dynamic-call.buzz @@ -0,0 +1,60 @@ +import "std"; + +fun dynamicInc(value: int) > int { + return value + 1; +} + +fun dynamicIncViaFiber(value: int) > int { + return resolve &dynamicInc(value); +} + +fun failDynamic(value: int) > int !> str { + throw "failed"; + + return value; +} + +fun failDynamicViaFiber(value: int) > int !> str { + return resolve &failDynamic(value); +} + +fun callDynamic(fn: fun (value: int) > int, value: int) > int { + return fn(value); +} + +fun callDynamicWithCatch(fn: fun (value: int) > int !> str, value: int) > int { + return fn(value) catch 42; +} + +test "JIT dynamic closure call" { + final callbacks = [ dynamicInc ]; + var value = 0; + + foreach (_ in 0..1500) { + value = callDynamic(callbacks[0], value: value); + } + + std\assert(value == 1500, message: "compiled code can call a dynamic closure"); +} + +test "JIT dynamic call to blacklisted closure" { + final callbacks = [ dynamicIncViaFiber ]; + var value = 0; + + foreach (_ in 0..1500) { + value = callDynamic(callbacks[0], value: value); + } + + std\assert(value == 1500, message: "compiled code can call an interpreted dynamic closure"); +} + 
+test "JIT dynamic call to blacklisted throwing closure" { + final callbacks = [ failDynamicViaFiber ]; + var value = 0; + + foreach (_ in 0..1500) { + value = callDynamicWithCatch(callbacks[0], value: value); + } + + std\assert(value == 42, message: "compiled code can catch an interpreted dynamic closure error"); +} diff --git a/tests/bench/fasta.buzz b/tests/bench/fasta.buzz index 4e0660ee..8e5d755a 100644 --- a/tests/bench/fasta.buzz +++ b/tests/bench/fasta.buzz @@ -63,7 +63,7 @@ object Frequency { foreach (_ in 0..nRandom) { final r = this.random(1.0); var skip = false; - foreach (_ in 0..len) { + foreach (i in 0..len) { if (r < this.probs[i]) { buffer.setAt(bufferIndex, value: this.chars.at(i)); bufferIndex = bufferIndex + 1; diff --git a/tests/bench/nbody.buzz b/tests/bench/nbody.buzz index d88ac267..3618f9ae 100644 --- a/tests/bench/nbody.buzz +++ b/tests/bench/nbody.buzz @@ -26,23 +26,25 @@ fun advance(bodies: [mut Body], nbody: int, dt: double) > void { var bivy = bi.vy; var bivz = bi.vz; - foreach (j in (i + 1)..nbody) { - final bj = bodies[j]; - final dx = bix - bj.x; - final dy = biy - bj.y; - final dz = biz - bj.z; - final dist2 = dx * dx + dy * dy + dz * dz; - var mag = math\sqrt(dist2); - mag = dt / (mag * dist2); - var bm = bj.mass * mag; - - bivx = bivx - (dx * bm); - bivy = bivy - (dy * bm); - bivz = bivz - (dz * bm); - bm = bimass * mag; - bj.vx = bj.vx + (dx * bm); - bj.vy = bj.vy + (dy * bm); - bj.vz = bj.vz + (dz * bm); + if (i + 1 < nbody) { + foreach (j in (i + 1)..nbody) { + final bj = bodies[j]; + final dx = bix - bj.x; + final dy = biy - bj.y; + final dz = biz - bj.z; + final dist2 = dx * dx + dy * dy + dz * dz; + var mag = math\sqrt(dist2); + mag = dt / (mag * dist2); + var bm = bj.mass * mag; + + bivx = bivx - (dx * bm); + bivy = bivy - (dy * bm); + bivz = bivz - (dz * bm); + bm = bimass * mag; + bj.vx = bj.vx + (dx * bm); + bj.vy = bj.vy + (dy * bm); + bj.vz = bj.vz + (dz * bm); + } } bi.vx = bivx; @@ -63,13 +65,15 @@ fun 
energy(bodies: [Body], nbody: int) > double { final vz = bi.vz; final bim = bi.mass; e = e + (0.5 * bim * (vx * vx + vy * vy + vz * vz)); - foreach (j in (i + 1)..nbody) { - final bj = bodies[j]; - final dx = bi.x - bj.x; - final dy = bi.y - bj.y; - final dz = bi.z - bj.z; - final distance = math\sqrt(dx * dx + dy * dy + dz * dz); - e = e - ((bim * bj.mass) / distance); + if (i + 1 < nbody) { + foreach (j in (i + 1)..nbody) { + final bj = bodies[j]; + final dx = bi.x - bj.x; + final dy = bi.y - bj.y; + final dz = bi.z - bj.z; + final distance = math\sqrt(dx * dx + dy * dy + dz * dz); + e = e - ((bim * bj.mass) / distance); + } } } diff --git a/tests/bench/spectral.buzz b/tests/bench/spectral.buzz index cbb33a88..5f4d891e 100644 --- a/tests/bench/spectral.buzz +++ b/tests/bench/spectral.buzz @@ -2,8 +2,8 @@ import "std"; import "math"; fun A(i: double, j: double) > double { - final ij = i + j - 1.0; - return 1.0 / (ij * (ij - 1.0) * 0.5 + i); + final ij = i + j; + return 1.0 / (ij * (ij + 1.0) * 0.5 + i + 1.0); } fun Av(x: [double], y: mut [double], N: int) > void {