From 966ff10259ecb806d2f39a578cde48245265f692 Mon Sep 17 00:00:00 2001 From: Benoit Giannangeli Date: Thu, 14 May 2026 15:05:47 +0200 Subject: [PATCH 1/5] feat(jit): Improve JIT strategy --- build.zig | 43 +++++++++--- src/Ast.zig | 70 +++++++++++++++++-- src/Chunk.zig | 98 +++++++++++++++++++++++++++ src/Jit.zig | 180 +++++++++++++++++++++++++++++++++++++++---------- src/Runner.zig | 13 ++++ src/obj.zig | 16 ++--- src/vm.zig | 119 ++++---------------------------- 7 files changed, 370 insertions(+), 169 deletions(-) diff --git a/build.zig b/build.zig index 567b58d2..499d3e60 100644 --- a/build.zig +++ b/build.zig @@ -904,9 +904,9 @@ const BuildOptions = struct { "jit_hotspot_always_on", "JIT compiler will compile any hotspot encountered", ) orelse false, - .hotspot_on = !is_wasm and b.option( + .hotspot = !is_wasm and b.option( bool, - "jit_hotspot_on", + "jit_hotspot", "JIT compiler will compile hotspot when threshold reached", ) orelse true, .on = !is_wasm and b.option( @@ -919,11 +919,26 @@ const BuildOptions = struct { "jit_asynchronous", "JIT will work in a dedicated thread", ) orelse false, - .prof_threshold = b.option( - f32, - "jit_prof_threshold", - "Threshold to determine if a function is hot. 
If the numbers of calls to it makes this percentage of all calls, it's considered hot and will be JIT compiled.", - ) orelse 0.05, + .call_threshold = b.option( + u16, + "jit_call_threshold", + "Call count threshold above which the function is being considered for JIT compilation.", + ) orelse 32, + .score_threshold = b.option( + u16, + "jit_score_threshold", + "Complexity score threshold above which the function will be JIT compiled.", + ) orelse 1024, + .hotspot_threshold = b.option( + u16, + "jit_hotspot_threshold", + "Loop count threshold above which a loop is being considered for JIT compilation.", + ) orelse 16, + .hotspot_score_threshold = b.option( + u16, + "jit_hotspot_score_threshold", + "Complexity score threshold above which a loop node will be JIT compiled.", + ) orelse 1024, }, }; } @@ -971,9 +986,12 @@ const BuildOptions = struct { on: bool, always_on: bool, hotspot_always_on: bool, - hotspot_on: bool, + hotspot: bool, debug: bool, - prof_threshold: f32 = 0.05, + call_threshold: u16 = 32, + score_threshold: u16 = 1024, + hotspot_threshold: u16 = 16, + hotspot_score_threshold: u16 = 1024, asynchronous: bool, pub fn step(self: JITOptions, options: *Build.Step.Options) void { @@ -981,9 +999,12 @@ const BuildOptions = struct { options.addOption(@TypeOf(self.always_on), "jit_always_on", self.always_on); options.addOption(@TypeOf(self.hotspot_always_on), "jit_hotspot_always_on", self.hotspot_always_on); options.addOption(@TypeOf(self.on), "jit", self.on); - options.addOption(@TypeOf(self.prof_threshold), "jit_prof_threshold", self.prof_threshold); - options.addOption(@TypeOf(self.hotspot_on), "jit_hotspot_on", self.hotspot_on); + options.addOption(@TypeOf(self.call_threshold), "jit_call_threshold", self.call_threshold); + options.addOption(@TypeOf(self.score_threshold), "jit_score_threshold", self.score_threshold); + options.addOption(@TypeOf(self.hotspot), "jit_hotspot", self.hotspot); options.addOption(@TypeOf(self.asynchronous), "jit_asynchronous", 
self.asynchronous); + options.addOption(@TypeOf(self.hotspot_threshold), "jit_hotspot_threshold", self.hotspot_threshold); + options.addOption(@TypeOf(self.hotspot_score_threshold), "jit_hotspot_score_threshold", self.hotspot_threshold); } }; diff --git a/src/Ast.zig b/src/Ast.zig index 1a6e60a5..edb498c9 100644 --- a/src/Ast.zig +++ b/src/Ast.zig @@ -347,7 +347,12 @@ pub const Slice = struct { const IsConstantContext = struct { result: ?bool = null, - pub fn processNode(self: *IsConstantContext, _: std.mem.Allocator, ast: Self.Slice, node: Self.Node.Index) (std.mem.Allocator.Error || std.fmt.BufPrintError)!bool { + pub fn processNode( + self: *IsConstantContext, + _: std.mem.Allocator, + ast: Self.Slice, + node: Self.Node.Index, + ) (std.mem.Allocator.Error || std.fmt.BufPrintError)!bool { switch (ast.nodes.items(.tag)[node]) { .AnonymousObjectType, .FiberType, @@ -496,6 +501,58 @@ pub const Slice = struct { return ctx.result orelse false; } + /// Mirrors Chunk.score (even though Chunk.score and Node.score won't be comparable) + /// Is use to compute complexity of a hotspot node (which don't have a Chunk available to evaluate) + const ComplexityContext = struct { + score: usize = 0, + + pub fn processNode( + ctx: *ComplexityContext, + _: std.mem.Allocator, + ast: Self.Slice, + node: Self.Node.Index, + ) (std.mem.Allocator.Error || std.fmt.BufPrintError)!bool { + if (ast.nodes.items(.complexity_score)[node]) |sc| { + ctx.score += sc; + return true; // Don't go deeper we already computed this node score + } + + ctx.score += switch (ast.nodes.items(.tag)[node]) { + .AsyncCall, + .Resolve, + .Resume, + => { // Blacklist because of fiber use + ctx.score = 0; + return true; + }, + .Call, + .DoUntil, + .For, + .ForEach, + .Throw, + .Try, + .While, + => @as(usize, @intCast(1)), + else => @as(usize, @intCast(0)), + } + 1; // At least 1 per node + + return false; + } + }; + + pub fn score(self: Self.Slice, allocator: std.mem.Allocator, node: Node.Index) !usize { + const 
complexity_score = &self.nodes.items(.complexity_score)[node]; + if (complexity_score.* == null) { + var ctx = ComplexityContext{}; + + try self.walk(allocator, &ctx, node); + + complexity_score.* = ctx.score; + } + + return complexity_score.* orelse 0; + } + fn binaryValue(self: Self.Slice, node: Node.Index, gc: *GC) !?Value { const components = self.nodes.items(.components)[node].Binary; @@ -990,26 +1047,25 @@ pub const Node = struct { end_location: TokenIndex, /// Docblock if any docblock: ?TokenIndex = null, - /// If null, either its a statement or its a reference to something unknown that should ultimately raise a compile error type_def: ?*obj.ObjTypeDef = null, /// Wether optional jumps must be patch before generate this node bytecode patch_opt_jumps: bool = false, /// Does this node closes a scope ends_scope: ?[]const Close = null, - /// Data related to this node components: Components, - /// To avoid generating a node const value multiple times value: ?Value = null, + // JIT related metadata + /// How many times it was visited at runtime (used to decide whether it's a hotspot that needs to be compiled) count: usize = 0, - - /// Wether its blacklisted + /// Complexity score computed once to help evaluate if the node is worth JIT compiling complexity_score: ?usize = null, + /// Node status: blacklisted, queued for compilation, compiled, compilable jit_status: JitStatus = .compilable, - /// Once compiled compiled: ?*anyopaque = null, diff --git a/src/Chunk.zig b/src/Chunk.zig index 95e555ec..a322db6c 100644 --- a/src/Chunk.zig +++ b/src/Chunk.zig @@ -13,6 +13,10 @@ code: std.ArrayList(u32) = .empty, locations: std.ArrayList(Ast.TokenIndex) = .empty, /// List of constants defined in this chunk constants: std.ArrayList(Value) = .empty, +/// Ranges of bytecode skipped by compiled hotspots +compiled_hotspot_ranges: std.ArrayList(InstructionRange) = .empty, +/// Complexity score computed once to help evaluate if the chunk is worth JIT compiling +complexity_score: 
?u32 = null, pub fn init(allocator: std.mem.Allocator, ast: Ast.Slice) Self { return Self{ @@ -25,6 +29,7 @@ pub fn deinit(self: *Self) void { self.code.deinit(self.allocator); self.constants.deinit(self.allocator); self.locations.deinit(self.allocator); + self.compiled_hotspot_ranges.deinit(self.allocator); } pub fn write(self: *Self, code: u32, where: Ast.TokenIndex) !void { @@ -40,6 +45,94 @@ pub fn addConstant(self: *Self, vm: ?*VM, value: Value) !u24 { return @intCast(self.constants.items.len - 1); } +/// Compute a basic complexity score based on size and presence "costly" opcodes +pub fn score(self: *Self) u32 { + if (self.complexity_score) |sc| return sc; + + var complexity_score: u32 = 0; + + for (self.code.items, 0..) |op, index| { + if (self.isInCompiledHotspotRange(index)) { + continue; + } + + complexity_score += 1; + + switch (VM.getCode(op)) { + .OP_HOTSPOT, // Those cover any loop + .OP_CALL, + .OP_TAIL_CALL, + .OP_CALL_INSTANCE_PROPERTY, + .OP_TAIL_CALL_INSTANCE_PROPERTY, + .OP_INSTANCE_INVOKE, + .OP_INSTANCE_TAIL_INVOKE, + .OP_PROTOCOL_INVOKE, + .OP_PROTOCOL_TAIL_INVOKE, + .OP_TRY, + .OP_TRY_END, + .OP_THROW, + => complexity_score += 1, + .OP_FIBER_FOREACH, + .OP_RESUME, + .OP_RESOLVE, + => return 0, // A chunk with fiber op codes will not be compiled so the score is 0 + else => {}, + } + } + + self.complexity_score = complexity_score; + + return complexity_score; +} + +pub fn addCompiledHotspotRange(self: *Self, start: usize, end: usize) !void { + if (start >= end) { + return; + } + + var merged = InstructionRange{ + .start = start, + .end = end, + }; + + var index: usize = 0; + while (index < self.compiled_hotspot_ranges.items.len) { + const range = self.compiled_hotspot_ranges.items[index]; + + if (merged.end < range.start) { + try self.compiled_hotspot_ranges.insert(self.allocator, index, merged); + self.complexity_score = null; + return; + } + + if (merged.start > range.end) { + index += 1; + continue; + } + + merged.start = @min(merged.start, 
range.start); + merged.end = @max(merged.end, range.end); + _ = self.compiled_hotspot_ranges.orderedRemove(index); + } + + try self.compiled_hotspot_ranges.append(self.allocator, merged); + self.complexity_score = null; +} + +fn isInCompiledHotspotRange(self: *const Self, index: usize) bool { + for (self.compiled_hotspot_ranges.items) |range| { + if (index < range.start) { + return false; + } + + if (index >= range.start and index < range.end) { + return true; + } + } + + return false; +} + pub const OpCode = enum(u8) { OP_CONSTANT, OP_NULL, @@ -185,6 +278,11 @@ const Self = @This(); pub const max_constants = std.math.maxInt(u24); +const InstructionRange = struct { + start: usize, + end: usize, +}; + const RegistryContext = struct { pub fn hash(_: RegistryContext, key: Self) u64 { return std.hash.Wyhash.hash( diff --git a/src/Jit.zig b/src/Jit.zig index 29714d58..746a7c73 100644 --- a/src/Jit.zig +++ b/src/Jit.zig @@ -141,6 +141,8 @@ objclosures_queue: std.AutoHashMapUnmanaged(Ast.Node.Index, *o.ObjClosure) = .em required_ext_api: std.AutoHashMapUnmanaged(ExternApi, void) = .empty, /// Modules to load when linking/generating modules: std.ArrayList(m.MIR_module_t) = .empty, +/// Amount of time passed in JIT +duration: std.Io.Duration = .fromMilliseconds(0), pub fn init(process: Init, gc: *GC) Error!Self { return .{ @@ -175,6 +177,97 @@ pub fn deinit(self: *Self) void { m.MIR_finish(self.ctx); } +pub fn compileFunctionIfNeeded(self: *Self, closure: *o.ObjClosure) StartError!bool { + self.call_count += 1; + + switch (closure.function.type_def.resolved_type.?.Function.function_type) { + .Extern, + .Script, + .ScriptEntryPoint, + .EntryPoint, + .Repl, + => return false, + else => {}, + } + + const function_ast = closure.function.chunk.ast; + + if (function_ast.nodes.items(.jit_status)[closure.function.node] != .compilable or + function_ast.nodes.items(.compiled)[closure.function.node] != null) + { + return false; + } + + if (BuildOptions.jit_always_on or 
closure.function.call_count > BuildOptions.jit_call_threshold) { + const score = closure.function.call_count * closure.function.chunk.score(); + if (score == 0) { + function_ast.nodes.items(.jit_status)[closure.function.node] = .blacklisted; + + if (BuildOptions.jit_debug) { + log.info( + "Blacklisted function `{s}` for JIT compilation", + .{ + closure.function.type_def.resolved_type.?.Function.name.string, + }, + ); + } + + return false; + } + + if (BuildOptions.jit_always_on or score > BuildOptions.jit_score_threshold) { + self.compile(function_ast, closure, null) catch |err| { + if (err == Error.CantCompile) { + return false; + } else { + return err; + } + }; + + return true; + } + } + + return false; +} + +pub fn compileHotspotIfNeeded(self: *Self, ast: Ast.Slice, frame_closure: *o.ObjClosure, node: Ast.Node.Index) StartError!void { + if (ast.nodes.items(.jit_status)[node] != .compilable or + ast.nodes.items(.compiled)[node] != null) + { + return; + } + + if (BuildOptions.jit_hotspot_always_on or ast.nodes.items(.count)[node] > BuildOptions.jit_hotspot_threshold) { + const score = ast.nodes.items(.count)[node] * try ast.score(self.gc.allocator, node); + if (score == 0) { + ast.nodes.items(.jit_status)[node] = .blacklisted; + + if (BuildOptions.jit_debug) { + log.info( + "Blacklisted hotspot {} ({s}) for JIT compilation", + .{ + node, + @tagName(ast.nodes.items(.tag)[node]), + }, + ); + } + + return; + } + + if (BuildOptions.jit_hotspot_always_on or score > BuildOptions.jit_hotspot_score_threshold) { + self.compile(ast, frame_closure, node) catch |err| { + if (err == Error.CantCompile) { + return; + } else { + return err; + } + }; + } + } +} + pub fn compile(self: *Self, ast: Ast.Slice, closure: *o.ObjClosure, hotspot_node: ?Ast.Node.Index) StartError!void { const ast_node = hotspot_node orelse closure.function.node; @@ -258,13 +351,24 @@ pub fn start(self: *Self) StartError!void { fn work(self: *Self) Error!void { while (self.jobs.front()) |job| { if 
(BuildOptions.jit_debug) { - log.debug( - "Worker starting job for node {} and closure {*}", - .{ - job.node, - job.closure, - }, - ); + if (job.node == job.closure.function.node) + log.info( + "Worker starting for compiling function `{s}` with score {}", + .{ + job.closure.function.type_def.resolved_type.?.Function.name.string, + job.closure.function.call_count * job.closure.function.chunk.complexity_score.?, + }, + ) + else + log.info( + "Worker starting for hotspot node {} ({s}) with score {} in function `{s}`", + .{ + job.node, + @tagName(job.ast.nodes.items(.tag)[job.node]), + job.ast.nodes.items(.count)[job.node] * job.ast.nodes.items(.complexity_score)[job.node].?, + job.closure.function.type_def.resolved_type.?.Function.name.string, + }, + ); } try self.doJob(job); @@ -281,6 +385,32 @@ fn work(self: *Self) Error!void { } fn doJob(self: *Self, job: *const Job) Error!void { + var start_timestamp = std.Io.Clock.Timestamp.now(self.process.io, .awake); + defer if (BuildOptions.jit_debug or BuildOptions.show_perf) { + const time = start_timestamp.untilNow(self.process.io).raw.toMilliseconds(); + + if (job.node == job.closure.function.node) + log.info( + "Finished job function `{s}` with score {} in {}ms", + .{ + job.closure.function.type_def.resolved_type.?.Function.name.string, + job.closure.function.call_count * job.closure.function.chunk.complexity_score.?, + time, + }, + ) + else + log.info( + "Finished job for hotspot node {} ({s}) with score {} in function `{s}` in {}ms", + .{ + job.node, + @tagName(job.ast.nodes.items(.tag)[job.node]), + job.ast.nodes.items(.count)[job.node] * job.ast.nodes.items(.complexity_score)[job.node].?, + job.closure.function.type_def.resolved_type.?.Function.name.string, + time, + }, + ); + }; + // Remember we need to set this function's fields. Hotspot jobs are tied to // a closure for context, but their native code belongs to the AST node, not // to the enclosing function. 
@@ -481,35 +611,6 @@ fn buildFunction(self: *Self, ast: Ast.Slice, closure: ?*o.ObjClosure, ast_node: try self.modules.append(self.gc.allocator, module); self.state.?.module = module; - if (BuildOptions.jit_debug) { - if (closure) |uclosure| { - log.debug( - "Compiling function `{s}` because it was called {}/{} times\n", - .{ - qualified_name, - uclosure.function.call_count, - self.call_count, - }, - ); - } else { - if (tag.isHotspot()) { - log.debug( - "Compiling hotspot for node {s} {}\n", - .{ - @tagName(self.state.?.ast.nodes.items(.tag)[ast_node]), - ast_node, - }, - ); - } else { - log.debug( - "Compiling closure `{s}`\n", - .{ - qualified_name, - }, - ); - } - } - } _ = (if (tag.isHotspot()) self.generateHotspotFunction(ast_node) @@ -7293,6 +7394,11 @@ fn REG(self: *Self, name: [*:0]const u8, reg_type: m.MIR_type_t) !m.MIR_reg_t { } fn outputModule(self: *Self, name: []const u8, module: m.MIR_module_t) void { + std.Io.Dir.cwd().access(self.process.io, "./dist/gen", .{ .read = true }) catch { + std.Io.Dir.cwd().createDirPath(self.process.io, "./dist/gen") catch + @panic("Could not create debug path to output MIR modules"); + }; + // Output MIR code to .mir file var debug_path = std.Io.Writer.Allocating.init(self.gc.allocator); defer debug_path.deinit(); @@ -7302,7 +7408,7 @@ fn outputModule(self: *Self, name: []const u8, module: m.MIR_module_t) void { .{ name, }, - ) catch unreachable; + ) catch @panic("Out of memory"); const debug_file = std.c.fopen( @ptrCast(debug_path.written().ptr), diff --git a/src/Runner.zig b/src/Runner.zig index d2cae5b7..6c36e5ac 100644 --- a/src/Runner.zig +++ b/src/Runner.zig @@ -24,6 +24,8 @@ const disassembler = @import("disassembler.zig"); const Runner = @This(); +const log = std.log.scoped(.runner); + process: Init, vm: VM, gc: GC, @@ -119,6 +121,17 @@ pub fn runFile( _ = try file.readPositionalAll(runner.process.io, source, 0); + var start_timestamp = std.Io.Clock.Timestamp.now(runner.process.io, .awake); + defer if 
(BuildOptions.show_perf) { + log.info( + "Ran file {s} in {}ms", + .{ + file_name, + start_timestamp.untilNow(runner.process.io).raw.toMilliseconds(), + }, + ); + }; + if (try runner.parser.parse(source, null, file_name)) |ast| { if (runner.vm.flavor != .Fmt) { const ast_slice = ast.slice(); diff --git a/src/obj.zig b/src/obj.zig index bd34a364..6ed909b3 100644 --- a/src/obj.zig +++ b/src/obj.zig @@ -1316,21 +1316,21 @@ pub const ObjFunction = struct { chunk: Chunk, upvalue_count: u8 = 0, - // So we can JIT the function at runtime + // Jit related stuff + + /// So we can JIT the function at runtime node: Ast.Node.Index, - // How many time the function was called + /// How many time the function was called call_count: u32 = 0, - - // JIT compiled function + /// JIT compiled function native_raw: ?*anyopaque = null, - - // JIT compiled function callable by buzz VM + /// JIT compiled function callable by buzz VM native: ?*anyopaque = null, pub fn init(allocator: Allocator, ast: Ast.Slice, node: Ast.Node.Index) !Self { - return Self{ + return .{ .node = node, - .chunk = Chunk.init(allocator, ast), + .chunk = .init(allocator, ast), }; } diff --git a/src/vm.zig b/src/vm.zig index 9a2294c2..a56d6ac7 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -4545,12 +4545,10 @@ pub const VM = struct { function_ast.nodes.items(.count)[node] += 1; - if (self.shouldCompileHotspot(function_ast, node)) { - self.jit.?.compile( - function_ast, - frame.closure, - node, - ) catch {}; + if (BuildOptions.jit and BuildOptions.jit_hotspot and self.jit != null) { + self.jit.?.compileHotspotIfNeeded(function_ast, frame.closure, node) catch { + // FIXME: what to do? 
+ }; } if (function_ast.nodes.items(.compiled)[node]) |native| { @@ -4572,17 +4570,6 @@ pub const VM = struct { }; obj_native.mark(self.gc); - if (BuildOptions.jit_debug) { - print( - self.process.io, - "Compiled hotspot {s} in function `{s}`\n", - .{ - @tagName(function_ast.nodes.items(.tag)[node]), - frame.closure.function.type_def.resolved_type.?.Function.name.string, - }, - ); - } - // The now compile hotspot must be a new constant for the current function frame.closure.function.chunk.constants.append( frame.closure.function.chunk.allocator, @@ -5013,54 +5000,15 @@ pub const VM = struct { } fn compileAndCall(self: *Self, closure: *obj.ObjClosure, arg_count: u8, catch_value: ?Value) Error!bool { - var native = closure.function.native; - if (self.jit) |*jit| { - jit.call_count += 1; - // Do we need to jit the function? - // TODO: figure out threshold strategy - if (self.shouldCompileFunction(closure)) { - var success = true; - jit.compile(closure.function.chunk.ast, closure, null) catch |err| { - if (err == Error.CantCompile) { - success = false; - } else { - return err; - } - }; - - if (BuildOptions.jit_debug and success) { - print( - self.process.io, - "Compiled function `{s}`\n", - .{ - closure.function.type_def.resolved_type.?.Function.name.string, - }, - ); - } - - if (success) { - native = closure.function.native; - } - } - } - - // Is there a compiled version of it? 
- if (native != null) { - // if (BuildOptions.jit_debug) { - // print( - // self.process.io, - // "Calling compiled version of function `{s}.{}.n{}`\n", - // .{ - // closure.function.type_def.resolved_type.?.Function.name.string, - // self.current_ast.nodes.items(.components)[closure.function.node].Function.id, - // closure.function.node, - // }, - // ); - // } - + if (closure.function.native orelse + if (BuildOptions.jit and self.jit != null and try self.jit.?.compileFunctionIfNeeded(closure)) + closure.function.native + else + closure.function.native) |native| + { try self.callCompiled( closure, - @ptrCast(@alignCast(native.?)), + @ptrCast(@alignCast(native)), arg_count, catch_value, ); @@ -5072,7 +5020,7 @@ pub const VM = struct { } fn call(self: *Self, closure: *obj.ObjClosure, arg_count: u8, catch_value: ?Value) Error!void { - closure.function.call_count += 1; + if (closure.function.native == null) closure.function.call_count += 1; if (BuildOptions.recursive_call_limit) |recursive_call_limit| { // If recursive call, update counter @@ -5547,48 +5495,6 @@ pub const VM = struct { return created_upvalue; } - fn shouldCompileFunction(self: *Self, closure: *obj.ObjClosure) bool { - const function_type = closure.function.type_def.resolved_type.?.Function.function_type; - const function_ast = closure.function.chunk.ast; - - switch (function_type) { - .Extern, - .Script, - .ScriptEntryPoint, - .EntryPoint, - .Repl, - => return false, - else => {}, - } - - return function_ast.nodes.items(.jit_status)[closure.function.node] == .compilable and - function_ast.nodes.items(.compiled)[closure.function.node] == null and - self.jit != null and - ( - // Always on - BuildOptions.jit_always_on or - // Threshold reached - (closure.function.call_count > 10 and - (@as(f64, @floatFromInt(closure.function.call_count)) / @as(f64, @floatFromInt(self.jit.?.call_count))) > BuildOptions.jit_prof_threshold)); - } - - fn shouldCompileHotspot(self: *Self, ast: Ast.Slice, node: Ast.Node.Index) 
bool { - const count = ast.nodes.items(.count)[node]; - - return BuildOptions.jit_hotspot_on and - // Marked as compilable - ast.nodes.items(.jit_status)[node] == .compilable and - ast.nodes.items(.compiled)[node] == null and - self.jit != null and - // JIT compile all the thing? - ( - // Always compile - BuildOptions.jit_always_on or BuildOptions.jit_hotspot_always_on or - // Threshold reached - (count > 10 and - (@as(f64, @floatFromInt(count)) / @as(f64, @floatFromInt(self.hotspots_count))) > BuildOptions.jit_prof_threshold)); - } - fn patchHotspot( self: *Self, location: Ast.TokenIndex, @@ -5623,6 +5529,7 @@ pub const VM = struct { ); const hotspot_call_start = to - hotspot_call.len; + try chunk.addCompiledHotspotRange(frame.ip - 1, hotspot_call_start); // In the event that we are in a nested loop, we put a jump instruction in place of OP_HOTSPOT chunk.code.items[frame.ip - 2] = (@as(u32, @intCast(@intFromEnum(Chunk.OpCode.OP_JUMP))) << 24) | @as( From abc2cf774c8b41f428092b96389f7516fe65ea17 Mon Sep 17 00:00:00 2001 From: Benoit Giannangeli Date: Fri, 15 May 2026 09:32:40 +0200 Subject: [PATCH 2/5] fix(jit): In async JIT, batch collateral function compiling with main job Worker thread does not set compiled function pointers into closure to avoid VM running half compiled batch of functions --- AGENTS.md | 3 +- build.zig | 2 +- src/Jit.zig | 172 ++++++++++++++++++++++++++++++++++------------- src/Runner.zig | 8 +-- src/behavior.zig | 1 + src/main.zig | 1 + 6 files changed, 135 insertions(+), 52 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 62fbbf9a..b3e2bab9 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -32,7 +32,8 @@ FFI, Debugger, and LSP are immature. Treat changes there as higher risk. - Before editing, check the working tree. - If the working tree is clean, make the requested changes normally. - If the working tree only has untracked files, edits are allowed. Do not overwrite or delete unrelated untracked files. 
-- If the working tree has tracked modifications or staged changes, continue investigation read-only, then show the diff you would have applied and say that no files were modified because the working copy was not clean. +- If the working tree has tracked modifications or staged changes that were not initiated by the current agent/session, continue investigation read-only, then show the diff you would have applied and say that no files were modified because the working copy was not clean. +- If tracked modifications were initiated by the current agent/session for the active task, the agent may continue editing those files and related files needed to finish the same task. - Do not overwrite or revert user changes. Agents may delete files they generated during their own work. Do not delete files that were already tracked by git. diff --git a/build.zig b/build.zig index 499d3e60..be68cff1 100644 --- a/build.zig +++ b/build.zig @@ -1004,7 +1004,7 @@ const BuildOptions = struct { options.addOption(@TypeOf(self.hotspot), "jit_hotspot", self.hotspot); options.addOption(@TypeOf(self.asynchronous), "jit_asynchronous", self.asynchronous); options.addOption(@TypeOf(self.hotspot_threshold), "jit_hotspot_threshold", self.hotspot_threshold); - options.addOption(@TypeOf(self.hotspot_score_threshold), "jit_hotspot_score_threshold", self.hotspot_threshold); + options.addOption(@TypeOf(self.hotspot_score_threshold), "jit_hotspot_score_threshold", self.hotspot_score_threshold); } }; diff --git a/src/Jit.zig b/src/Jit.zig index 746a7c73..8ba55fa8 100644 --- a/src/Jit.zig +++ b/src/Jit.zig @@ -122,6 +122,8 @@ ctx: m.MIR_context_t, call_count: usize = 0, /// Queue of things to compile jobs: SpscQueue(Job), +/// Completed jobs ready to be published by the VM thread +completed_jobs: SpscQueue(CompletedJob), /// Worker thread worker: ?std.Thread = null, /// To stop the worker, @@ -150,6 +152,7 @@ pub fn init(process: Init, gc: *GC) Error!Self { .ctx = m.MIR_init(), .process = process, .jobs 
= try .initCapacity(gc.allocator, 256), + .completed_jobs = try .initCapacity(gc.allocator, 256), }; } @@ -166,10 +169,17 @@ fn reset(self: *Self) void { } pub fn deinit(self: *Self) void { - if (self.worker != null) { - self.worker.?.detach(); + if (self.worker) |worker| { + while (!self.worker_stopped.load(.acquire)) { + self.publishCompleted(); + std.atomic.spinLoopHint(); + } + worker.join(); } + self.publishCompleted(); + self.jobs.deinit(self.gc.allocator); + self.completed_jobs.deinit(self.gc.allocator); self.functions_queue.deinit(self.gc.allocator); self.objclosures_queue.deinit(self.gc.allocator); self.required_ext_api.deinit(self.gc.allocator); @@ -178,6 +188,8 @@ pub fn deinit(self: *Self) void { } pub fn compileFunctionIfNeeded(self: *Self, closure: *o.ObjClosure) StartError!bool { + self.publishCompleted(); + self.call_count += 1; switch (closure.function.type_def.resolved_type.?.Function.function_type) { @@ -232,6 +244,8 @@ pub fn compileFunctionIfNeeded(self: *Self, closure: *o.ObjClosure) StartError!b } pub fn compileHotspotIfNeeded(self: *Self, ast: Ast.Slice, frame_closure: *o.ObjClosure, node: Ast.Node.Index) StartError!void { + self.publishCompleted(); + if (ast.nodes.items(.jit_status)[node] != .compilable or ast.nodes.items(.compiled)[node] != null) { @@ -323,7 +337,10 @@ pub fn compile(self: *Self, ast: Ast.Slice, closure: *o.ObjClosure, hotspot_node // If the worker is not working, start it again try self.start(); } else { - try self.doJob(&job); + var completed_job = try self.doJob(&job); + defer completed_job.deinit(self.gc.allocator); + + self.publishCompletedJob(&completed_job); self.reset(); } } @@ -349,7 +366,18 @@ pub fn start(self: *Self) StartError!void { } fn work(self: *Self) Error!void { + defer self.worker_stopped.store(true, .release); + while (self.jobs.front()) |job| { + switch (job.ast.nodes.items(.jit_status)[job.node]) { + .blacklisted, .compiled => { + self.jobs.pop(); + self.reset(); + continue; + }, + .queued, 
.compilable => {}, + } + if (BuildOptions.jit_debug) { if (job.node == job.closure.function.node) log.info( @@ -371,8 +399,17 @@ fn work(self: *Self) Error!void { ); } - try self.doJob(job); + const completed_job = self.doJob(job) catch |err| { + if (err != Error.CantCompile) { + return err; + } + + self.jobs.pop(); + self.reset(); + continue; + }; + self.completed_jobs.push(completed_job); self.jobs.pop(); self.reset(); } @@ -380,11 +417,69 @@ fn work(self: *Self) Error!void { if (BuildOptions.jit_debug) { log.debug("Worker done", .{}); } +} + +const GeneratedFunction = struct { + node: Ast.Node.Index, + closure: ?*o.ObjClosure, + native: ?*anyopaque, + native_raw: ?*anyopaque, +}; + +const CompletedJob = struct { + root_node: Ast.Node.Index, + ast: Ast.Slice, + functions: []GeneratedFunction, + + fn deinit(self: *CompletedJob, allocator: std.mem.Allocator) void { + allocator.free(self.functions); + } +}; + +fn publishCompleted(self: *Self) void { + while (self.completed_jobs.front()) |completed_job| { + self.publishCompletedJob(completed_job); + completed_job.deinit(self.gc.allocator); + self.completed_jobs.pop(); + } +} + +fn publishCompletedJob(_: *Self, completed_job: *CompletedJob) void { + for (completed_job.functions) |generated| { + if (generated.closure) |closure| { + closure.function.native_raw = generated.native_raw; + closure.function.native = generated.native; + } else if (generated.node == completed_job.root_node) { + completed_job.ast.nodes.items(.compiled)[generated.node] = generated.native_raw; + } + + completed_job.ast.nodes.items(.jit_status)[generated.node] = .compiled; + } +} + +fn queueCollateralFunction(self: *Self, node: Ast.Node.Index, closure: ?*o.ObjClosure) Error!void { + if (self.state.?.ast.nodes.items(.compiled)[node] != null) { + return; + } + + switch (self.state.?.ast.nodes.items(.jit_status)[node]) { + .blacklisted => return error.CantCompile, + .compiled => return, + .compilable, .queued => {}, + } + + if (closure) |uclosure| { 
+ try self.objclosures_queue.put(self.gc.allocator, node, uclosure); + } + + if (!self.functions_queue.contains(node)) { + try self.functions_queue.put(self.gc.allocator, node, null); + } - self.worker_stopped.store(true, .release); + self.state.?.ast.nodes.items(.jit_status)[node] = .queued; } -fn doJob(self: *Self, job: *const Job) Error!void { +fn doJob(self: *Self, job: *const Job) Error!CompletedJob { var start_timestamp = std.Io.Clock.Timestamp.now(self.process.io, .awake); defer if (BuildOptions.jit_debug or BuildOptions.show_perf) { const time = start_timestamp.untilNow(self.process.io).raw.toMilliseconds(); @@ -463,24 +558,24 @@ fn doJob(self: *Self, job: *const Job) Error!void { m.MIR_gen_init(self.ctx); defer m.MIR_gen_finish(self.ctx); - // Generate all needed functions and set them in corresponding ObjFunctions + var generated_functions = std.ArrayList(GeneratedFunction).empty; + errdefer generated_functions.deinit(self.gc.allocator); + + // Generate all needed functions before publishing them on the VM thread. 
var it2 = self.functions_queue.iterator(); while (it2.next()) |kv| { const node = kv.key_ptr.*; const items = kv.value_ptr.*.?; - const native = if (items.native) |item| m.MIR_gen(self.ctx, item) else null; - const native_raw = if (items.native_raw) |item| m.MIR_gen(self.ctx, item) else null; - - // Find out if we need to set it in closure or hostpot node - if (self.objclosures_queue.get(node)) |closure| { - closure.function.native = native; - closure.function.native_raw = native_raw; - } else if (node == job.node) { - job.ast.nodes.items(.compiled)[node] = native_raw; - } - - job.ast.nodes.items(.jit_status)[node] = .compiled; + try generated_functions.append( + self.gc.allocator, + .{ + .node = node, + .closure = self.objclosures_queue.get(node), + .native = if (items.native) |item| m.MIR_gen(self.ctx, item) else null, + .native_raw = if (items.native_raw) |item| m.MIR_gen(self.ctx, item) else null, + }, + ); } if (BuildOptions.jit_debug) { @@ -492,13 +587,19 @@ fn doJob(self: *Self, job: *const Job) Error!void { }, ); } + + return .{ + .root_node = job.node, + .ast = job.ast, + .functions = try generated_functions.toOwnedSlice(self.gc.allocator), + }; } fn getString(self: *Self, string: []const u8) Error!*o.ObjString { - return if (BuildOptions.jit_always_on) + return self.gc.strings.get(string) orelse if (BuildOptions.jit_always_on) try self.gc.copyString(string) // In this case, we did not run bytecode even once so strings are likely not interned else - self.gc.strings.get(string).?; + error.CantCompile; } fn loadRequiredExternalApi(self: *Self) Error!void { @@ -624,10 +725,8 @@ fn buildFunction(self: *Self, ast: Ast.Slice, closure: ?*o.ObjClosure, ast_node: m.MIR_finish_func(self.ctx); _ = self.functions_queue.remove(ast_node); - if (closure) |uclosure| { - _ = self.objclosures_queue.remove(uclosure.function.node); - ast.nodes.items(.jit_status)[uclosure.function.node] = .blacklisted; - } + _ = self.objclosures_queue.remove(ast_node); + 
ast.nodes.items(.jit_status)[ast_node] = .blacklisted; } return err; @@ -2013,20 +2112,7 @@ fn generateNamedVariable(self: *Self, node: Ast.Node.Index) Error!?m.MIR_op_t { // Get the actual Value as it is right now (which is correct since a function doesn't change) const closure = o.ObjClosure.cast(self.state.?.closure.globals.items[components.slot].obj()).?; - // Does it need to be compiled? - switch (self.state.?.ast.nodes.items(.jit_status)[closure.function.node]) { - .compilable => if (self.state.?.ast.nodes.items(.compiled)[closure.function.node] == null) { - // Remember we need to set native fields of this ObjFunction later - try self.objclosures_queue.put(self.gc.allocator, closure.function.node, closure); - - // Remember that we need to compile this function later - try self.functions_queue.put(self.gc.allocator, closure.function.node, null); - - self.state.?.ast.nodes.items(.jit_status)[closure.function.node] = .queued; - }, - .blacklisted => return error.CantCompile, - .queued, .compiled => {}, - } + try self.queueCollateralFunction(closure.function.node, closure); return m.MIR_new_uint_op(self.ctx, closure.toValue().val); } else { @@ -5092,13 +5178,7 @@ fn generateFunction(self: *Self, node: Ast.Node.Index) Error!?m.MIR_op_t { const nativefn_qualified_name = try self.getQualifiedName(node, false); defer self.gc.allocator.free(nativefn_qualified_name); - // Remember that we need to compile this function later - if (self.state.?.ast.nodes.items(.compiled)[node] == null and - self.state.?.ast.nodes.items(.jit_status)[node] == .compilable) - { - try self.functions_queue.put(self.gc.allocator, node, null); - self.state.?.ast.nodes.items(.jit_status)[node] = .queued; - } + try self.queueCollateralFunction(node, null); // For now declare it const native_raw = m.MIR_new_import(self.ctx, @ptrCast(qualified_name)); diff --git a/src/Runner.zig b/src/Runner.zig index 6c36e5ac..700a35ce 100644 --- a/src/Runner.zig +++ b/src/Runner.zig @@ -37,6 +37,10 @@ imports: 
std.StringHashMapUnmanaged(Parser.ScriptImport) = .empty, dlib_symbols: std.StringHashMapUnmanaged(Parser.Dlib) = .empty, pub fn deinit(self: *Runner) void { + if (!is_wasm and self.vm.jit != null) { + self.vm.jit.?.deinit(); + self.vm.jit = null; + } self.codegen.deinit(); self.parser.deinit(); var it = self.dlib_symbols.valueIterator(); @@ -51,10 +55,6 @@ pub fn deinit(self: *Runner) void { } self.imports.deinit(self.gc.allocator); // TODO: free type_registry and its keys which are on the heap - if (!is_wasm and self.vm.jit != null) { - self.vm.jit.?.deinit(); - self.vm.jit = null; - } self.vm.deinit(); } diff --git a/src/behavior.zig b/src/behavior.zig index c0959343..e656bd79 100644 --- a/src/behavior.zig +++ b/src/behavior.zig @@ -78,6 +78,7 @@ fn testBehaviors(process: std.process.Init, allocator: std.mem.Allocator, fail_f var had_error: bool = false; var runner: Runner = undefined; try runner.init(process, allocator, .Test, null); + defer runner.deinit(); var failed = false; _ = runner.runFile( diff --git a/src/main.zig b/src/main.zig index a1e63156..45b1aa32 100644 --- a/src/main.zig +++ b/src/main.zig @@ -144,6 +144,7 @@ pub fn main(provided_init: Init) u8 { ) catch { return 1; }; + defer runner.deinit(); return runner.runFile( res.positionals[0][0], From 8834389e680910eacb02b1c4cd0f04d256c3b7ab Mon Sep 17 00:00:00 2001 From: Benoit Giannangeli Date: Fri, 15 May 2026 12:26:26 +0200 Subject: [PATCH 3/5] fix(jit): JIT tuning --- build.zig | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/build.zig b/build.zig index be68cff1..3cab294d 100644 --- a/build.zig +++ b/build.zig @@ -918,27 +918,27 @@ const BuildOptions = struct { bool, "jit_asynchronous", "JIT will work in a dedicated thread", - ) orelse false, + ) orelse true, .call_threshold = b.option( u16, "jit_call_threshold", "Call count threshold above which the function is being considered for JIT compilation.", - ) orelse 32, + ) orelse 1024, .score_threshold = 
b.option( u16, "jit_score_threshold", "Complexity score threshold above which the function will be JIT compiled.", - ) orelse 1024, + ) orelse 65535, .hotspot_threshold = b.option( u16, "jit_hotspot_threshold", - "Loop count threshold above which aloop is being considered for JIT compilation.", - ) orelse 16, + "Loop count threshold above which a loop is being considered for JIT compilation.", + ) orelse 256, .hotspot_score_threshold = b.option( u16, "jit_hotspot_score_threshold", "Complexity score threshold above which a loop node will be JIT compiled.", - ) orelse 1024, + ) orelse 65535, }, }; } @@ -988,10 +988,10 @@ const BuildOptions = struct { hotspot_always_on: bool, hotspot: bool, debug: bool, - call_threshold: u16 = 32, - score_threshold: u16 = 1024, - hotspot_threshold: u16 = 16, - hotspot_score_threshold: u16 = 1024, + call_threshold: u16 = 1024, + score_threshold: u16 = 65535, + hotspot_threshold: u16 = 256, + hotspot_score_threshold: u16 = 65535, asynchronous: bool, pub fn step(self: JITOptions, options: *Build.Step.Options) void { From 2e60c6a5159ee082dc032c7932f5ee8d871bc6ab Mon Sep 17 00:00:00 2001 From: Benoit Giannangeli Date: Fri, 15 May 2026 18:08:53 +0200 Subject: [PATCH 4/5] feat(jit): Make JIT calls tolerate dynamic interpreted callees Lazy JIT compilation in bz_context now either synchronously compile the collateral function or falls back to calling interpreted function with JIT compiler is already busy or if the function can't be compiled (this was not handled before). 
--- .github/workflows/ci.yaml | 4 +- AGENTS.md | 14 + scripts/perf_compare_commits.py | 702 --------------------------- src/Ast.zig | 6 +- src/Jit.zig | 163 +++++-- src/buzz_api.zig | 32 +- src/lib/buzz_api.zig | 2 + src/obj.zig | 2 + src/vm.zig | 18 + tests/behavior/jit-dynamic-call.buzz | 60 +++ tests/bench/fasta.buzz | 2 +- tests/bench/nbody.buzz | 52 +- tests/bench/spectral.buzz | 4 +- 13 files changed, 286 insertions(+), 775 deletions(-) delete mode 100755 scripts/perf_compare_commits.py create mode 100644 tests/behavior/jit-dynamic-call.buzz diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 71e7e082..0a19f532 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -21,8 +21,8 @@ jobs: - "ReleaseSmall" options: - "" - - "-Djit_always_on" - - "-Djit_hotspot_always_on" + - "-Djit_always_on -Djit_asynchronous=false" + - "-Djit_hotspot_always_on -Djit_asynchronous=false" steps: - name: Checkout project diff --git a/AGENTS.md b/AGENTS.md index b3e2bab9..8fad3d26 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -147,9 +147,23 @@ Useful build flags include: - `-Dgc_debug=true`, `-Dgc_debug_light=true`, and `-Dgc_debug_access=true` for GC debugging. - `-Djit_debug=true` for JIT debugging. - `-Djit=false` to disable JIT while isolating runtime issues. +- `-Djit_asynchronous=` controls whether JIT jobs run on the worker thread. Keep it enabled by default unless isolating an async publication issue. +- `-Djit_call_threshold=` is the function call count before a function is considered for JIT compilation. +- `-Djit_score_threshold=` is the function score gate. Function score is call count multiplied by chunk complexity. +- `-Djit_hotspot_threshold=` is the loop/hotspot execution count before a hotspot is considered for JIT compilation. +- `-Djit_hotspot_score_threshold=` is the hotspot score gate. Hotspot score is execution count multiplied by AST hotspot complexity. 
- `-Dcycle_limit=` to limit bytecode execution, noting that it disables JIT compilation. - `-Dmemory_limit=` to reproduce or bound memory behavior. +Current default JIT thresholds are intentionally conservative: call threshold `1024`, function score threshold `65535`, hotspot threshold `256`, hotspot score threshold `65535`, async enabled. When tuning, compare against the full `tests/bench` matrix instead of optimizing a single benchmark: + +```sh +scripts/jit_bench_matrix.sh quick +scripts/jit_bench_matrix.sh final nojit current sync-current hotspot-only +``` + +The matrix writes timings and output-hash comparisons under `zig-cache/jit-bench/`. + ## Debugging Guidance - For parser/typechecker/codegen issues, prefer the smallest `.buzz` regression test that reproduces the behavior. diff --git a/scripts/perf_compare_commits.py b/scripts/perf_compare_commits.py deleted file mode 100755 index 4d3f1968..00000000 --- a/scripts/perf_compare_commits.py +++ /dev/null @@ -1,702 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import csv -import datetime as dt -import json -import math -import os -import pathlib -import shutil -import subprocess -import sys -import tempfile - - -FIRST_RUN_SLOW_WARNING = "first benchmarking run for this command was significantly slower" - - -BENCHMARKS = [ - { - "name": "006_vm_arithmetic_dispatch", - "path": "tests/perf/006_vm_arithmetic_dispatch.buzz", - }, - { - "name": "009_vm_object_properties", - "path": "tests/perf/009_vm_object_properties.buzz", - }, - { - "name": "013_vm_concat_clone", - "path": "tests/perf/013_vm_concat_clone.buzz", - }, - { - "name": "014_jit_cheap_hotspot", - "path": "tests/perf/014_jit_cheap_hotspot.buzz", - }, - { - "name": "015_jit_object_heavy_hotspot", - "path": "tests/perf/015_jit_object_heavy_hotspot.buzz", - }, - { - "name": "bench_001_btree_depth14", - "path": "tests/bench/btree.buzz", - "args": ["14"], - }, - { - "name": "bench_002_merkle_depth12", - "path": "tests/bench/merkle.buzz", - "args": ["12"], 
- }, - { - "name": "bench_005_k_nucleoide", - "path": "tests/bench/k-nucleoide.buzz", - "stdin": "tests/bench/reference/knucleotide-input.txt", - }, - { - "name": "bench_007_fib", - "path": "tests/bench/fib.buzz", - }, - { - "name": "bench_008_for", - "path": "tests/bench/for.buzz", - }, - { - "name": "bench_009_grid_1000x800", - "path": "tests/bench/grid.buzz", - "args": ["1000", "800"], - }, - { - "name": "bench_010_ackermann_3_9", - "path": "tests/bench/ackermann.buzz", - "args": ["3", "9"], - }, - { - "name": "bench_011_bubble_sort_3000", - "path": "tests/bench/bubble-sort.buzz", - "args": ["3000"], - }, -] - - -def run(args, cwd=None, capture=False, check=True): - result = subprocess.run( - args, - cwd=cwd, - check=check, - text=True, - stdout=subprocess.PIPE if capture else None, - stderr=subprocess.PIPE if capture else None, - ) - return result.stdout.strip() if capture else "" - - -def require_cmd(name): - if shutil.which(name) is None: - raise SystemExit(f"error: missing required command: {name}") - - -def split_env(name, default): - value = os.environ.get(name, default) - return value.split() if value else [] - - -def commit_subject(repo, commit): - return run(["git", "log", "-1", "--format=%s", commit], cwd=repo, capture=True) - - -def short_sha(repo, commit): - return run(["git", "rev-parse", "--short=12", commit], cwd=repo, capture=True) - - -def commit_range(repo, start_sha, head_sha): - has_parent = subprocess.run( - ["git", "rev-parse", "--verify", f"{start_sha}^"], - cwd=repo, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ).returncode == 0 - - if has_parent: - out = run( - ["git", "rev-list", "--reverse", "--ancestry-path", f"{start_sha}^..{head_sha}"], - cwd=repo, - capture=True, - ) - return [line for line in out.splitlines() if line] - - out = run( - ["git", "rev-list", "--reverse", "--ancestry-path", f"{start_sha}..{head_sha}"], - cwd=repo, - capture=True, - ) - return [start_sha] + [line for line in out.splitlines() if line] - - 
-def benchmark_command_for(benchmark): - args = " ".join(benchmark.get("args", [])) - if args: - args = f" {args}" - - stdin = benchmark.get("stdin") - stdin_redirect = f" < {stdin}" if stdin else "" - - return f"./zig-out/bin/buzz {benchmark['path']}{args}{stdin_redirect} >/dev/null" - - -def benchmark_source_paths(): - paths = [] - seen = set() - for benchmark in BENCHMARKS: - for key in ("path", "stdin"): - path = benchmark.get(key) - if path and path not in seen: - seen.add(path) - paths.append(path) - return paths - - -def copy_benchmark_sources(repo, destination): - for relative in benchmark_source_paths(): - source = repo / relative - target = destination / relative - if not source.is_file(): - raise SystemExit(f"error: missing benchmark source: {relative}") - - target.parent.mkdir(parents=True, exist_ok=True) - shutil.copy2(source, target) - - -def hyperfine_warnings(output): - warnings = [] - - for line in output.splitlines(): - stripped = line.strip() - if stripped.startswith("Warning:"): - warnings.append(stripped.removeprefix("Warning:").strip()) - - return warnings - - -def has_first_run_slow_warning(output): - return FIRST_RUN_SLOW_WARNING in output.lower() - - -def run_hyperfine(hyperfine_cmd, cwd, name): - attempts = [] - - for attempt in range(1, 3): - result = subprocess.run( - hyperfine_cmd, - cwd=cwd, - text=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - output = result.stdout + result.stderr - print(output, end="") - - warnings = hyperfine_warnings(output) - attempts.append( - { - "returncode": result.returncode, - "warnings": warnings, - } - ) - - if result.returncode != 0: - return result.returncode, attempts - - if attempt == 1 and has_first_run_slow_warning(output): - print( - f"warning: benchmark `{name}` had a slow first run; rerunning once", - file=sys.stderr, - ) - continue - - return 0, attempts - - return 0, attempts - - -def merge_hyperfine_result(source_json, combined_results, attempts): - with source_json.open() as 
f: - payload = json.load(f) - - reran = len(attempts) > 1 - warnings = attempts[-1]["warnings"] if attempts else [] - - for result in payload.get("results", []): - result["hyperfine_warnings"] = warnings - result["reran_after_first_run_warning"] = reran - combined_results.append(result) - - -def update_submodules(worktree): - run(["git", "-C", str(worktree), "submodule", "sync", "--recursive"]) - run(["git", "-C", str(worktree), "submodule", "update", "--init", "--recursive"]) - - -def fmt_seconds(value): - if value is None or math.isnan(value): - return "" - if value >= 1: - return f"{value:.3f}s" - if value >= 0.001: - return f"{value * 1000:.2f}ms" - return f"{value * 1_000_000:.2f}us" - - -def gain(base, current): - if base is None or current is None or base == 0: - return None - return (base - current) / base * 100.0 - - -def coefficient_of_variation(result): - if result is None: - return None - - mean = result.get("mean") - stddev = result.get("stddev") - if mean is None or stddev is None or mean == 0: - return None - - return stddev / mean - - -def fmt_percent(value): - return "" if value is None else f"{value:+.2f}%" - - -def fmt_cv(result): - cv = coefficient_of_variation(result) - return "" if cv is None else f"{cv * 100:.1f}%" - - -def unstable_result(result): - if result is None: - return False - - cv = coefficient_of_variation(result) - has_warning = len(result.get("hyperfine_warnings", [])) > 0 - - return has_warning or (cv is not None and cv > 0.20) - - -def fmt_warnings(result): - if result is None: - return "" - - warnings = result.get("hyperfine_warnings", []) - if not warnings: - return "" - - return "; ".join(warnings) - - -def load_results(commits): - data = {} - failures = {} - tests = [benchmark["name"] for benchmark in BENCHMARKS] - for commit in commits: - with commit["json"].open() as f: - payload = json.load(f) - - per_test = {} - for result in payload.get("results", []): - name = result["command"] - per_test[name] = result - - 
data[commit["short"]] = per_test - failures[commit["short"]] = { - failure["name"]: failure - for failure in payload.get("failures", []) - } - - return data, failures, tests - - -def write_reports(results_dir, commits): - data, failures, tests = load_results(commits) - - csv_path = results_dir / "all-results.csv" - with csv_path.open("w", newline="") as f: - writer = csv.writer(f) - writer.writerow( - [ - "commit", - "short", - "subject", - "test", - "mean_seconds", - "stddev_seconds", - "median_seconds", - "gain_vs_start_percent", - "gain_vs_previous_percent", - "median_gain_vs_start_percent", - "median_gain_vs_previous_percent", - "user_seconds", - "user_gain_vs_start_percent", - "user_gain_vs_previous_percent", - "system_seconds", - "system_gain_vs_start_percent", - "system_gain_vs_previous_percent", - "coefficient_of_variation", - "unstable", - "hyperfine_warnings", - "reran_after_first_run_warning", - ] - ) - - previous_short = None - base_short = commits[0]["short"] - for commit in commits: - short = commit["short"] - for test in tests: - result = data.get(short, {}).get(test) - base = data.get(base_short, {}).get(test) - previous = data.get(previous_short, {}).get(test) if previous_short else None - - mean = result.get("mean") if result else None - base_mean = base.get("mean") if base else None - previous_mean = previous.get("mean") if previous else None - median = result.get("median") if result else None - base_median = base.get("median") if base else None - previous_median = previous.get("median") if previous else None - user = result.get("user") if result else None - base_user = base.get("user") if base else None - previous_user = previous.get("user") if previous else None - system = result.get("system") if result else None - base_system = base.get("system") if base else None - previous_system = previous.get("system") if previous else None - - writer.writerow( - [ - commit["sha"], - short, - commit["subject"], - test, - mean, - result.get("stddev") if 
result else None, - result.get("median") if result else None, - gain(base_mean, mean), - gain(previous_mean, mean), - gain(base_median, median), - gain(previous_median, median), - user, - gain(base_user, user), - gain(previous_user, user), - system, - gain(base_system, system), - gain(previous_system, system), - coefficient_of_variation(result), - unstable_result(result), - fmt_warnings(result), - result.get("reran_after_first_run_warning") if result else None, - ] - ) - previous_short = short - - summary_path = results_dir / "summary.md" - base = commits[0] - head = commits[-1] - with summary_path.open("w") as f: - f.write("# Buzz perf comparison\n\n") - f.write(f"Start: `{base['short']}` {base['subject']}\n\n") - f.write(f"Head: `{head['short']}` {head['subject']}\n\n") - f.write("Positive gain means the later commit is faster. Summary gains use medians; CV above 20% or any hyperfine warning is flagged as unstable. If hyperfine reports a significantly slower first run, that benchmark is rerun once and the rerun is recorded.\n\n") - - f.write("## HEAD vs start\n\n") - f.write("| Test | Start median | HEAD median | Median gain | Start user | HEAD user | User gain | HEAD CV | HEAD warnings |\n") - f.write("| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | --- |\n") - for test in tests: - base_result = data.get(base["short"], {}).get(test) - head_result = data.get(head["short"], {}).get(test) - base_median = base_result.get("median") if base_result else None - head_median = head_result.get("median") if head_result else None - g = gain(base_median, head_median) - base_user = base_result.get("user") if base_result else None - head_user = head_result.get("user") if head_result else None - user_gain = gain(base_user, head_user) - cv_text = fmt_cv(head_result) - if unstable_result(head_result): - cv_text += " unstable" - warning_text = fmt_warnings(head_result) - f.write( - f"| `{test}` | {fmt_seconds(base_median)} | {fmt_seconds(head_median)} | {fmt_percent(g)} | 
{fmt_seconds(base_user)} | {fmt_seconds(head_user)} | {fmt_percent(user_gain)} | {cv_text} | {warning_text} |\n" - ) - - f.write("\n## Per-commit rundown\n\n") - for index, commit in enumerate(commits): - short = commit["short"] - previous_short = commits[index - 1]["short"] if index > 0 else None - - f.write(f"### `{short}` {commit['subject']}\n\n") - - for test in tests: - result = data.get(short, {}).get(test) - if result is None: - failure = failures.get(short, {}).get(test) - if failure is not None: - f.write(f"- `{test}`: failed, exit {failure['returncode']}\n") - else: - f.write(f"- `{test}`: no result\n") - continue - - mean = result.get("mean") - median = result.get("median") - stddev = result.get("stddev") - user = result.get("user") - system = result.get("system") - base_result = data.get(base["short"], {}).get(test) - previous_result = data.get(previous_short, {}).get(test) if previous_short else None - - base_gain = gain(base_result.get("median") if base_result else None, median) - previous_gain = gain(previous_result.get("median") if previous_result else None, median) - base_user_gain = gain(base_result.get("user") if base_result else None, user) - previous_user_gain = gain(previous_result.get("user") if previous_result else None, user) - - parts = [ - f"mean {fmt_seconds(mean)}", - f"median {fmt_seconds(median)}", - f"stddev {fmt_seconds(stddev)}", - f"user {fmt_seconds(user)}", - f"system {fmt_seconds(system)}", - ] - - cv_text = fmt_cv(result) - if cv_text: - if unstable_result(result): - cv_text += " unstable" - parts.append(f"CV {cv_text}") - - if result.get("reran_after_first_run_warning"): - parts.append("reran after slow first run") - - warning_text = fmt_warnings(result) - if warning_text: - parts.append(f"hyperfine warning: {warning_text}") - - if previous_gain is not None: - parts.append(f"median vs previous {previous_gain:+.2f}%") - - if base_gain is not None: - parts.append(f"median vs start {base_gain:+.2f}%") - - if previous_user_gain 
is not None: - parts.append(f"user vs previous {previous_user_gain:+.2f}%") - - if base_user_gain is not None: - parts.append(f"user vs start {base_user_gain:+.2f}%") - - f.write(f"- `{test}`: " + ", ".join(parts) + "\n") - - f.write("\n") - - return summary_path, csv_path - - -def parse_args(): - parser = argparse.ArgumentParser( - description=( - "Check out each commit from START_COMMIT to HEAD, build Buzz, " - "run the selected Buzz perf benchmarks with hyperfine, and compare results." - ) - ) - parser.add_argument( - "start_commit", - nargs="?", - default="HEAD^", - help="first commit to benchmark, default: HEAD^", - ) - parser.add_argument( - "--results-dir", - default=os.environ.get("RESULTS_DIR"), - help="output directory, default: perf-results-", - ) - parser.add_argument( - "--build-args", - default=os.environ.get("BUZZ_BUILD_ARGS", "-Doptimize=ReleaseFast"), - help='arguments after "zig build", default: %(default)s', - ) - parser.add_argument( - "--warmup", - type=int, - default=int(os.environ.get("HYPERFINE_WARMUP", "5")), - help="hyperfine warmup count", - ) - parser.add_argument( - "--runs", - type=int, - default=int(os.environ.get("HYPERFINE_RUNS", "10")), - help="hyperfine run count", - ) - parser.add_argument( - "--hyperfine-extra-args", - default=os.environ.get("HYPERFINE_EXTRA_ARGS", ""), - help="extra arguments passed to hyperfine", - ) - return parser.parse_args() - - -def main(): - args = parse_args() - - for cmd in ("git", "zig", "hyperfine"): - require_cmd(cmd) - - repo = pathlib.Path(run(["git", "rev-parse", "--show-toplevel"], capture=True)) - os.chdir(repo) - - for relative in benchmark_source_paths(): - if not (repo / relative).is_file(): - raise SystemExit(f"error: benchmark source does not exist: {relative}") - - start_sha = run(["git", "rev-parse", "--verify", f"{args.start_commit}^{{commit}}"], cwd=repo, capture=True) - head_sha = run(["git", "rev-parse", "--verify", "HEAD"], cwd=repo, capture=True) - - ancestor = subprocess.run( - 
["git", "merge-base", "--is-ancestor", start_sha, head_sha], - cwd=repo, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - if ancestor.returncode != 0: - raise SystemExit(f"error: {start_sha} is not an ancestor of HEAD") - - commits_to_run = commit_range(repo, start_sha, head_sha) - if not commits_to_run: - raise SystemExit("error: empty commit range") - - timestamp = dt.datetime.now().strftime("%Y%m%d-%H%M%S") - results_dir = pathlib.Path(args.results_dir) if args.results_dir else repo / f"perf-results-{timestamp}" - json_dir = results_dir / "json" - json_dir.mkdir(parents=True, exist_ok=True) - - build_args = args.build_args.split() if args.build_args else [] - hyperfine_extra_args = args.hyperfine_extra_args.split() if args.hyperfine_extra_args else [] - - with tempfile.TemporaryDirectory(prefix="buzz-perf-commits.") as tmp: - tmp_path = pathlib.Path(tmp) - worktree = tmp_path / "worktree" - benchmark_snapshot = tmp_path / "benchmarks" - - copy_benchmark_sources(repo, benchmark_snapshot) - tests = BENCHMARKS - - run(["git", "worktree", "add", "--detach", "--quiet", str(worktree), head_sha], cwd=repo) - - commands_path = results_dir / "commands.tsv" - commits_path = results_dir / "commits.tsv" - completed_commits = [] - - try: - with commands_path.open("w", newline="") as commands_file, commits_path.open("w", newline="") as commits_file: - commands_writer = csv.writer(commands_file, delimiter="\t") - commits_writer = csv.writer(commits_file, delimiter="\t") - - print(f"Benchmarking {len(commits_to_run)} commits with {len(tests)} tests each") - print(f"Results: {results_dir}") - print(f"Build args: zig build {' '.join(build_args)}") - print("Benchmarks:") - for benchmark in tests: - print(f" - {benchmark['name']}: {benchmark_command_for(benchmark)}") - - for idx, commit in enumerate(commits_to_run, start=1): - short = short_sha(repo, commit) - subject = commit_subject(repo, commit) - json_file = json_dir / f"{idx:04d}-{short}.json" - - 
print(f"\n[{idx}/{len(commits_to_run)}] {short} {subject}") - - run(["git", "-C", str(worktree), "checkout", "--force", "--quiet", commit], cwd=repo) - update_submodules(worktree) - copy_benchmark_sources(benchmark_snapshot, worktree) - - run(["zig", "build", *build_args], cwd=worktree) - - commits_writer.writerow([idx, commit, short, subject, json_file]) - commits_file.flush() - - combined_results = [] - failures = [] - - for benchmark in tests: - name = benchmark["name"] - command = benchmark_command_for(benchmark) - benchmark_json = json_dir / f"{idx:04d}-{short}-{name}.json" - commands_writer.writerow([short, name, command]) - commands_file.flush() - - hyperfine_cmd = [ - "hyperfine", - "--warmup", - str(args.warmup), - "--runs", - str(args.runs), - "--export-json", - str(benchmark_json), - *hyperfine_extra_args, - "--command-name", - name, - command, - ] - - returncode, attempts = run_hyperfine(hyperfine_cmd, worktree, name) - if returncode == 0: - merge_hyperfine_result(benchmark_json, combined_results, attempts) - else: - failures.append( - { - "name": name, - "command": command, - "returncode": returncode, - "attempts": attempts, - } - ) - print( - f"warning: benchmark `{name}` failed with exit code {returncode}; continuing", - file=sys.stderr, - ) - - with json_file.open("w") as f: - json.dump( - { - "results": combined_results, - "failures": failures, - }, - f, - indent=2, - ) - - completed_commits.append( - { - "idx": idx, - "sha": commit, - "short": short, - "subject": subject, - "json": json_file, - } - ) - finally: - run(["git", "worktree", "remove", "--force", str(worktree)], cwd=repo, check=False) - - if not completed_commits: - raise SystemExit("error: no benchmark results were recorded") - - summary, csv_path = write_reports(results_dir, completed_commits) - print("\nDone.") - print(f"Summary: {summary}") - print(f"CSV: {csv_path}") - - -if __name__ == "__main__": - try: - main() - except subprocess.CalledProcessError as err: - cmd = " 
".join(str(part) for part in err.cmd) - print(f"error: command failed: {cmd}", file=sys.stderr) - if err.stdout: - print(err.stdout, file=sys.stderr) - if err.stderr: - print(err.stderr, file=sys.stderr) - raise SystemExit(err.returncode) diff --git a/src/Ast.zig b/src/Ast.zig index edb498c9..a30aaa76 100644 --- a/src/Ast.zig +++ b/src/Ast.zig @@ -502,7 +502,7 @@ pub const Slice = struct { } /// Mirrors Chunk.score (even though Chunk.score and Node.score won't be comparable) - /// Is use to compute complexity of a hotspot node (which don't have a Chunk available to evaluate) + /// Is used to compute complexity of a hotspot node (which doesn't have a Chunk available to evaluate) const ComplexityContext = struct { score: usize = 0, @@ -1064,7 +1064,7 @@ pub const Node = struct { count: usize = 0, /// Complexity score computed once to help evaluate if the node is worth JIT compiling complexity_score: ?usize = null, - /// Node status: blacklisted, queued for compilation, compiled, compilable + /// Node status: blacklisted, queued/generated/compiled by the JIT, compilable jit_status: JitStatus = .compilable, /// Once compiled compiled: ?*anyopaque = null, @@ -1082,6 +1082,8 @@ pub const Node = struct { compilable, /// Node is already queued in the jit compiler queued, + /// Node has generated native code waiting to be published by the VM thread + generated, /// Node can't be compiled (contains use of fiber) blacklisted, /// Already compiled diff --git a/src/Jit.zig b/src/Jit.zig index 8ba55fa8..4d363403 100644 --- a/src/Jit.zig +++ b/src/Jit.zig @@ -288,7 +288,7 @@ pub fn compile(self: *Self, ast: Ast.Slice, closure: *o.ObjClosure, hotspot_node // Is the node already compiled or blacklisted switch (ast.nodes.items(.jit_status)[ast_node]) { .blacklisted => return error.CantCompile, - .queued, .compiled => return, + .queued, .generated, .compiled => return, .compilable => {}, } @@ -345,6 +345,51 @@ pub fn compile(self: *Self, ast: Ast.Slice, closure: *o.ObjClosure, 
hotspot_node } } +pub fn compileFunctionSynchronously(self: *Self, closure: *o.ObjClosure) Error!void { + self.publishCompleted(); + + if (closure.function.native_raw != null) { + return; + } + + if (BuildOptions.jit_asynchronous and !self.worker_stopped.load(.acquire)) { + return error.CantCompile; + } + + const function_ast = closure.function.chunk.ast; + const function_node = closure.function.node; + _ = closure.function.chunk.score(); + + switch (function_ast.nodes.items(.jit_status)[function_node]) { + .blacklisted, .generated, .queued => return error.CantCompile, + .compiled => return, + .compilable => {}, + } + + if (try function_ast.usesFiber( + self.gc.allocator, + function_node, + )) { + function_ast.nodes.items(.jit_status)[function_node] = .blacklisted; + + return error.CantCompile; + } + + const job = Job{ + .ast = function_ast, + .closure = closure, + .node = function_node, + }; + + function_ast.nodes.items(.jit_status)[function_node] = .queued; + + var completed_job = try self.doJob(&job); + defer completed_job.deinit(self.gc.allocator); + + self.publishCompletedJob(&completed_job); + self.reset(); +} + pub fn start(self: *Self) StartError!void { if (BuildOptions.jit_asynchronous and (self.worker == null or self.worker_stopped.load(.acquire))) { if (self.worker) |*worker| { @@ -370,7 +415,7 @@ fn work(self: *Self) Error!void { while (self.jobs.front()) |job| { switch (job.ast.nodes.items(.jit_status)[job.node]) { - .blacklisted, .compiled => { + .blacklisted, .generated, .compiled => { self.jobs.pop(); self.reset(); continue; @@ -464,7 +509,7 @@ fn queueCollateralFunction(self: *Self, node: Ast.Node.Index, closure: ?*o.ObjCl switch (self.state.?.ast.nodes.items(.jit_status)[node]) { .blacklisted => return error.CantCompile, - .compiled => return, + .generated, .compiled => return, .compilable, .queued => {}, } @@ -479,31 +524,56 @@ fn queueCollateralFunction(self: *Self, node: Ast.Node.Index, closure: ?*o.ObjCl 
self.state.?.ast.nodes.items(.jit_status)[node] = .queued; } +fn queueObjectMethodCollateral(self: *Self, object_type: *o.ObjTypeDef, method_index: usize) Error!void { + for (self.state.?.closure.globals.items) |global| { + if (!global.isObj()) { + continue; + } + + const object = o.ObjObject.cast(global.obj()) orelse continue; + if (object.type_def != object_type) { + continue; + } + + const method_value = object.fields[method_index]; + if (!method_value.isObj()) { + return; + } + + const closure = o.ObjClosure.cast(method_value.obj()) orelse return; + try self.queueCollateralFunction(closure.function.node, closure); + + return; + } +} + fn doJob(self: *Self, job: *const Job) Error!CompletedJob { var start_timestamp = std.Io.Clock.Timestamp.now(self.process.io, .awake); defer if (BuildOptions.jit_debug or BuildOptions.show_perf) { const time = start_timestamp.untilNow(self.process.io).raw.toMilliseconds(); - if (job.node == job.closure.function.node) - log.info( - "Finished job function `{s}` with score {} in {}ms", - .{ - job.closure.function.type_def.resolved_type.?.Function.name.string, - job.closure.function.call_count * job.closure.function.chunk.complexity_score.?, - time, - }, - ) - else - log.info( - "Finished job for hostpot node {} ({s}) witch score {} in function `{s}` in {}ms", - .{ - job.node, - @tagName(job.ast.nodes.items(.tag)[job.node]), - job.ast.nodes.items(.count)[job.node] * job.ast.nodes.items(.complexity_score)[job.node].?, - job.closure.function.type_def.resolved_type.?.Function.name.string, - time, - }, - ); + if (BuildOptions.jit_debug) { + if (job.node == job.closure.function.node) + log.info( + "Finished job function `{s}` with score {} in {}ms", + .{ + job.closure.function.type_def.resolved_type.?.Function.name.string, + job.closure.function.call_count * job.closure.function.chunk.complexity_score.?, + time, + }, + ) + else + log.info( + "Finished job for hostpot node {} ({s}) witch score {} in function `{s}` in {}ms", + .{ + job.node, 
+ @tagName(job.ast.nodes.items(.tag)[job.node]), + job.ast.nodes.items(.count)[job.node] * job.ast.nodes.items(.complexity_score)[job.node].?, + job.closure.function.type_def.resolved_type.?.Function.name.string, + time, + }, + ); + } }; // Remember we need to set this function's fields. Hotspot jobs are tied to @@ -576,6 +646,8 @@ fn doJob(self: *Self, job: *const Job) Error!CompletedJob { .native_raw = if (items.native_raw) |item| m.MIR_gen(self.ctx, item) else null, }, ); + + job.ast.nodes.items(.jit_status)[node] = .generated; } if (BuildOptions.jit_debug) { @@ -2244,26 +2316,39 @@ fn generateCall(self: *Self, node: Ast.Node.Index) Error!?m.MIR_op_t { const member_lexeme = lexemes[node_components[components.callee].Dot.identifier]; switch (invoked_on.?) { - .Object => try self.buildExternApiCall( - .bz_getObjectField, - callee, - &.{ - subject.?, - m.MIR_new_uint_op( - self.ctx, - type_defs[node_components[components.callee].Dot.callee].? - .resolved_type.?.Object - .fields.get(member_lexeme).? - .index, - ), - }, - ), + .Object => object: { + const object_type = type_defs[node_components[components.callee].Dot.callee].?; + const field = object_type + .resolved_type.?.Object + .fields.get(member_lexeme).?; + + if (field.method) { + try self.queueObjectMethodCollateral(object_type, field.index); + } + + break :object try self.buildExternApiCall( + .bz_getObjectField, + callee, + &.{ + subject.?, + m.MIR_new_uint_op( + self.ctx, + field.index, + ), + }, + ); + }, .ObjectInstance => instance: { - const field = type_defs[node_components[components.callee].Dot.callee].? - .resolved_type.?.ObjectInstance.of + const object_type = type_defs[node_components[components.callee].Dot.callee].? 
+ .resolved_type.?.ObjectInstance.of; + const field = object_type .resolved_type.?.Object .fields.get(member_lexeme).?; + if (field.method) { + try self.queueObjectMethodCollateral(object_type, field.index); + } + break :instance try self.buildExternApiCall( if (field.method) .bz_getObjectInstanceMethod diff --git a/src/buzz_api.zig b/src/buzz_api.zig index 4a6e5637..1fcccb01 100644 --- a/src/buzz_api.zig +++ b/src/buzz_api.zig @@ -1131,6 +1131,27 @@ export fn bz_setUpValue(ctx: *o.NativeCtx, slot: usize, value: v.Value) callconv ctx.upvalues[slot].location.* = value; } +export fn bz_callFromJit(ctx: *o.NativeCtx) callconv(.c) v.Value { + const vm = ctx.vm; + + vm.callValue( + ctx.callee, + @intCast(ctx.arg_count), + null, + ) catch @panic("Failed calling function from JIT"); + + // If the callee is interpreted, run it until its return reaches the native + // caller frame. The VM leaves the result on the stack; RawFn returns it. + if (!calleeIsCompiled(ctx.callee)) { + vm.run() catch @panic("Failed running function from JIT"); + } + + const result = vm.pop(); + vm.current_fiber.stack_top = ctx.base; + + return result; +} + export fn bz_context(ctx: *o.NativeCtx, closure_value: v.Value, new_ctx: *o.NativeCtx, arg_count: usize) callconv(.c) *anyopaque { if (is_wasm) { unreachable; @@ -1182,16 +1203,21 @@ export fn bz_context(ctx: *o.NativeCtx, closure_value: v.Value, new_ctx: *o.Nati .upvalues = if (closure) |cls| cls.upvalues.ptr else ctx.upvalues, .base = ctx.vm.current_fiber.stack_top - arg_count - 1, .stack_top = &ctx.vm.current_fiber.stack_top, + .callee = closure_value, + .arg_count = arg_count, }; if (closure) |cls| { - if (cls.function.native_raw == null and cls.function.native == null) { - ctx.vm.jit.?.compile(cls.function.chunk.ast, cls, null) catch @panic("Failed compiling function"); + if (cls.function.native_raw == null) { + ctx.vm.jit.?.compileFunctionSynchronously(cls) catch |err| switch (err) { + error.CantCompile => return @as(*anyopaque, 
@ptrFromInt(@intFromPtr(&bz_callFromJit))), + else => @panic("Failed compiling function"), + }; } ctx.vm.current_fiber.current_compiled_function = cls.function; - return cls.function.native_raw.?; + return cls.function.native_raw orelse @as(*anyopaque, @ptrFromInt(@intFromPtr(&bz_callFromJit))); } return native.?.native; diff --git a/src/lib/buzz_api.zig b/src/lib/buzz_api.zig index b0ff9bd1..29e465e4 100644 --- a/src/lib/buzz_api.zig +++ b/src/lib/buzz_api.zig @@ -196,6 +196,8 @@ pub const NativeCtx = extern struct { // Pointer to the stack_top field of the current fiber // !! Needs to change when current fiber changes !! stack_top: *[*]Value, + callee: Value, + arg_count: usize, pub fn getIo(self: *@This()) Io { if (is_wasm) return {}; diff --git a/src/obj.zig b/src/obj.zig index 6ed909b3..3f8a58c7 100644 --- a/src/obj.zig +++ b/src/obj.zig @@ -1242,6 +1242,8 @@ pub const NativeCtx = extern struct { // Pointer to the stack_top field of the current fiber // !! Needs to change when current fiber changes !! 
stack_top: *[*]Value, + callee: Value, + arg_count: usize, }; // 1 = return value on stack, 0 = no return value, -1 = error diff --git a/src/vm.zig b/src/vm.zig index a56d6ac7..3811147c 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -4809,6 +4809,18 @@ pub const VM = struct { const frame_ptr = self.currentFrame(); const frame_val = if (frame_ptr) |ptr| ptr.* else null; if (self.current_fiber.frame_count > 0) { + if (!is_wasm and frame_ptr.?.in_native_call and self.current_fiber.try_context != null) { + self.push(payload); + + if (builtin.os.tag == .macos or builtin.os.tag == .linux) { + jmp._longjmp(&self.current_fiber.try_context.?.env, 1); + } else { + jmp.longjmp(&self.current_fiber.try_context.?.env, 1); + } + + unreachable; + } + const function_type = frame_ptr.?.closure.function.type_def.resolved_type.?.Function.function_type; if (function_type != .ScriptEntryPoint and function_type != .Repl) { try stack.append(self.gc.allocator, frame_val.?); @@ -5135,6 +5147,8 @@ pub const VM = struct { .upvalues = frame.closure.upvalues.ptr, .base = frame.slots, .stack_top = &self.current_fiber.stack_top, + .callee = Value.Void, + .arg_count = 0, }; // If native returns 1 here, we know there was an early return in the hotspot @@ -5172,6 +5186,8 @@ pub const VM = struct { .upvalues = &[_]*obj.ObjUpValue{}, .base = self.current_fiber.stack_top - arg_count - 1, .stack_top = &self.current_fiber.stack_top, + .callee = Value.Void, + .arg_count = arg_count, }; const native_return = native(&ctx); @@ -5232,6 +5248,8 @@ pub const VM = struct { .upvalues = closure.upvalues.ptr, .base = self.current_fiber.stack_top - arg_count - 1, .stack_top = &self.current_fiber.stack_top, + .callee = closure.toValue(), + .arg_count = arg_count, }; const native_return = native(&ctx); diff --git a/tests/behavior/jit-dynamic-call.buzz b/tests/behavior/jit-dynamic-call.buzz new file mode 100644 index 00000000..b60d7f84 --- /dev/null +++ b/tests/behavior/jit-dynamic-call.buzz @@ -0,0 +1,60 @@ +import 
"std"; + +fun dynamicInc(value: int) > int { + return value + 1; +} + +fun dynamicIncViaFiber(value: int) > int { + return resolve &dynamicInc(value); +} + +fun failDynamic(value: int) > int !> str { + throw "failed"; + + return value; +} + +fun failDynamicViaFiber(value: int) > int !> str { + return resolve &failDynamic(value); +} + +fun callDynamic(fn: fun (value: int) > int, value: int) > int { + return fn(value); +} + +fun callDynamicWithCatch(fn: fun (value: int) > int !> str, value: int) > int { + return fn(value) catch 42; +} + +test "JIT dynamic closure call" { + final callbacks = [ dynamicInc ]; + var value = 0; + + foreach (_ in 0..1500) { + value = callDynamic(callbacks[0], value: value); + } + + std\assert(value == 1500, message: "compiled code can call a dynamic closure"); +} + +test "JIT dynamic call to blacklisted closure" { + final callbacks = [ dynamicIncViaFiber ]; + var value = 0; + + foreach (_ in 0..1500) { + value = callDynamic(callbacks[0], value: value); + } + + std\assert(value == 1500, message: "compiled code can call an interpreted dynamic closure"); +} + +test "JIT dynamic call to blacklisted throwing closure" { + final callbacks = [ failDynamicViaFiber ]; + var value = 0; + + foreach (_ in 0..1500) { + value = callDynamicWithCatch(callbacks[0], value: value); + } + + std\assert(value == 42, message: "compiled code can catch an interpreted dynamic closure error"); +} diff --git a/tests/bench/fasta.buzz b/tests/bench/fasta.buzz index 4e0660ee..8e5d755a 100644 --- a/tests/bench/fasta.buzz +++ b/tests/bench/fasta.buzz @@ -63,7 +63,7 @@ object Frequency { foreach (_ in 0..nRandom) { final r = this.random(1.0); var skip = false; - foreach (_ in 0..len) { + foreach (i in 0..len) { if (r < this.probs[i]) { buffer.setAt(bufferIndex, value: this.chars.at(i)); bufferIndex = bufferIndex + 1; diff --git a/tests/bench/nbody.buzz b/tests/bench/nbody.buzz index d88ac267..3618f9ae 100644 --- a/tests/bench/nbody.buzz +++ b/tests/bench/nbody.buzz @@ 
-26,23 +26,25 @@ fun advance(bodies: [mut Body], nbody: int, dt: double) > void { var bivy = bi.vy; var bivz = bi.vz; - foreach (j in (i + 1)..nbody) { - final bj = bodies[j]; - final dx = bix - bj.x; - final dy = biy - bj.y; - final dz = biz - bj.z; - final dist2 = dx * dx + dy * dy + dz * dz; - var mag = math\sqrt(dist2); - mag = dt / (mag * dist2); - var bm = bj.mass * mag; - - bivx = bivx - (dx * bm); - bivy = bivy - (dy * bm); - bivz = bivz - (dz * bm); - bm = bimass * mag; - bj.vx = bj.vx + (dx * bm); - bj.vy = bj.vy + (dy * bm); - bj.vz = bj.vz + (dz * bm); + if (i + 1 < nbody) { + foreach (j in (i + 1)..nbody) { + final bj = bodies[j]; + final dx = bix - bj.x; + final dy = biy - bj.y; + final dz = biz - bj.z; + final dist2 = dx * dx + dy * dy + dz * dz; + var mag = math\sqrt(dist2); + mag = dt / (mag * dist2); + var bm = bj.mass * mag; + + bivx = bivx - (dx * bm); + bivy = bivy - (dy * bm); + bivz = bivz - (dz * bm); + bm = bimass * mag; + bj.vx = bj.vx + (dx * bm); + bj.vy = bj.vy + (dy * bm); + bj.vz = bj.vz + (dz * bm); + } } bi.vx = bivx; @@ -63,13 +65,15 @@ fun energy(bodies: [Body], nbody: int) > double { final vz = bi.vz; final bim = bi.mass; e = e + (0.5 * bim * (vx * vx + vy * vy + vz * vz)); - foreach (j in (i + 1)..nbody) { - final bj = bodies[j]; - final dx = bi.x - bj.x; - final dy = bi.y - bj.y; - final dz = bi.z - bj.z; - final distance = math\sqrt(dx * dx + dy * dy + dz * dz); - e = e - ((bim * bj.mass) / distance); + if (i + 1 < nbody) { + foreach (j in (i + 1)..nbody) { + final bj = bodies[j]; + final dx = bi.x - bj.x; + final dy = bi.y - bj.y; + final dz = bi.z - bj.z; + final distance = math\sqrt(dx * dx + dy * dy + dz * dz); + e = e - ((bim * bj.mass) / distance); + } } } diff --git a/tests/bench/spectral.buzz b/tests/bench/spectral.buzz index cbb33a88..5f4d891e 100644 --- a/tests/bench/spectral.buzz +++ b/tests/bench/spectral.buzz @@ -2,8 +2,8 @@ import "std"; import "math"; fun A(i: double, j: double) > double { - final ij = i + j - 
1.0; - return 1.0 / (ij * (ij - 1.0) * 0.5 + i); + final ij = i + j; + return 1.0 / (ij * (ij + 1.0) * 0.5 + i + 1.0); } fun Av(x: [double], y: mut [double], N: int) > void { From ff3518adc6b7146d96196033cb0c09f8bc76076d Mon Sep 17 00:00:00 2001 From: Benoit Giannangeli Date: Fri, 15 May 2026 23:46:39 +0200 Subject: [PATCH 5/5] feat: More elaborate show_perf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gives a rundown like this: ``` Performance Total elapsed: 472.094ms VM 471.314ms 99% ━━━━━━━━━━━━━━━━━━━━━━━─ includes: GC 137.792ms, Native 163us GC 137.792ms 29% ━━━━━━━───────────────── JIT 2.719ms 0% ──────────────────────── Parser 468us 0% ──────────────────────── includes: Scanner 17us Native 163us 0% ──────────────────────── Codegen 70us 0% ──────────────────────── File I/O 37us 0% ──────────────────────── Scanner 17us 0% ──────────────────────── ``` --- CHANGELOG.md | 6 +- README.md | 3 +- src/Codegen.zig | 5 + src/Debugger.zig | 1 + src/GC.zig | 9 +- src/Jit.zig | 22 ++-- src/Parser.zig | 9 +- src/Perf.zig | 279 +++++++++++++++++++++++++++++++++++++++++++++++ src/Runner.zig | 29 ++--- src/Scanner.zig | 5 + src/behavior.zig | 14 ++- src/main.zig | 5 + src/repl.zig | 6 +- src/vm.zig | 14 +++ 14 files changed, 376 insertions(+), 31 deletions(-) create mode 100644 src/Perf.zig diff --git a/CHANGELOG.md b/CHANGELOG.md index c7e76b27..5bd93a77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Unreleased -This release builds with zig 0.16.0. We will only use tagged version of zig from now on. +> [!NOTE] +> This release builds with zig 0.16.0. We will only use tagged version of zig from now on. ## Added @@ -22,7 +23,10 @@ This release builds with zig 0.16.0. 
We will only use tagged version of zig from ## Internal +- JIT compiler works in a separate thread +- Better JIT thresholds based of functions/hotspots complexity scores - The standard libraries are now statically loaded which gives a small speed boost +- `-Dshow_perf` now show detailed rundown of the time spent in each component of buzz # 0.5.0 (01-24-2025) diff --git a/README.md b/README.md index fad7dbc7..231341d6 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,8 @@ A small/lightweight statically typed scripting language written in Zig HomepageDiscord

-_buzz is in alpha and is **not** ready any professional or production use_ +> [!WARNING] +> buzz is in alpha and is **not** ready any professional or production use ## Features diff --git a/src/Codegen.zig b/src/Codegen.zig index 9cf08372..a2b912b9 100644 --- a/src/Codegen.zig +++ b/src/Codegen.zig @@ -17,6 +17,7 @@ const JIT = if (!is_wasm) @import("Jit.zig") else void; const disassembler = @import("disassembler.zig"); const TypeChecker = @import("TypeChecker.zig"); const Init = @import("vm.zig").Init; +const Perf = @import("Perf.zig"); const Self = @This(); @@ -72,6 +73,7 @@ opt_jumps: std.ArrayList(std.ArrayList(usize)) = .empty, /// Used to generate error messages parser: *Parser, jit: ?*JIT, +perf: ?*Perf = null, /// Wether we are debugging the program debugging: bool, @@ -175,6 +177,9 @@ pub inline fn currentCode(self: *Self) usize { } pub fn generate(self: *Self, ast: Ast.Slice) Error!?*obj.ObjFunction { + var perf_scope = Perf.start(self.perf, .codegen); + defer perf_scope.end(); + self.ast = ast; self.reporter.last_error = null; self.reporter.panic_mode = false; diff --git a/src/Debugger.zig b/src/Debugger.zig index c55c69ec..5bc3a03f 100644 --- a/src/Debugger.zig +++ b/src/Debugger.zig @@ -450,6 +450,7 @@ pub fn launch(self: *Debugger, arguments: Arguments(.launch)) Error!Response(.la self.allocator, .Run, self, + null, ) catch return error.LaunchFailed; try self.session.?.variables.append( diff --git a/src/GC.zig b/src/GC.zig index 988a0f2c..56d0a599 100644 --- a/src/GC.zig +++ b/src/GC.zig @@ -10,6 +10,7 @@ const buzz_api = @import("buzz_api.zig"); const Reporter = @import("Reporter.zig"); const is_wasm = builtin.cpu.arch.isWasm(); const TypeRegistry = @import("TypeRegistry.zig"); +const Perf = @import("Perf.zig"); const log = std.log.scoped(.gc); @@ -35,6 +36,7 @@ const Mode = enum { }; allocator: std.mem.Allocator, +perf: ?*Perf = null, strings: std.StringHashMapUnmanaged(*o.ObjString) = .empty, type_registry: TypeRegistry, bytes_allocated: usize = 
0, @@ -168,7 +170,7 @@ pub fn allocate(self: *GC, comptime T: type) !*T { } pub fn allocateMany(self: *GC, comptime T: type, count: usize) ![]T { - self.bytes_allocated += (@sizeOf(T) * count); + self.bytes_allocated += @sizeOf(T) * count; if (self.bytes_allocated > self.max_allocated) { self.max_allocated = self.bytes_allocated; @@ -278,11 +280,11 @@ fn free(self: *GC, comptime T: type, pointer: *T) void { } fn freeMany(self: *GC, comptime T: type, pointer: []const T) void { + const n: usize = (@sizeOf(T) * pointer.len); if (BuildOptions.gc_debug) { log.info("Going to free slice {*} `{s}`", .{ pointer, pointer }); } - const n: usize = (@sizeOf(T) * pointer.len); self.bytes_allocated -= n; self.allocator.free(pointer); @@ -778,6 +780,9 @@ pub fn collectGarbage(self: *GC) !void { return; } + var perf_scope = Perf.start(self.perf, .gc); + defer perf_scope.end(); + const mode: Mode = if (self.bytes_allocated > self.next_full_gc and self.last_gc != null) .Full else .Young; if (BuildOptions.gc_debug or BuildOptions.gc_debug_light) { diff --git a/src/Jit.zig b/src/Jit.zig index 4d363403..333a820f 100644 --- a/src/Jit.zig +++ b/src/Jit.zig @@ -18,6 +18,7 @@ const Double = _v.Double; const Token = @import("Token.zig"); const ZigType = @import("zigtypes.zig").Type; const api = @import("buzz_api.zig"); +const Perf = @import("Perf.zig"); const log = std.log.scoped(.jit); @@ -111,6 +112,7 @@ const State = struct { }; process: Init, +perf: ?*Perf = null, /// We only read the interned strings map, the worker thread can allocate buzz objects since it's not thread safe. /// But it does not make sense for the Jit to have to allocate any buzz constant since it must have been done by CodeGen first. 
gc: *GC, @@ -548,12 +550,17 @@ fn queueObjectMethodCollateral(self: *Self, object_type: *o.ObjTypeDef, method_i } fn doJob(self: *Self, job: *const Job) Error!CompletedJob { - var start_timestamp = std.Io.Clock.Timestamp.now(self.process.io, .awake); - defer if (BuildOptions.jit_debug or BuildOptions.show_perf) { - const time = start_timestamp.untilNow(self.process.io).raw.toMilliseconds(); + var perf_scope = Perf.start(self.perf, .jit); + defer perf_scope.end(); + + const start_timestamp = std.Io.Clock.Timestamp.now(self.process.io, .awake); + defer { + const duration = start_timestamp.untilNow(self.process.io).raw; if (BuildOptions.jit_debug) { - if (job.node == job.closure.function.node) + const time = duration.toMilliseconds(); + + if (job.node == job.closure.function.node) { log.info( "Finished job function `{s}` with score {} in {}ms", .{ @@ -561,8 +568,8 @@ fn doJob(self: *Self, job: *const Job) Error!CompletedJob { job.closure.function.call_count * job.closure.function.chunk.complexity_score.?, time, }, - ) - else + ); + } else { log.info( "Finished job for hostpot node {} ({s}) witch score {} in function `{s}` in {}ms", .{ @@ -573,8 +580,9 @@ fn doJob(self: *Self, job: *const Job) Error!CompletedJob { time, }, ); + } } - }; + } // Remember we need to set this function's fields. 
Hotspot jobs are tied to // a closure for context, but their native code belongs to the AST node, not diff --git a/src/Parser.zig b/src/Parser.zig index 123e8f43..85764fba 100644 --- a/src/Parser.zig +++ b/src/Parser.zig @@ -16,6 +16,7 @@ const Scanner = @import("Scanner.zig"); const RunFlavor = @import("vm.zig").RunFlavor; const Reporter = @import("Reporter.zig"); const StringParser = @import("StringParser.zig"); +const Perf = @import("Perf.zig"); const pcre = if (!is_wasm) @import("pcre.zig") else void; const buzz_api = @import("lib/buzz_api.zig"); const print = @import("io.zig").print; @@ -93,6 +94,7 @@ const Self = @This(); process: Init, ast: Ast, gc: *GC, +perf: ?*Perf = null, scanner: ?Scanner = null, current_token: ?Ast.TokenIndex = null, script_name: []const u8 = undefined, @@ -885,6 +887,9 @@ fn synchronize(self: *Self) !void { } pub fn parse(self: *Self, source: []const u8, file_name: ?[]const u8, name: []const u8) !?Ast { + var perf_scope = Perf.start(self.perf, .parser); + defer perf_scope.end(); + if (self.scanner != null) { self.scanner = null; } @@ -914,6 +919,7 @@ pub fn parse(self: *Self, source: []const u8, file_name: ?[]const u8, name: []co file_name orelse name, source, ); + self.scanner.?.perf = self.perf; const function_type: obj.ObjFunction.FunctionType = if (!self.imported and self.flavor == .Repl) .Repl @@ -2699,7 +2705,8 @@ fn declarePlaceholder(self: *Self, name: Ast.TokenIndex, placeholder: ?*obj.ObjT } pub fn parseTypeDefFrom(self: *Self, source: []const u8) Error!*obj.ObjTypeDef { - const type_scanner = Scanner.init(self.gc.allocator, self.script_name, source); + var type_scanner = Scanner.init(self.gc.allocator, self.script_name, source); + type_scanner.perf = self.perf; // Replace parser scanner with one that only looks at that substring const scanner = self.scanner; self.scanner = type_scanner; diff --git a/src/Perf.zig b/src/Perf.zig new file mode 100644 index 00000000..f2bb574b --- /dev/null +++ b/src/Perf.zig @@ -0,0 +1,279 @@ 
+const std = @import("std"); +const builtin = @import("builtin"); +const BuildOptions = @import("build_options"); +const bz_io = @import("io.zig"); + +const Self = @This(); + +pub const Component = enum { + file_io, + scanner, + parser, + codegen, + vm, + gc, + jit, + native, +}; + +pub const Scope = struct { + perf: ?*Self, + component: Component, + start: std.Io.Clock.Timestamp = undefined, + + pub inline fn end(self: *Scope) void { + if (!BuildOptions.show_perf or builtin.cpu.arch.isWasm()) { + return; + } + + const perf = self.perf orelse return; + const duration = self.start.untilNow(perf.io).raw.toNanoseconds(); + const component = self.component; + + if (StackState.depth > 0 and StackState.stack[StackState.depth - 1].component == component) { + StackState.depth -= 1; + } + + const parent = if (StackState.depth > 0) + StackState.stack[StackState.depth - 1].component + else + null; + + perf.recordDuration(component, parent, @intCast(@max(0, duration))); + self.perf = null; + } +}; + +const component_count = @typeInfo(Component).@"enum".fields.len; +const default_order = defaultOrder(); +const max_stack = 64; + +const Active = struct { + component: Component, +}; + +const StackState = if (builtin.cpu.arch.isWasm()) struct { + var stack: [max_stack]Active = undefined; + var depth: usize = 0; +} else struct { + threadlocal var stack: [max_stack]Active = undefined; + threadlocal var depth: usize = 0; +}; + +io: bz_io.Io, +started_at: std.Io.Clock.Timestamp, +mutex: std.Io.Mutex = .init, +totals: [component_count]i128 = [_]i128{0} ** component_count, +children: [component_count][component_count]i128 = [_][component_count]i128{[_]i128{0} ** component_count} ** component_count, + +pub fn init(io: bz_io.Io) Self { + return .{ + .io = io, + .started_at = std.Io.Clock.Timestamp.now(io, .awake), + }; +} + +pub inline fn begin(self: *Self, component: Component) Scope { + if (!BuildOptions.show_perf or builtin.cpu.arch.isWasm()) { + return .{ .perf = null, .component = 
component }; + } + + var i: usize = 0; + while (i < StackState.depth) : (i += 1) { + if (StackState.stack[i].component == component) { + return .{ .perf = null, .component = component }; + } + } + + if (StackState.depth >= max_stack) { + return .{ .perf = null, .component = component }; + } + + StackState.stack[StackState.depth] = .{ .component = component }; + StackState.depth += 1; + + return .{ + .perf = self, + .component = component, + .start = std.Io.Clock.Timestamp.now(self.io, .awake), + }; +} + +pub inline fn start(perf: ?*Self, component: Component) Scope { + if (!BuildOptions.show_perf or builtin.cpu.arch.isWasm()) { + return .{ .perf = null, .component = component }; + } + + return if (perf) |p| p.begin(component) else .{ .perf = null, .component = component }; +} + +pub fn report(self: *Self) void { + if (!BuildOptions.show_perf or builtin.cpu.arch.isWasm()) { + return; + } + + var totals: [component_count]i128 = undefined; + var children: [component_count][component_count]i128 = undefined; + self.mutex.lockUncancelable(self.io); + totals = self.totals; + children = self.children; + self.mutex.unlock(self.io); + + const elapsed = @as(i128, @intCast(@max(0, self.started_at.untilNow(self.io).raw.toNanoseconds()))); + if (elapsed == 0) { + return; + } + + var order = default_order; + std.mem.sort(usize, &order, &totals, durationGreaterThan); + + var stderr = bz_io.stderrWriter(self.io); + const out = &stderr.interface; + + out.print("\n\x1b[36mPerformance\x1b[0m\n", .{}) catch return; + out.print("Total elapsed: ", .{}) catch return; + printDuration(out, elapsed) catch return; + out.print("\n\n", .{}) catch return; + + for (order) |component_index| { + const duration = totals[component_index]; + if (duration <= 0) { + continue; + } + + const component: Component = @enumFromInt(component_index); + const percent: u128 = @intCast(@divTrunc(duration * 100, elapsed)); + var duration_buffer: [32]u8 = undefined; + const duration_string = 
formatDuration(&duration_buffer, duration) catch return; + out.print("{s: <9} {s: >10} {d: >3}% ", .{ + label(component), + duration_string, + percent, + }) catch return; + printBar(out, duration, elapsed, color(component)) catch return; + tryPrintChildren(out, children[component_index]) catch return; + out.print("\n", .{}) catch return; + } +} + +fn defaultOrder() [component_count]usize { + var order: [component_count]usize = undefined; + for (&order, 0..) |*item, index| { + item.* = index; + } + + return order; +} + +fn durationGreaterThan(totals: *const [component_count]i128, lhs: usize, rhs: usize) bool { + return totals.*[lhs] > totals.*[rhs]; +} + +fn recordDuration(self: *Self, component: Component, parent: ?Component, duration: i128) void { + if (duration <= 0) { + return; + } + + const component_index = @intFromEnum(component); + + self.mutex.lockUncancelable(self.io); + defer self.mutex.unlock(self.io); + + self.totals[component_index] += duration; + + if (parent) |p| { + self.children[@intFromEnum(p)][component_index] += duration; + } +} + +fn label(component: Component) []const u8 { + return switch (component) { + .file_io => "File I/O", + .scanner => "Scanner", + .parser => "Parser", + .codegen => "Codegen", + .vm => "VM", + .gc => "GC", + .jit => "JIT", + .native => "Native", + }; +} + +fn color(component: Component) []const u8 { + return switch (component) { + .file_io => "\x1b[34m", + .scanner => "\x1b[35m", + .parser => "\x1b[36m", + .codegen => "\x1b[33m", + .vm => "\x1b[32m", + .gc => "\x1b[31m", + .jit => "\x1b[95m", + .native => "\x1b[94m", + }; +} + +fn printDuration(out: *std.Io.Writer, duration: i128) !void { + const ns_per_us = std.time.ns_per_us; + const ns_per_ms = std.time.ns_per_ms; + const ns_per_s = std.time.ns_per_s; + const value: u128 = @intCast(@max(0, duration)); + + if (value >= ns_per_s) { + try out.print("{d}.{d:0>3}s", .{ + @divTrunc(value, ns_per_s), + @divTrunc(@mod(value, ns_per_s), ns_per_ms), + }); + } else if (value >= 
ns_per_ms) { + try out.print("{d}.{d:0>3}ms", .{ + @divTrunc(value, ns_per_ms), + @divTrunc(@mod(value, ns_per_ms), ns_per_us), + }); + } else if (value >= ns_per_us) { + try out.print("{d}us", .{@divTrunc(value, ns_per_us)}); + } else { + try out.print("{d}ns", .{value}); + } +} + +fn formatDuration(buffer: *[32]u8, duration: i128) ![]const u8 { + var writer: std.Io.Writer = .fixed(buffer); + try printDuration(&writer, duration); + return writer.buffered(); +} + +fn printBar(out: *std.Io.Writer, duration: i128, total: i128, bar_color: []const u8) !void { + const width = 24; + const filled: usize = @intCast(@min(width, @divTrunc(duration * width, total))); + + try out.print("{s}", .{bar_color}); + for (0..filled) |_| { + try out.writeAll("━"); + } + + try out.writeAll("\x1b[2m"); + for (filled..width) |_| { + try out.writeAll("─"); + } + + try out.writeAll("\x1b[0m"); +} + +fn tryPrintChildren(out: *std.Io.Writer, component_children: [component_count]i128) !void { + var first = true; + + for (component_children, 0..) 
|duration, index| { + if (duration <= 0) { + continue; + } + + if (first) { + try out.writeAll(" includes: "); + first = false; + } else { + try out.writeAll(", "); + } + + try out.print("{s} ", .{label(@enumFromInt(index))}); + try printDuration(out, duration); + } +} diff --git a/src/Runner.zig b/src/Runner.zig index 700a35ce..6860fd38 100644 --- a/src/Runner.zig +++ b/src/Runner.zig @@ -21,16 +21,16 @@ const Renderer = @import("renderer.zig").Renderer; const Value = @import("value.zig").Value; const o = @import("obj.zig"); const disassembler = @import("disassembler.zig"); +const Perf = @import("Perf.zig"); const Runner = @This(); -const log = std.log.scoped(.runner); - process: Init, vm: VM, gc: GC, parser: Parser, codegen: CodeGen, +perf: ?*Perf = null, import_registry: ImportRegistry = .empty, imports: std.StringHashMapUnmanaged(Parser.ScriptImport) = .empty, /// DynLib lookup cache @@ -60,15 +60,17 @@ pub fn deinit(self: *Runner) void { /// Runner must, most of the time be on the stack, and it contains several circular references /// So the use provides the ptr to it and this function populates it -pub fn init(runner_ptr: *Runner, process: Init, allocator: std.mem.Allocator, flavor: RunFlavor, debugger: ?*Debugger) !void { +pub fn init(runner_ptr: *Runner, process: Init, allocator: std.mem.Allocator, flavor: RunFlavor, debugger: ?*Debugger, perf: ?*Perf) !void { runner_ptr.* = .{ .process = process, .gc = try GC.init(allocator), .vm = undefined, .parser = undefined, .codegen = undefined, + .perf = perf, }; + runner_ptr.gc.perf = perf; runner_ptr.gc.type_registry = try TypeRegistry.init(&runner_ptr.gc); runner_ptr.vm = try VM.init( process, @@ -77,11 +79,15 @@ pub fn init(runner_ptr: *Runner, process: Init, allocator: std.mem.Allocator, fl flavor, debugger, ); + runner_ptr.vm.perf = perf; runner_ptr.vm.jit = if (BuildOptions.jit and BuildOptions.cycle_limit == null and debugger == null) try JIT.init(process, &runner_ptr.gc) else null; + if (runner_ptr.vm.jit) 
|*jit| { + jit.perf = perf; + } runner_ptr.parser = Parser.init( process, @@ -91,6 +97,7 @@ pub fn init(runner_ptr: *Runner, process: Init, allocator: std.mem.Allocator, fl false, flavor, ); + runner_ptr.parser.perf = perf; runner_ptr.codegen = CodeGen.init( process, @@ -100,6 +107,7 @@ pub fn init(runner_ptr: *Runner, process: Init, allocator: std.mem.Allocator, fl if (runner_ptr.vm.jit) |*jit| jit else null, debugger != null, ); + runner_ptr.codegen.perf = perf; } pub fn runFile( @@ -107,6 +115,9 @@ pub fn runFile( file_name: []const u8, args: []const []const u8, ) !u8 { + var file_io_scope = Perf.start(runner.perf, .file_io); + defer file_io_scope.end(); + var file = (if (std.fs.path.isAbsolute(file_name)) std.Io.Dir.openFileAbsolute(runner.process.io, file_name, .{}) else @@ -120,17 +131,7 @@ pub fn runFile( defer if (runner.vm.debugger == null) runner.gc.allocator.free(source); _ = try file.readPositionalAll(runner.process.io, source, 0); - - var start_timestamp = std.Io.Clock.Timestamp.now(runner.process.io, .awake); - defer if (BuildOptions.show_perf) { - log.info( - "Ran file {s} in {}ms", - .{ - file_name, - start_timestamp.untilNow(runner.process.io).raw.toMilliseconds(), - }, - ); - }; + file_io_scope.end(); if (try runner.parser.parse(source, null, file_name)) |ast| { if (runner.vm.flavor != .Fmt) { diff --git a/src/Scanner.zig b/src/Scanner.zig index 4800b1a0..f3cfe572 100644 --- a/src/Scanner.zig +++ b/src/Scanner.zig @@ -3,6 +3,7 @@ const mem = std.mem; const Allocator = mem.Allocator; const Token = @import("Token.zig"); const v = @import("value.zig"); +const Perf = @import("Perf.zig"); pub const SourceLocation = struct { start: usize, @@ -30,6 +31,7 @@ line_offset: usize = 0, column_offset: usize = 0, script_name: []const u8, token_index: usize = 0, +perf: ?*Perf = null, pub fn init(allocator: Allocator, script_name: []const u8, source: []const u8) Self { return Self{ @@ -40,6 +42,9 @@ pub fn init(allocator: Allocator, script_name: []const u8, 
source: []const u8) S } pub fn scanToken(self: *Self) Allocator.Error!Token { + var perf_scope = Perf.start(self.perf, .scanner); + defer perf_scope.end(); + self.skipWhitespaces(); self.current.start = self.current.offset; diff --git a/src/behavior.zig b/src/behavior.zig index e656bd79..2b9c61a8 100644 --- a/src/behavior.zig +++ b/src/behavior.zig @@ -8,9 +8,12 @@ const bz_io = @import("io.zig"); const Parser = @import("Parser.zig"); const BuildOptions = @import("build_options"); const clap = @import("clap"); +const Perf = @import("Perf.zig"); const black_listed_tests = std.StaticStringMap(void).initComptime( - .{}, + .{ + .{ "tests/fuzzed/id:000434,sig:06,src:000723,time:202384530,execs:828228,op:arith8,pos:276,val:-1.buzz", {} }, + }, ); const Result = struct { @@ -45,7 +48,7 @@ const Result = struct { } }; -fn testBehaviors(process: std.process.Init, allocator: std.mem.Allocator, fail_fast: bool) !Result { +fn testBehaviors(process: std.process.Init, allocator: std.mem.Allocator, fail_fast: bool, perf: ?*Perf) !Result { var result = Result{}; const dirs = [_][]const u8{ "tests/behavior", "tests" }; @@ -77,7 +80,7 @@ fn testBehaviors(process: std.process.Init, allocator: std.mem.Allocator, fail_f var had_error: bool = false; var runner: Runner = undefined; - try runner.init(process, allocator, .Test, null); + try runner.init(process, allocator, .Test, null, perf); defer runner.deinit(); var failed = false; @@ -359,11 +362,14 @@ pub fn main(init: std.process.Init) !u8 { var result: Result = .{}; defer result.deinit(allocator); + var perf: ?Perf = if (BuildOptions.show_perf) Perf.init(init.io) else null; + defer if (perf) |*p| p.report(); + const do_all = res.args.all == 1 or (res.args.behavior != 1 and res.args.@"compile-error" != 1 and res.args.fuzz != 1); if (do_all or res.args.behavior == 1) { bz_io.print(init.io, "\n\x1b[34m■ Behavior tests\x1b[0m...\n", .{}); - var tests_result = try testBehaviors(init, allocator, res.args.fast == 1); + var tests_result = 
try testBehaviors(init, allocator, res.args.fast == 1, if (perf) |*p| p else null); try result.merge( allocator, &tests_result, diff --git a/src/main.zig b/src/main.zig index 45b1aa32..e0c4fc78 100644 --- a/src/main.zig +++ b/src/main.zig @@ -23,6 +23,7 @@ const wasm_repl = @import("wasm_repl.zig"); const Renderer = @import("renderer.zig").Renderer; const io = @import("io.zig"); const Runner = @import("Runner.zig"); +const Perf = @import("Perf.zig"); pub export const initRepl_export = wasm_repl.initRepl; pub export const runLine_export = wasm_repl.runLine; @@ -135,12 +136,16 @@ pub fn main(provided_init: Init) u8 { return 1; }; } else if (!is_wasm and res.positionals[0].len > 0) { + var perf: ?Perf = if (BuildOptions.show_perf) Perf.init(init.io) else null; + defer if (perf) |*p| p.report(); + var runner: Runner = undefined; runner.init( init, allocator, flavor, null, + if (perf) |*p| p else null, ) catch { return 1; }; diff --git a/src/repl.zig b/src/repl.zig index a807181f..a11488fd 100644 --- a/src/repl.zig +++ b/src/repl.zig @@ -17,6 +17,7 @@ const GC = @import("GC.zig"); const TypeRegistry = @import("TypeRegistry.zig"); const Runner = @import("Runner.zig"); const QualifiedNameContext = @import("Ast.zig").QualifiedName.Context; +const Perf = @import("Perf.zig"); pub const PROMPT = ">>> "; pub const MULTILINE_PROMPT = "... 
"; @@ -68,7 +69,10 @@ pub fn repl(process: std.process.Init, allocator: std.mem.Allocator) !void { false; var runner: Runner = undefined; - try runner.init(process, allocator, .Repl, null); + var perf: ?Perf = if (BuildOptions.show_perf) Perf.init(process.io) else null; + defer if (perf) |*p| p.report(); + + try runner.init(process, allocator, .Repl, null, if (perf) |*p| p else null); defer runner.deinit(); var stdout = io.stdoutWriter(process.io); diff --git a/src/vm.zig b/src/vm.zig index 3811147c..2d40ae0c 100644 --- a/src/vm.zig +++ b/src/vm.zig @@ -17,6 +17,7 @@ const dispatch_call_modifier: std.builtin.CallModifier = if (!is_wasm) .always_t const print = @import("io.zig").print; const Debugger = if (!is_wasm) @import("Debugger.zig") else void; const TypeRegistry = @import("TypeRegistry.zig"); +const Perf = @import("Perf.zig"); pub const Init = if (is_wasm) std.process.Init.Minimal else std.process.Init; @@ -453,6 +454,7 @@ pub const VM = struct { globals_dbg: std.ArrayList(Value) = .empty, import_registry: *ImportRegistry, jit: ?JIT = null, + perf: ?*Perf = null, debugger: ?*Debugger = null, paused: bool = false, hotspots_count: u128 = 0, @@ -498,6 +500,7 @@ pub const VM = struct { var gc = try other.gc.allocator.create(GC); // FIXME: should share strings between gc gc.* = try GC.init(other.gc.allocator); + gc.perf = other.gc.perf; gc.type_registry = try TypeRegistry.init(gc); const import_registry = try other.gc.allocator.create(ImportRegistry); import_registry.* = .{}; @@ -509,6 +512,7 @@ pub const VM = struct { .Run, null, ); + vm.perf = other.perf; return vm; } @@ -593,6 +597,9 @@ pub const VM = struct { } pub fn interpret(self: *Self, ast: Ast.Slice, function: *obj.ObjFunction, args: ?[]const []const u8) Error!void { + var perf_scope = Perf.start(self.perf, .vm); + defer perf_scope.end(); + self.current_ast = ast; self.current_fiber = try self.gc.allocator.create(Fiber); @@ -2292,6 +2299,7 @@ pub const VM = struct { self.panic("Out of memory"); 
unreachable; }; + vm.perf = self.perf; // TODO: how to free this since we copy things to new vm, also fails anyway // { // defer vm.deinit(); @@ -4746,6 +4754,9 @@ pub const VM = struct { } pub fn run(self: *Self) error{RuntimeError}!void { + var perf_scope = Perf.start(self.perf, .vm); + defer perf_scope.end(); + const next_current_frame = self.currentFrame().?; const next_full_instruction = self.readInstruction(next_current_frame); const next_instruction: Chunk.OpCode = getCode(next_full_instruction); @@ -5173,6 +5184,9 @@ pub const VM = struct { } fn callNative(self: *Self, native: obj.NativeFn, arg_count: u8, catch_value: ?Value) !void { + var perf_scope = Perf.start(self.perf, .native); + defer perf_scope.end(); + var frame = self.currentFrame().?; const was_in_native_call = frame.in_native_call; frame.in_native_call = true;