Skip to content

Commit 8d4e125

Browse files
committed
ZJIT: Centralize recompilation decisions in a state machine
Replace five scattered fields on IseqPayload (side_exit_count, defer_count, deferred_stub_hits, no_profile_send_hits, has_inline_feedback) with a RecompileState struct that explicitly models the lifecycle phases: Monitoring, Deferred, and Complete. All recompilation decisions now flow through two methods: on_signal(), which handles runtime events (side exits, inline sends, ivar fallbacks, and interpreter/stub entries during deferral), and post_hir_check(), which handles compile-time deferral decisions after HIR construction. The three runtime callbacks and two deferral gates in codegen.rs become thin wrappers that report events to the state machine and execute the returned actions; trigger_recompilation becomes execution-only, with no decision logic. No behavioral changes — same transitions, same thresholds, same tests.
1 parent ff6c650 commit 8d4e125

3 files changed

Lines changed: 277 additions & 140 deletions

File tree

zjit/src/codegen.rs

Lines changed: 56 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,10 @@ use crate::invariants::{
2828
track_root_box_assumption, track_single_ractor_assumption,
2929
track_stable_constant_names_assumption,
3030
};
31-
use crate::options::{get_option, rb_zjit_call_threshold, PerfMap};
31+
use crate::options::{get_option, PerfMap};
3232
use crate::payload::{
3333
get_or_create_iseq_payload, IseqCodePtrs, IseqPayload, IseqStatus, IseqVersion, IseqVersionRef,
34+
RecompileAction, RecompileSignal, RecompileState,
3435
};
3536
use crate::profile::ProfiledType;
3637
use crate::state::ZJITState;
@@ -57,34 +58,25 @@ pub extern "C" fn rb_zjit_count_side_exit(payload_raw: *mut std::ffi::c_void) {
5758
return;
5859
}
5960
let payload = unsafe { &mut *(payload_raw as *mut IseqPayload) };
60-
let threshold = get_option!(recompile_threshold) as u64;
61-
if threshold == 0 || payload.side_exit_count >= threshold {
62-
return;
63-
}
64-
payload.side_exit_count += 1;
65-
if payload.side_exit_count == threshold && payload.versions.len() < MAX_ISEQ_VERSIONS {
61+
if let RecompileAction::Recompile { preserve_profiles } = payload
62+
.recompile
63+
.on_signal(RecompileSignal::SideExit, payload.versions.len())
64+
{
6665
let iseq = match payload.versions.last() {
6766
Some(version_ref) => unsafe { version_ref.as_ref() }.iseq,
6867
None => return,
6968
};
7069
with_vm_lock(src_loc!(), || {
71-
trigger_recompilation(payload_raw, iseq, true);
70+
trigger_recompilation(payload_raw, iseq, preserve_profiles);
7271
});
7372
}
7473
}
7574

7675
static GLOBAL_RECOMPILE_COUNT: AtomicU64 = AtomicU64::new(0);
7776

78-
/// Escalating threshold for deferred re-profiling. Higher deferral levels
79-
/// give cold branches progressively more time to warm up.
80-
fn deferred_threshold(defer_count: u32) -> u32 {
81-
match defer_count {
82-
1 => unsafe { rb_zjit_call_threshold as u32 },
83-
2 => 1_000,
84-
_ => 100_000,
85-
}
86-
}
87-
77+
/// Execute a recompilation: check global cap, reset profiles, invalidate version,
78+
/// reset JIT func, re-enable profiling. All decisions about *whether* to recompile
79+
/// are made by RecompileState before this is called.
8880
/// When `preserve_profiles` is true, only counters are reset (type distributions survive).
8981
/// When false, both counters and type distributions are cleared.
9082
fn trigger_recompilation(
@@ -115,10 +107,8 @@ fn trigger_recompilation(
115107
payload.profile.reset_for_recompile();
116108
}
117109

118-
// Reset deferral state so V2 compilation goes straight to building the HIR.
119-
// If the HIR still has unresolved issues, the post-HIR deferral trigger handles escalation.
120-
payload.defer_count = 0;
121-
payload.deferred_stub_hits = 0;
110+
// Reset the state machine so V2 compilation goes straight to the HIR check.
111+
payload.recompile.reset_after_trigger();
122112

123113
if let Some(version) = payload.versions.last_mut() {
124114
let version = unsafe { version.as_mut() };
@@ -129,9 +119,6 @@ fn trigger_recompilation(
129119
}
130120

131121
/// Runtime helper called from JIT code to collect inline type feedback for NoProfile sends.
132-
/// When a NoProfile send executes, this records the receiver's class into the profiling data
133-
/// structure. After enough observations, triggers recompilation so the previously-NoProfile
134-
/// sends compile to direct calls using the collected type data.
135122
#[unsafe(no_mangle)]
136123
pub extern "C" fn rb_zjit_inline_profile_send(
137124
payload_raw: *mut std::ffi::c_void,
@@ -145,29 +132,28 @@ pub extern "C" fn rb_zjit_inline_profile_send(
145132
let payload = unsafe { &mut *(payload_raw as *mut IseqPayload) };
146133
let insn_idx = insn_idx as usize;
147134

148-
let threshold = (get_option!(recompile_threshold) as u64) / 2;
149-
if threshold == 0 || payload.no_profile_send_hits >= threshold {
150-
return;
151-
}
152-
153-
payload.no_profile_send_hits += 1;
154-
155-
if payload.no_profile_send_hits == threshold && payload.versions.len() < MAX_ISEQ_VERSIONS {
156-
if !payload.profile.inline_feedback_is_high_quality() {
135+
let quality_ok = payload.profile.inline_feedback_is_high_quality();
136+
let action = payload.recompile.on_signal(
137+
RecompileSignal::InlineSend { quality_ok },
138+
payload.versions.len(),
139+
);
140+
match action {
141+
RecompileAction::Recompile { preserve_profiles } => {
142+
let iseq = match payload.versions.last() {
143+
Some(version_ref) => unsafe { version_ref.as_ref() }.iseq,
144+
None => return,
145+
};
146+
with_vm_lock(src_loc!(), || {
147+
trigger_recompilation(payload_raw, iseq, preserve_profiles);
148+
});
157149
return;
158150
}
159-
payload.has_inline_feedback = true;
160-
let iseq = match payload.versions.last() {
161-
Some(version_ref) => unsafe { version_ref.as_ref() }.iseq,
162-
None => return,
163-
};
164-
with_vm_lock(src_loc!(), || {
165-
trigger_recompilation(payload_raw, iseq, true);
166-
});
167-
return;
151+
RecompileAction::Ignore => return,
152+
_ => {}
168153
}
169154

170-
const INLINE_PROFILE_LIMIT: u32 = 5;
155+
// Type recording stays outside the state machine (data collection, not a decision).
156+
const INLINE_PROFILE_LIMIT: u16 = 5;
171157
if payload.profile.num_profiles_for(insn_idx) >= INLINE_PROFILE_LIMIT {
172158
return;
173159
}
@@ -184,28 +170,23 @@ pub extern "C" fn rb_zjit_inline_profile_send(
184170
}
185171

186172
/// Lightweight runtime helper for not_monomorphic ivar fallbacks.
187-
/// Only increments the recompilation trigger counter — no type recording.
188173
#[unsafe(no_mangle)]
189174
pub extern "C" fn rb_zjit_count_ivar_fallback(payload_raw: *mut std::ffi::c_void) {
190175
if payload_raw.is_null() {
191176
return;
192177
}
193178
let payload = unsafe { &mut *(payload_raw as *mut IseqPayload) };
194179

195-
let threshold = get_option!(recompile_threshold) as u64;
196-
if threshold == 0 || payload.no_profile_send_hits >= threshold {
197-
return;
198-
}
199-
200-
payload.no_profile_send_hits += 1;
201-
202-
if payload.no_profile_send_hits == threshold && payload.versions.len() < MAX_ISEQ_VERSIONS {
180+
if let RecompileAction::Recompile { preserve_profiles } = payload
181+
.recompile
182+
.on_signal(RecompileSignal::IvarFallback, payload.versions.len())
183+
{
203184
let iseq = match payload.versions.last() {
204185
Some(version_ref) => unsafe { version_ref.as_ref() }.iseq,
205186
None => return,
206187
};
207188
with_vm_lock(src_loc!(), || {
208-
trigger_recompilation(payload_raw, iseq, true);
189+
trigger_recompilation(payload_raw, iseq, preserve_profiles);
209190
});
210191
}
211192
}
@@ -391,20 +372,21 @@ fn gen_iseq_entry_point(
391372
return Err(CompileError::ExceptionHandler);
392373
}
393374

394-
// If this ISEQ is in a deferred re-profiling window, don't compile yet.
395-
// Count this interpreter entry toward the threshold and keep the ISEQ
396-
// running in the interpreter with profiling active. Both interpreter
397-
// entries and stub fallbacks count toward the same escalating threshold.
375+
// Check the recompilation state machine for deferral.
398376
{
399377
let payload = get_or_create_iseq_payload(iseq);
400-
if payload.defer_count > 0 {
401-
let threshold = deferred_threshold(payload.defer_count);
402-
if payload.deferred_stub_hits < threshold {
403-
let call_threshold = unsafe { rb_zjit_call_threshold as u32 };
404-
payload.deferred_stub_hits += call_threshold;
378+
let call_threshold = unsafe { crate::options::rb_zjit_call_threshold as u32 };
379+
match payload.recompile.on_signal(
380+
RecompileSignal::Entry {
381+
credit: call_threshold,
382+
},
383+
payload.versions.len(),
384+
) {
385+
RecompileAction::DeferToInterpreter => {
405386
unsafe { rb_iseq_reset_jit_func(iseq) };
406387
return Err(CompileError::DeferredForReprofiling);
407388
}
389+
_ => {}
408390
}
409391
}
410392

@@ -415,29 +397,21 @@ fn gen_iseq_entry_point(
415397
})
416398
})?;
417399

418-
// Adaptive deferral for recompilations. First compilations never defer.
419-
// For recompilations (latest version invalidated), if the HIR has a
420-
// significant fraction of unresolved sends or any unresolved ivars,
421-
// defer for 1K interpreter calls to exercise cold branches.
422-
// A single dead-branch NoProfile send does NOT trigger deferral —
423-
// ISEQs where most sends are well-profiled compile immediately.
400+
// Post-HIR quality check: decide if recompilation should be deferred.
424401
if get_option!(recompile_threshold) > 0 {
425402
let payload = get_or_create_iseq_payload(iseq);
426403
let is_recompile = payload
427404
.versions
428405
.last()
429406
.map(|v| unsafe { v.as_ref() }.status == IseqStatus::Invalidated)
430407
.unwrap_or(false);
431-
// Use ratio-based check for sends: only defer if >25% of sends lack profiles.
432408
let (no_profile_sends, total_sends) = function.count_no_profile_sends();
433409
let sends_need_deferral = total_sends > 0 && no_profile_sends * 4 > total_sends;
434410
let has_unresolved = sends_need_deferral || function.has_not_monomorphic_ivars();
435-
let skip_deferral = payload.has_inline_feedback;
436-
if is_recompile && payload.defer_count < 2 && has_unresolved && !skip_deferral {
437-
payload.defer_count = 2; // level 2: deferred_threshold(2) = 1K calls
438-
payload.deferred_stub_hits = 0;
439-
// Preserve inline feedback — only reset counters so the interpreter
440-
// adds observations on top during the 1K-call deferral window.
411+
if let RecompileAction::Defer = payload
412+
.recompile
413+
.post_hir_check(is_recompile, has_unresolved)
414+
{
441415
payload.profile.reset_counters_for_recompile();
442416
unsafe { rb_zjit_profile_enable(iseq) };
443417
unsafe { rb_iseq_reset_jit_func(iseq) };
@@ -2701,7 +2675,8 @@ fn gen_guarded_inline_profile(
27012675

27022676
asm_comment!(asm, "guard: skip inline profiling if self-disabled");
27032677
let payload_addr = asm.load(Opnd::UImm(jit.payload_ptr as u64));
2704-
let offset = std::mem::offset_of!(crate::payload::IseqPayload, no_profile_send_hits) as i32;
2678+
let offset = (std::mem::offset_of!(crate::payload::IseqPayload, recompile)
2679+
+ std::mem::offset_of!(RecompileState, no_profile_send_hits)) as i32;
27052680
let hits = asm.load(Opnd::mem(64, payload_addr, offset));
27062681
asm.cmp(hits, Opnd::UImm(threshold));
27072682
asm.jge(jit, skip_edge());
@@ -4457,32 +4432,14 @@ c_callable! {
44574432
let cb = ZJITState::get_code_block();
44584433
let payload = get_or_create_iseq_payload(iseq);
44594434

4460-
// If this ISEQ is being re-profiled after deferral, fall back to
4461-
// the interpreter — the zjit_* profiling instructions are active
4462-
// and collect type data on each fallback. The threshold escalates
4463-
// with each deferral level to give cold branches progressively more
4464-
// time to warm up. This gate fires for both first-compilation deferrals
4465-
// (versions empty) and inline-triggered recompilation deferrals
4466-
// (latest version invalidated).
4467-
let latest_invalidated = payload.versions.last()
4468-
.map(|v| unsafe { v.as_ref() }.status == IseqStatus::Invalidated)
4469-
.unwrap_or(false);
4470-
if payload.defer_count > 0 && (payload.versions.is_empty() || latest_invalidated) {
4471-
// Count stub hits toward the deferral threshold for BOTH initial
4472-
// deferrals (versions empty) and recompilation deferrals (latest
4473-
// invalidated). Previously, recompilation deferrals returned the
4474-
// exit trampoline unconditionally without counting, causing the
4475-
// method to stay in the interpreter indefinitely — a catastrophic
4476-
// overhead for hot methods (addressable-merge lost 2.5s).
4477-
let threshold = deferred_threshold(payload.defer_count);
4478-
payload.deferred_stub_hits += 1;
4479-
if payload.deferred_stub_hits <= threshold {
4480-
// Still collecting profile data — fall back to interpreter
4435+
// Check the recompilation state machine for deferral.
4436+
match payload.recompile.on_signal(RecompileSignal::Entry { credit: 1 }, payload.versions.len()) {
4437+
RecompileAction::DeferToInterpreter => {
44814438
unsafe { Rc::increment_strong_count(iseq_call_ptr as *const IseqCall); }
44824439
prepare_for_exit(iseq, cfp, sp, &CompileError::DeferredForReprofiling);
44834440
return ZJITState::get_exit_trampoline().raw_ptr(cb);
44844441
}
4485-
// Enough profile data collected — fall through to compile
4442+
_ => {}
44864443
}
44874444

44884445
let last_status = payload.versions.last().map(|version| &unsafe { version.as_ref() }.status);

0 commit comments

Comments
 (0)