From 20113a228d2098a4ee61a2466c809a1081b0e403 Mon Sep 17 00:00:00 2001
From: Earlopain <14981592+Earlopain@users.noreply.github.com>
Date: Tue, 20 Jan 2026 12:16:20 +0100
Subject: [PATCH 1/4] Handle `on_sp` in `syntax_suggest` when using prism

Prism used to not emit this token type, but newer versions do. So when a
newer version of prism is present, we can fall back to the same code that
ripper uses.
---
 lib/syntax_suggest/code_line.rb | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/lib/syntax_suggest/code_line.rb b/lib/syntax_suggest/code_line.rb
index 58197e95d0f17c..892c273c4123cb 100644
--- a/lib/syntax_suggest/code_line.rb
+++ b/lib/syntax_suggest/code_line.rb
@@ -180,10 +180,13 @@ def ignore_newline_not_beg?
     #   EOM
     #   expect(lines.first.trailing_slash?).to eq(true)
     #
-    if SyntaxSuggest.use_prism_parser?
+    if SyntaxSuggest.use_prism_parser? && Prism::VERSION <= "1.8.0"
+      # Older versions of prism didn't correctly emit on_sp
       def trailing_slash?
         last = @lex.last
-        last&.type == :on_tstring_end
+        return false unless last
+
+        last.type == :on_tstring_end || (last.type == :on_sp && last.token == TRAILING_SLASH)
       end
     else
       def trailing_slash?

From 2842e61c92022c475ddea220f6ab1d7c5f441203 Mon Sep 17 00:00:00 2001
From: Earlopain <14981592+Earlopain@users.noreply.github.com>
Date: Tue, 20 Jan 2026 12:16:40 +0100
Subject: [PATCH 2/4] Reapply "[ruby/prism] Add Ripper :on_sp events for
 Prism.lex_compat and Prism::Translation::Ripper"

This reverts commit 58f1127b51cf4fbb1f334f8701a041f40701dca2.
---
 lib/prism.rb                              |   8 +-
 lib/prism/lex_compat.rb                   | 101 ++++++++++++++++++++--
 lib/prism/lex_ripper.rb                   |   2 -
 test/prism/fixtures/bom_leading_space.txt |   1 +
 test/prism/fixtures/bom_spaces.txt        |   1 +
 test/prism/ruby/ripper_test.rb            |  12 ++-
 6 files changed, 106 insertions(+), 19 deletions(-)
 create mode 100644 test/prism/fixtures/bom_leading_space.txt
 create mode 100644 test/prism/fixtures/bom_spaces.txt

diff --git a/lib/prism.rb b/lib/prism.rb
index d809557fce101f..dab3420377214f 100644
--- a/lib/prism.rb
+++ b/lib/prism.rb
@@ -61,8 +61,7 @@ def initialize(version)
   #   Prism::lex_compat(source, **options) -> LexCompat::Result
   #
   # Returns a parse result whose value is an array of tokens that closely
-  # resembles the return value of Ripper::lex. The main difference is that the
-  # `:on_sp` token is not emitted.
+  # resembles the return value of Ripper::lex.
   #
   # For supported options, see Prism::parse.
   def self.lex_compat(source, **options)
@@ -72,9 +71,8 @@ def self.lex_compat(source, **options)
   # :call-seq:
   #   Prism::lex_ripper(source) -> Array
   #
-  # This lexes with the Ripper lex. It drops any space events but otherwise
-  # returns the same tokens. Raises SyntaxError if the syntax in source is
-  # invalid.
+  # This wraps the result of Ripper.lex. It produces almost exactly the
+  # same tokens. Raises SyntaxError if the syntax in source is invalid.
   def self.lex_ripper(source)
     LexRipper.new(source).result # steep:ignore
   end

diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
index f7b9a0effc969d..597e63c73e73b7 100644
--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@@ -226,7 +226,7 @@ def state
       end

     # Tokens where state should be ignored
-    # used for :on_comment, :on_heredoc_end, :on_embexpr_end
+    # used for :on_sp, :on_comment, :on_heredoc_end, :on_embexpr_end
     class IgnoreStateToken < Token
       def ==(other) # :nodoc:
         self[0...-1] == other[0...-1]
@@ -611,10 +611,10 @@ def self.build(opening)
     BOM_FLUSHED = RUBY_VERSION >= "3.3.0"
     private_constant :BOM_FLUSHED

-    attr_reader :source, :options
+    attr_reader :options

-    def initialize(source, **options)
-      @source = source
+    def initialize(code, **options)
+      @code = code
       @options = options
     end

@@ -624,12 +624,14 @@ def result
       state = :default
       heredoc_stack = [[]] #: Array[Array[Heredoc::PlainHeredoc | Heredoc::DashHeredoc | Heredoc::DedentingHeredoc]]

-      result = Prism.lex(source, **options)
+      result = Prism.lex(@code, **options)
+      source = result.source
       result_value = result.value
       previous_state = nil #: State?
       last_heredoc_end = nil #: Integer?
+      eof_token = nil

-      bom = source.byteslice(0..2) == "\xEF\xBB\xBF"
+      bom = source.slice(0, 3) == "\xEF\xBB\xBF"
       result_value.each_with_index do |(token, lex_state), index|
         lineno = token.location.start_line

@@ -741,6 +743,7 @@ def result
            Token.new([[lineno, column], event, value, lex_state])
          when :on_eof
+           eof_token = token
            previous_token = result_value[index - 1][0]

            # If we're at the end of the file and the previous token was a
@@ -763,7 +766,7 @@ def result
                end_offset += 3
              end

-             tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state])
+             tokens << Token.new([[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state])
            end
          end

@@ -857,7 +860,89 @@ def result
       # We sort by location to compare against Ripper's output
       tokens.sort_by!(&:location)

-      Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, Source.for(source))
+      # Add :on_sp tokens
+      tokens = add_on_sp_tokens(tokens, source, result.data_loc, bom, eof_token)
+
+      Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, source)
+    end
+
+    def add_on_sp_tokens(tokens, source, data_loc, bom, eof_token)
+      new_tokens = []
+
+      prev_token_state = Translation::Ripper::Lexer::State.cached(Translation::Ripper::EXPR_BEG)
+      prev_token_end = bom ? 3 : 0
+
+      tokens.each do |token|
+        line, column = token.location
+        start_offset = source.line_to_byte_offset(line) + column
+        # Ripper reports columns on line 1 without counting the BOM, so we adjust to get the real offset
+        start_offset += 3 if line == 1 && bom
+
+        if start_offset > prev_token_end
+          sp_value = source.slice(prev_token_end, start_offset - prev_token_end)
+          sp_line = source.line(prev_token_end)
+          sp_column = source.column(prev_token_end)
+          # Ripper reports columns on line 1 without counting the BOM
+          sp_column -= 3 if sp_line == 1 && bom
+          continuation_index = sp_value.byteindex("\\")
+
+          # ripper emits up to three :on_sp tokens when line continuations are used
+          if continuation_index
+            next_whitespace_index = continuation_index + 1
+            next_whitespace_index += 1 if sp_value.byteslice(next_whitespace_index) == "\r"
+            next_whitespace_index += 1
+            first_whitespace = sp_value[0...continuation_index]
+            continuation = sp_value[continuation_index...next_whitespace_index]
+            second_whitespace = sp_value[next_whitespace_index..]
+
+            new_tokens << IgnoreStateToken.new([
+              [sp_line, sp_column],
+              :on_sp,
+              first_whitespace,
+              prev_token_state
+            ]) unless first_whitespace.empty?
+
+            new_tokens << IgnoreStateToken.new([
+              [sp_line, sp_column + continuation_index],
+              :on_sp,
+              continuation,
+              prev_token_state
+            ])
+
+            new_tokens << IgnoreStateToken.new([
+              [sp_line + 1, 0],
+              :on_sp,
+              second_whitespace,
+              prev_token_state
+            ]) unless second_whitespace.empty?
+          else
+            new_tokens << IgnoreStateToken.new([
+              [sp_line, sp_column],
+              :on_sp,
+              sp_value,
+              prev_token_state
+            ])
+          end
+        end
+
+        new_tokens << token
+        prev_token_state = token.state
+        prev_token_end = start_offset + token.value.bytesize
+      end
+
+      unless data_loc # no trailing :on_sp with __END__ as it is always preceded by :on_nl
+        end_offset = eof_token.location.end_offset
+        if prev_token_end < end_offset
+          new_tokens << IgnoreStateToken.new([
+            [source.line(prev_token_end), source.column(prev_token_end)],
+            :on_sp,
+            source.slice(prev_token_end, end_offset - prev_token_end),
+            prev_token_state
+          ])
+        end
+      end
+
+      new_tokens
     end
   end

diff --git a/lib/prism/lex_ripper.rb b/lib/prism/lex_ripper.rb
index 4b5c3b77fd6112..2054cf55ac0c70 100644
--- a/lib/prism/lex_ripper.rb
+++ b/lib/prism/lex_ripper.rb
@@ -19,8 +19,6 @@ def result
       lex(source).each do |token|
         case token[1]
-        when :on_sp
-          # skip
         when :on_tstring_content
           if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
             previous[2] << token[2]

diff --git a/test/prism/fixtures/bom_leading_space.txt b/test/prism/fixtures/bom_leading_space.txt
new file mode 100644
index 00000000000000..48d3ee50ea47b0
--- /dev/null
+++ b/test/prism/fixtures/bom_leading_space.txt
@@ -0,0 +1 @@
+ p (42)

diff --git a/test/prism/fixtures/bom_spaces.txt b/test/prism/fixtures/bom_spaces.txt
new file mode 100644
index 00000000000000..c18ad4c21ad7e7
--- /dev/null
+++ b/test/prism/fixtures/bom_spaces.txt
@@ -0,0 +1 @@
+p ( 42 )

diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb
index 2a0504c19f35f0..280abd94ea3e64 100644
--- a/test/prism/ruby/ripper_test.rb
+++ b/test/prism/ruby/ripper_test.rb
@@ -39,6 +39,8 @@ class RipperTest < TestCase
     # Skip these tests that we haven't implemented yet.
     omitted_sexp_raw = [
+      "bom_leading_space.txt",
+      "bom_spaces.txt",
       "dos_endings.txt",
       "heredocs_with_fake_newlines.txt",
       "heredocs_with_ignored_newlines.txt",
@@ -92,7 +94,7 @@ def test_lexer
       assert_equal(expected, lexer.parse[0].to_a)
       assert_equal(lexer.parse[0].to_a, lexer.scan[0].to_a)

-      assert_equal(%i[on_int on_op], Translation::Ripper::Lexer.new("1 +").lex.map(&:event))
+      assert_equal(%i[on_int on_sp on_op], Translation::Ripper::Lexer.new("1 +").lex.map(&:event))
       assert_raise(SyntaxError) { Translation::Ripper::Lexer.new("1 +").lex(raise_errors: true) }
     end

@@ -121,15 +123,17 @@ def assert_ripper_sexp_raw(source)
     def assert_ripper_lex(source)
       prism = Translation::Ripper.lex(source)
       ripper = Ripper.lex(source)
-      ripper.reject! { |elem| elem[1] == :on_sp } # Prism doesn't emit on_sp
-      ripper.sort_by! { |elem| elem[0] } # Prism emits tokens by their order in the code, not in parse order
+
+      # Prism emits tokens by their order in the code, not in parse order
+      ripper.sort_by! { |elem| elem[0] }

       [prism.size, ripper.size].max.times do |i|
         expected = ripper[i]
         actual = prism[i]
+
         # Since tokens related to heredocs are not emitted in the same order,
         # the state also doesn't line up.
-        if expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end
+        if expected && actual && expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end
           expected[3] = actual[3] = nil
         end

From c27ae8d91aadca0660070ee1eeae9598b1fe47ee Mon Sep 17 00:00:00 2001
From: Nobuyoshi Nakada
Date: Tue, 20 Jan 2026 23:00:08 +0900
Subject: [PATCH 3/4] [ruby/psych] Remove excessive check of message

The order of "unexpected" and "missing" keyword argument errors at
`Data#initialize` had not been defined strictly.

[Bug #21844](https://bugs.ruby-lang.org/issues/21844)
ruby/ruby#15910

https://github.com/ruby/psych/commit/ba748d9b04
---
 test/psych/test_data.rb | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/test/psych/test_data.rb b/test/psych/test_data.rb
index 57c3478193a526..5e340c580a3f24 100644
--- a/test/psych/test_data.rb
+++ b/test/psych/test_data.rb
@@ -83,12 +83,11 @@ def test_members_must_be_identical
       # completely different members
       TestData.send :remove_const, :D
-      TestData.const_set :D, Data.define(:foo, :bar)
+      TestData.const_set :D, Data.define(:a, :c)
       e = assert_raise(ArgumentError) { Psych.unsafe_load d }
-      assert_equal 'unknown keywords: :a, :b', e.message
+      assert_include e.message, 'keyword:'
     ensure
       TestData.send :remove_const, :D
     end
   end
 end
-

From d225bb8b464e4e03d2eb6c09ef15adf727af9e2b Mon Sep 17 00:00:00 2001
From: Jeff Zhang
Date: Tue, 20 Jan 2026 10:50:43 -0500
Subject: [PATCH 4/4] ZJIT: Compile IsA into load + compare for
 String/Array/Hash (#15878)

Resolves https://github.com/Shopify/ruby/issues/880

Implemented this by using the code generation for `GuardType` as a
reference. Not sure if this is the best way to go about it, but it seems
to work.
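As a rough illustration of the semantics the fast path has to preserve (this
mirrors the new tests below; it is not the generated code itself): the check
is by builtin type, so subclasses of String/Array/Hash still match, while
immediates, nil, and false never do.

    class MyHash < Hash; end

    MyHash.new.is_a?(Hash)  # => true  (subclasses share the T_HASH builtin type)
    :sym.is_a?(String)      # => false (static symbols are special constants)
    nil.is_a?(Array)        # => false (so is nil)
    false.is_a?(Hash)       # => false (false has no object header, checked separately)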
---
 test/ruby/test_zjit.rb | 54 ++++++++++++++++++++++++++++++++++++++++++
 zjit/src/codegen.rs    | 41 +++++++++++++++++++++++++++++++-
 2 files changed, 94 insertions(+), 1 deletion(-)

diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb
index ad2df806d59958..2cfe9dd7e3a530 100644
--- a/test/ruby/test_zjit.rb
+++ b/test/ruby/test_zjit.rb
@@ -4417,6 +4417,60 @@ def test
     }, call_threshold: 14, num_profiles: 5
   end

+  def test_is_a_string_special_case
+    assert_compiles '[true, false, false, false, false, false]', %q{
+      def test(x)
+        x.is_a?(String)
+      end
+      test("foo")
+      [test("bar"), test(1), test(false), test(:foo), test([]), test({})]
+    }
+  end
+
+  def test_is_a_array_special_case
+    assert_compiles '[true, true, false, false, false, false, false]', %q{
+      def test(x)
+        x.is_a?(Array)
+      end
+      test([])
+      [test([1,2,3]), test([]), test(1), test(false), test(:foo), test("foo"), test({})]
+    }
+  end
+
+  def test_is_a_hash_special_case
+    assert_compiles '[true, true, false, false, false, false, false]', %q{
+      def test(x)
+        x.is_a?(Hash)
+      end
+      test({})
+      [test({:a => "b"}), test({}), test(1), test(false), test(:foo), test([]), test("foo")]
+    }
+  end
+
+  def test_is_a_hash_subclass
+    assert_compiles 'true', %q{
+      class MyHash < Hash
+      end
+      def test(x)
+        x.is_a?(Hash)
+      end
+      test({})
+      test(MyHash.new)
+    }
+  end
+
+  def test_is_a_normal_case
+    assert_compiles '[true, false]', %q{
+      class MyClass
+      end
+      def test(x)
+        x.is_a?(MyClass)
+      end
+      test("a")
+      [test(MyClass.new), test("a")]
+    }
+  end
+
   private

   # Assert that every method call in `test_script` can be compiled by ZJIT
diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs
index 7afcff5863bc84..0ae85c24a2f1bf 100644
--- a/zjit/src/codegen.rs
+++ b/zjit/src/codegen.rs
@@ -1743,7 +1743,46 @@ fn gen_dup_array_include(
 }

 fn gen_is_a(asm: &mut Assembler, obj: Opnd, class: Opnd) -> lir::Opnd {
-    asm_ccall!(asm, rb_obj_is_kind_of, obj, class)
+    let builtin_type = match class {
+        Opnd::Value(value) if value == unsafe { rb_cString } => Some(RUBY_T_STRING),
+        Opnd::Value(value) if value == unsafe { rb_cArray } => Some(RUBY_T_ARRAY),
+        Opnd::Value(value) if value == unsafe { rb_cHash } => Some(RUBY_T_HASH),
+        _ => None
+    };
+
+    if let Some(builtin_type) = builtin_type {
+        asm_comment!(asm, "IsA by matching builtin type");
+        let ret_label = asm.new_label("is_a_ret");
+        let false_label = asm.new_label("is_a_false");
+
+        let val = match obj {
+            Opnd::Reg(_) | Opnd::VReg { .. } => obj,
+            _ => asm.load(obj),
+        };
+
+        // Check special constant
+        asm.test(val, Opnd::UImm(RUBY_IMMEDIATE_MASK as u64));
+        asm.jnz(ret_label.clone());
+
+        // Check false
+        asm.cmp(val, Qfalse.into());
+        asm.je(false_label.clone());
+
+        let flags = asm.load(Opnd::mem(VALUE_BITS, val, RUBY_OFFSET_RBASIC_FLAGS));
+        let obj_builtin_type = asm.and(flags, Opnd::UImm(RUBY_T_MASK as u64));
+        asm.cmp(obj_builtin_type, Opnd::UImm(builtin_type as u64));
+        asm.jmp(ret_label.clone());
+
+        // If we get here then the value was false, unset the Z flag
+        // so that csel_e will select false instead of true
+        asm.write_label(false_label);
+        asm.test(Opnd::UImm(1), Opnd::UImm(1));
+
+        asm.write_label(ret_label);
+        asm.csel_e(Qtrue.into(), Qfalse.into())
+    } else {
+        asm_ccall!(asm, rb_obj_is_kind_of, obj, class)
+    }
 }

 /// Compile a new hash instruction