turkdevops · pull · Jan 20, 2026 · Jan 20, 2026 · Jan 20, 2026 · Jan 20, 2026
diff --git a/lib/prism.rb b/lib/prism.rb
@@ -61,8 +61,7 @@ def initialize(version)
   #   Prism::lex_compat(source, **options) -> LexCompat::Result
   #
   # Returns a parse result whose value is an array of tokens that closely
-  # resembles the return value of Ripper::lex. The main difference is that the
-  # `:on_sp` token is not emitted.
+  # resembles the return value of Ripper::lex.
   #
   # For supported options, see Prism::parse.
   def self.lex_compat(source, **options)
@@ -72,9 +71,8 @@ def self.lex_compat(source, **options)
   # :call-seq:
   #   Prism::lex_ripper(source) -> Array
   #
-  # This lexes with the Ripper lex. It drops any space events but otherwise
-  # returns the same tokens. Raises SyntaxError if the syntax in source is
-  # invalid.
+  # This wraps the result of Ripper.lex and produces almost exactly the same
+  # tokens, including :on_sp. Raises SyntaxError if the syntax in source is invalid.
   def self.lex_ripper(source)
     LexRipper.new(source).result # steep:ignore
   end

diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
@@ -226,7 +226,7 @@ def state
     end
 
     # Tokens where state should be ignored
-    # used for :on_comment, :on_heredoc_end, :on_embexpr_end
+    # used for :on_sp, :on_comment, :on_heredoc_end, :on_embexpr_end
     class IgnoreStateToken < Token
       def ==(other) # :nodoc:
         self[0...-1] == other[0...-1]
@@ -611,10 +611,10 @@ def self.build(opening)
     BOM_FLUSHED = RUBY_VERSION >= "3.3.0"
     private_constant :BOM_FLUSHED
 
-    attr_reader :source, :options
+    attr_reader :options
 
-    def initialize(source, **options)
-      @source = source
+    def initialize(code, **options)
+      @code = code
       @options = options
     end
 
@@ -624,12 +624,14 @@ def result
       state = :default
       heredoc_stack = [[]] #: Array[Array[Heredoc::PlainHeredoc | Heredoc::DashHeredoc | Heredoc::DedentingHeredoc]]
 
-      result = Prism.lex(source, **options)
+      result = Prism.lex(@code, **options)
+      source = result.source
       result_value = result.value
       previous_state = nil #: State?
       last_heredoc_end = nil #: Integer?
+      eof_token = nil
 
-      bom = source.byteslice(0..2) == "\xEF\xBB\xBF"
+      bom = source.slice(0, 3) == "\xEF\xBB\xBF"
 
       result_value.each_with_index do |(token, lex_state), index|
         lineno = token.location.start_line
@@ -741,6 +743,7 @@ def result
 
             Token.new([[lineno, column], event, value, lex_state])
           when :on_eof
+            eof_token = token
             previous_token = result_value[index - 1][0]
 
             # If we're at the end of the file and the previous token was a
@@ -763,7 +766,7 @@ def result
                   end_offset += 3
                 end
 
-                tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state])
+                tokens << Token.new([[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state])
               end
             end
 
@@ -857,7 +860,89 @@ def result
       # We sort by location to compare against Ripper's output
       tokens.sort_by!(&:location)
 
-      Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, Source.for(source))
+      # Add :on_sp tokens
+      tokens = add_on_sp_tokens(tokens, source, result.data_loc, bom, eof_token)
+
+      Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, source)
+    end
+
+    # Fills every gap between consecutive +tokens+ (sorted by location) with the
+    # :on_sp tokens Ripper.lex would report there, and returns the combined list.
+    # +bom+ tells whether the source starts with a UTF-8 BOM; +eof_token+ is the
+    # Prism EOF token, whose end offset bounds any trailing whitespace.
+    def add_on_sp_tokens(tokens, source, data_loc, bom, eof_token)
+      new_tokens = []
+
+      prev_token_state = Translation::Ripper::Lexer::State.cached(Translation::Ripper::EXPR_BEG)
+      prev_token_end = bom ? 3 : 0
+
+      tokens.each do |token|
+        line, column = token.location
+        start_offset = source.line_to_byte_offset(line) + column
+        # Ripper reports columns on line 1 without counting the BOM, so we adjust to get the real offset
+        start_offset += 3 if line == 1 && bom
+
+        if start_offset > prev_token_end
+          sp_value = source.slice(prev_token_end, start_offset - prev_token_end)
+          sp_line = source.line(prev_token_end)
+          sp_column = source.column(prev_token_end)
+          # Ripper reports columns on line 1 without counting the BOM
+          sp_column -= 3 if sp_line == 1 && bom
+          continuation_index = sp_value.byteindex("\\")
+
+          # ripper emits up to three :on_sp tokens when line continuations are used
+          if continuation_index
+            # byteindex yields a byte offset, so every split below uses byteslice
+            next_whitespace_index = continuation_index + 1
+            next_whitespace_index += 1 if sp_value.byteslice(next_whitespace_index) == "\r"
+            next_whitespace_index += 1
+            first_whitespace = sp_value.byteslice(0...continuation_index)
+            continuation = sp_value.byteslice(continuation_index...next_whitespace_index)
+            second_whitespace = sp_value.byteslice(next_whitespace_index..)
+
+            new_tokens << IgnoreStateToken.new([
+              [sp_line, sp_column],
+              :on_sp,
+              first_whitespace,
+              prev_token_state
+            ]) unless first_whitespace.empty?
+
+            new_tokens << IgnoreStateToken.new([
+              [sp_line, sp_column + continuation_index],
+              :on_sp,
+              continuation,
+              prev_token_state
+            ])
+
+            new_tokens << IgnoreStateToken.new([
+              [sp_line + 1, 0],
+              :on_sp,
+              second_whitespace,
+              prev_token_state
+            ]) unless second_whitespace.empty?
+          else
+            new_tokens << IgnoreStateToken.new([[sp_line, sp_column], :on_sp, sp_value, prev_token_state])
+          end
+        end
+
+        new_tokens << token
+        prev_token_state = token.state
+        prev_token_end = start_offset + token.value.bytesize
+      end
+
+      unless data_loc # no trailing :on_sp with __END__ as it is always preceded by :on_nl
+        end_offset = eof_token.location.end_offset
+        if prev_token_end < end_offset
+          sp_line = source.line(prev_token_end)
+          sp_column = source.column(prev_token_end)
+          # Ripper reports columns on line 1 without counting the BOM
+          sp_column -= 3 if sp_line == 1 && bom
+          sp_value = source.slice(prev_token_end, end_offset - prev_token_end)
+          new_tokens << IgnoreStateToken.new([[sp_line, sp_column], :on_sp, sp_value, prev_token_state])
+        end
+      end
+
+      new_tokens
     end
   end
 

diff --git a/lib/prism/lex_ripper.rb b/lib/prism/lex_ripper.rb
@@ -19,8 +19,6 @@ def result
 
       lex(source).each do |token|
         case token[1]
-        when :on_sp
-          # skip
         when :on_tstring_content
           if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
             previous[2] << token[2]

diff --git a/lib/syntax_suggest/code_line.rb b/lib/syntax_suggest/code_line.rb
@@ -180,10 +180,13 @@ def ignore_newline_not_beg?
     #     EOM
     #     expect(lines.first.trailing_slash?).to eq(true)
     #
-    if SyntaxSuggest.use_prism_parser?
+    if SyntaxSuggest.use_prism_parser? && (Prism::VERSION.split(".").map(&:to_i) <=> [1, 8, 0]) <= 0
+      # Older prism (<= 1.8.0) didn't correctly emit on_sp; compare numerically since "1.10.0" <= "1.8.0" as strings
       def trailing_slash?
         last = @lex.last
-        last&.type == :on_tstring_end
+        return false unless last
+
+        last.type == :on_tstring_end || (last.type == :on_sp && last.token == TRAILING_SLASH)
       end
     else
       def trailing_slash?

diff --git a/test/prism/fixtures/bom_leading_space.txt b/test/prism/fixtures/bom_leading_space.txt
@@ -0,0 +1 @@
+ p (42)
diff --git a/test/prism/fixtures/bom_spaces.txt b/test/prism/fixtures/bom_spaces.txt
@@ -0,0 +1 @@
+p ( 42 )
diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb
@@ -39,6 +39,8 @@ class RipperTest < TestCase
 
     # Skip these tests that we haven't implemented yet.
     omitted_sexp_raw = [
+      "bom_leading_space.txt",
+      "bom_spaces.txt",
       "dos_endings.txt",
       "heredocs_with_fake_newlines.txt",
       "heredocs_with_ignored_newlines.txt",
@@ -92,7 +94,7 @@ def test_lexer
       assert_equal(expected, lexer.parse[0].to_a)
       assert_equal(lexer.parse[0].to_a, lexer.scan[0].to_a)
 
-      assert_equal(%i[on_int on_op], Translation::Ripper::Lexer.new("1 +").lex.map(&:event))
+      assert_equal(%i[on_int on_sp on_op], Translation::Ripper::Lexer.new("1 +").lex.map(&:event))
       assert_raise(SyntaxError) { Translation::Ripper::Lexer.new("1 +").lex(raise_errors: true) }
     end
 
@@ -121,15 +123,17 @@ def assert_ripper_sexp_raw(source)
     def assert_ripper_lex(source)
       prism = Translation::Ripper.lex(source)
       ripper = Ripper.lex(source)
-      ripper.reject! { |elem| elem[1] == :on_sp } # Prism doesn't emit on_sp
-      ripper.sort_by! { |elem| elem[0] } # Prism emits tokens by their order in the code, not in parse order
+
+      # Prism emits tokens by their order in the code, not in parse order
+      ripper.sort_by! { |elem| elem[0] }
 
       [prism.size, ripper.size].max.times do |i|
         expected = ripper[i]
         actual = prism[i]
+
         # Since tokens related to heredocs are not emitted in the same order,
         # the state also doesn't line up.
-        if expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end
+        if expected && actual && expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end
           expected[3] = actual[3] = nil
         end
 

diff --git a/test/psych/test_data.rb b/test/psych/test_data.rb
@@ -83,12 +83,11 @@ def test_members_must_be_identical
 
       # completely different members
       TestData.send :remove_const, :D
-      TestData.const_set :D, Data.define(:foo, :bar)
+      TestData.const_set :D, Data.define(:a, :c)
       e = assert_raise(ArgumentError) { Psych.unsafe_load d }
-      assert_equal 'unknown keywords: :a, :b', e.message
+      assert_include e.message, 'keyword:'
     ensure
       TestData.send :remove_const, :D
     end
   end
 end
-
diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb
@@ -4417,6 +4417,60 @@ def test
     }, call_threshold: 14, num_profiles: 5
   end
 
+  def test_is_a_string_special_case
+    assert_compiles '[true, false, false, false, false, false]', %q{
+      def test(x)
+        x.is_a?(String)
+      end
+      test("foo")
+      [test("bar"), test(1), test(false), test(:foo), test([]), test({})]
+    }
+  end
+
+  def test_is_a_array_special_case
+    assert_compiles '[true, true, false, false, false, false, false]', %q{
+      def test(x)
+        x.is_a?(Array)
+      end
+      test([])
+      [test([1,2,3]), test([]), test(1), test(false), test(:foo), test("foo"), test({})]
+    }
+  end
+
+  def test_is_a_hash_special_case
+    assert_compiles '[true, true, false, false, false, false, false]', %q{
+      def test(x)
+        x.is_a?(Hash)
+      end
+      test({})
+      [test({:a => "b"}), test({}), test(1), test(false), test(:foo), test([]), test("foo")]
+    }
+  end
+
+  def test_is_a_hash_subclass
+    assert_compiles 'true', %q{
+      class MyHash < Hash
+      end
+      def test(x)
+        x.is_a?(Hash)
+      end
+      test({})
+      test(MyHash.new)
+    }
+  end
+
+  def test_is_a_normal_case
+    assert_compiles '[true, false]', %q{
+      class MyClass
+      end
+      def test(x)
+        x.is_a?(MyClass)
+      end
+      test("a")
+      [test(MyClass.new), test("a")]
+    }
+  end
+
   private
 
   # Assert that every method call in `test_script` can be compiled by ZJIT

diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs
@@ -1743,7 +1743,46 @@ fn gen_dup_array_include(
 }
 
 fn gen_is_a(asm: &mut Assembler, obj: Opnd, class: Opnd) -> lir::Opnd {
-    asm_ccall!(asm, rb_obj_is_kind_of, obj, class)
+    let builtin_type = match class { // String/Array/Hash are answered from the object's builtin type alone
+        Opnd::Value(value) if value == unsafe { rb_cString } => Some(RUBY_T_STRING),
+        Opnd::Value(value) if value == unsafe { rb_cArray } => Some(RUBY_T_ARRAY),
+        Opnd::Value(value) if value == unsafe { rb_cHash } => Some(RUBY_T_HASH),
+        _ => None // every other class goes through rb_obj_is_kind_of below
+    };
+
+    if let Some(builtin_type) = builtin_type {
+        asm_comment!(asm, "IsA by matching builtin type");
+        let ret_label = asm.new_label("is_a_ret");
+        let false_label = asm.new_label("is_a_false");
+
+        let val = match obj {
+            Opnd::Reg(_) | Opnd::VReg { .. } => obj,
+            _ => asm.load(obj),
+        };
+
+        // Check special constant: a nonzero result jumps to the select with Z clear, which yields Qfalse
+        asm.test(val, Opnd::UImm(RUBY_IMMEDIATE_MASK as u64));
+        asm.jnz(ret_label.clone());
+
+        // Check false: reached only when no immediate bit is set, and routed to false_label
+        asm.cmp(val, Qfalse.into());
+        asm.je(false_label.clone());
+
+        let flags = asm.load(Opnd::mem(VALUE_BITS, val, RUBY_OFFSET_RBASIC_FLAGS));
+        let obj_builtin_type = asm.and(flags, Opnd::UImm(RUBY_T_MASK as u64));
+        asm.cmp(obj_builtin_type, Opnd::UImm(builtin_type as u64)); // Z is set only when the types match
+        asm.jmp(ret_label.clone());
+
+        // If we get here then the value was false, unset the Z flag
+        // so that csel_e will select false instead of true
+        asm.write_label(false_label);
+        asm.test(Opnd::UImm(1), Opnd::UImm(1));
+
+        asm.write_label(ret_label);
+        asm.csel_e(Qtrue.into(), Qfalse.into())
+    } else {
+        asm_ccall!(asm, rb_obj_is_kind_of, obj, class)
+    }
 }
 
 /// Compile a new hash instruction