150 changes: 38 additions & 112 deletions lib/prism/lex_compat.rb
@@ -196,57 +196,6 @@ def deconstruct_keys(keys)
"__END__": :on___end__
}.freeze

# When we produce tokens, we produce the same arrays that Ripper does.
# However, we add a couple of convenience methods onto them to make them a
# little easier to work with. We delegate all other methods to the array.
class Token < BasicObject
# Create a new token object with the given ripper-compatible array.
def initialize(array)
@array = array
end

# The location of the token in the source.
def location
@array[0]
end

# The type of the token.
def event
@array[1]
end

# The slice of the source that this token represents.
def value
@array[2]
end

# The state of the lexer when this token was produced.
def state
@array[3]
end

# We want to pretend that this is just an Array.
def ==(other) # :nodoc:
@array == other
end

def respond_to_missing?(name, include_private = false) # :nodoc:
@array.respond_to?(name, include_private)
end

def method_missing(name, ...) # :nodoc:
@array.send(name, ...)
end
end

# Tokens where state should be ignored
# used for :on_sp, :on_comment, :on_heredoc_end, :on_embexpr_end
class IgnoreStateToken < Token
def ==(other) # :nodoc:
self[0...-1] == other[0...-1]
end
end

# A heredoc in this case is a list of tokens that belong to the body of the
# heredoc that should be appended onto the list of tokens when the heredoc
# closes.
@@ -290,7 +239,7 @@ def to_a
embexpr_balance = 0

tokens.each_with_object([]) do |token, results| #$ Array[Token]
case token.event
case token[1]
when :on_embexpr_beg
embexpr_balance += 1
results << token
@@ -305,9 +254,9 @@
if split
# Split on "\\\n" to mimic Ripper's behavior. Use a lookbehind
# to keep the delimiter in the result.
token.value.split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index|
token[2].split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index|
column = 0 if index > 0
results << Token.new([[lineno, column], :on_tstring_content, value, token.state])
results << [[lineno, column], :on_tstring_content, value, token[3]]
lineno += value.count("\n")
end
else
@@ -350,15 +299,15 @@ def initialize
# whitespace on plain string content tokens. This allows us to later
# remove that amount of whitespace from the beginning of each line.
def <<(token)
case token.event
case token[1]
when :on_embexpr_beg, :on_heredoc_beg
@embexpr_balance += 1
@dedent = 0 if @dedent_next && @ended_on_newline
when :on_embexpr_end, :on_heredoc_end
@embexpr_balance -= 1
when :on_tstring_content
if embexpr_balance == 0
line = token.value
line = token[2]

if dedent_next && !(line.strip.empty? && line.end_with?("\n"))
leading = line[/\A(\s*)\n?/, 1]
@@ -381,7 +330,7 @@ def <<(token)
end
end

@dedent_next = token.event == :on_tstring_content && embexpr_balance == 0
@dedent_next = token[1] == :on_tstring_content && embexpr_balance == 0
@ended_on_newline = false
tokens << token
end
@@ -394,7 +343,7 @@ def to_a
embexpr_balance = 0

tokens.each do |token|
case token.event
case token[1]
when :on_embexpr_beg, :on_heredoc_beg
embexpr_balance += 1
results << token
@@ -406,9 +355,9 @@ def to_a
lineno = token[0][0]
column = token[0][1]

token.value.split(/(?<=\n)/).each_with_index do |value, index|
token[2].split(/(?<=\n)/).each_with_index do |value, index|
column = 0 if index > 0
results << Token.new([[lineno, column], :on_tstring_content, value, token.state])
results << [[lineno, column], :on_tstring_content, value, token[3]]
lineno += 1
end
else
@@ -436,15 +385,15 @@ def to_a
results << token
index += 1

case token.event
case token[1]
when :on_embexpr_beg, :on_heredoc_beg
embexpr_balance += 1
when :on_embexpr_end, :on_heredoc_end
embexpr_balance -= 1
when :on_tstring_content
if embexpr_balance == 0
while index < max_index && tokens[index].event == :on_tstring_content && !token.value.match?(/\\\r?\n\z/)
token.value << tokens[index].value
while index < max_index && tokens[index][1] == :on_tstring_content && !token[2].match?(/\\\r?\n\z/)
token[2] << tokens[index][2]
index += 1
end
end
@@ -467,7 +416,7 @@ def to_a
# whitespace calculation we performed above. This is because
# checking if the subsequent token needs to be dedented is common to
# both the dedent calculation and the ignored_sp insertion.
case token.event
case token[1]
when :on_embexpr_beg
embexpr_balance += 1
results << token
@@ -479,7 +428,7 @@ def to_a
# Here we're going to split the string on newlines, but maintain
# the newlines in the resulting array. We'll do that with a look
# behind assertion.
splits = token.value.split(/(?<=\n)/)
splits = token[2].split(/(?<=\n)/)
index = 0

while index < splits.length
@@ -536,12 +485,12 @@ def to_a
ignored = deleted_chars.join
line.delete_prefix!(ignored)

results << Token.new([[lineno, 0], :on_ignored_sp, ignored, token[3]])
results << [[lineno, 0], :on_ignored_sp, ignored, token[3]]
column = ignored.length
end
end

results << Token.new([[lineno, column], token[1], line, token[3]]) unless line.empty?
results << [[lineno, column], token[1], line, token[3]] unless line.empty?
index += 1
end
else
Expand All @@ -552,7 +501,7 @@ def to_a
end

dedent_next =
((token.event == :on_tstring_content) || (token.event == :on_heredoc_end)) &&
((token[1] == :on_tstring_content) || (token[1] == :on_heredoc_end)) &&
embexpr_balance == 0
end

@@ -563,11 +512,11 @@ def to_a
# Here we will split between the two types of heredocs and return the
# object that will store their tokens.
def self.build(opening)
case opening.value[2]
case opening[2][2]
when "~"
DedentingHeredoc.new
when "-"
DashHeredoc.new(opening.value[3] != "'")
DashHeredoc.new(opening[2][3] != "'")
else
PlainHeredoc.new
end
@@ -647,24 +596,24 @@ def result
# Ripper doesn't include the rest of the token in the event, so we need to
# trim it down to just the content on the first line.
value = value[0..value.index("\n")]
Token.new([[lineno, column], event, value, lex_state])
[[lineno, column], event, value, lex_state]
when :on_comment
IgnoreStateToken.new([[lineno, column], event, value, lex_state])
[[lineno, column], event, value, lex_state]
when :on_heredoc_end
# Heredoc end tokens can be emitted in an odd order, so we don't
# want to bother comparing the state on them.
last_heredoc_end = token.location.end_offset
IgnoreStateToken.new([[lineno, column], event, value, lex_state])
[[lineno, column], event, value, lex_state]
when :on_embexpr_end
IgnoreStateToken.new([[lineno, column], event, value, lex_state])
[[lineno, column], event, value, lex_state]
when :on_words_sep
# Ripper emits one token per line.
value.each_line.with_index do |line, index|
if index > 0
lineno += 1
column = 0
end
tokens << Token.new([[lineno, column], event, line, lex_state])
tokens << [[lineno, column], event, line, lex_state]
end
tokens.pop
when :on_regexp_end
@@ -696,7 +645,7 @@ def result
previous_state
end

Token.new([[lineno, column], event, value, lex_state])
[[lineno, column], event, value, lex_state]
when :on_eof
eof_token = token
previous_token = result_value[index - 1][0]
@@ -721,13 +670,13 @@ def result
end_offset += 3
end

tokens << Token.new([[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state])
tokens << [[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state]
end
end

Token.new([[lineno, column], event, value, lex_state])
[[lineno, column], event, value, lex_state]
else
Token.new([[lineno, column], event, value, lex_state])
[[lineno, column], event, value, lex_state]
end

previous_state = lex_state
@@ -813,9 +762,8 @@ def result
tokens = tokens[0...-1]

# We sort by location because Ripper.lex sorts.
# Manually implemented instead of `sort_by!(&:location)` for performance.
tokens.sort_by! do |token|
line, column = token.location
line, column = token[0]
source.byte_offset(line, column)
end

@@ -834,7 +782,7 @@ def insert_on_sp(tokens, source, data_loc, bom, eof_token)
prev_token_end = bom ? 3 : 0

tokens.each do |token|
line, column = token.location
line, column = token[0]
start_offset = source.byte_offset(line, column)

# Ripper reports columns on line 1 without counting the BOM, so we
@@ -858,50 +806,28 @@ def insert_on_sp(tokens, source, data_loc, bom, eof_token)
continuation = sp_value[continuation_index...next_whitespace_index]
second_whitespace = sp_value[next_whitespace_index..]

new_tokens << IgnoreStateToken.new([
[sp_line, sp_column],
:on_sp,
first_whitespace,
prev_token_state
]) unless first_whitespace.empty?

new_tokens << IgnoreStateToken.new([
[sp_line, sp_column + continuation_index],
:on_sp,
continuation,
prev_token_state
])

new_tokens << IgnoreStateToken.new([
[sp_line + 1, 0],
:on_sp,
second_whitespace,
prev_token_state
]) unless second_whitespace.empty?
new_tokens << [[sp_line, sp_column], :on_sp, first_whitespace, prev_token_state] unless first_whitespace.empty?
new_tokens << [[sp_line, sp_column + continuation_index], :on_sp, continuation, prev_token_state]
new_tokens << [[sp_line + 1, 0], :on_sp, second_whitespace, prev_token_state] unless second_whitespace.empty?
else
new_tokens << IgnoreStateToken.new([
[sp_line, sp_column],
:on_sp,
sp_value,
prev_token_state
])
new_tokens << [[sp_line, sp_column], :on_sp, sp_value, prev_token_state]
end
end

new_tokens << token
prev_token_state = token.state
prev_token_end = start_offset + token.value.bytesize
prev_token_state = token[3]
prev_token_end = start_offset + token[2].bytesize
end

unless data_loc # no trailing :on_sp with __END__ as it is always preceded by :on_nl
end_offset = eof_token.location.end_offset
if prev_token_end < end_offset
new_tokens << IgnoreStateToken.new([
new_tokens << [
[source.line(prev_token_end), source.column(prev_token_end)],
:on_sp,
source.slice(prev_token_end, end_offset - prev_token_end),
prev_token_state
])
]
end
end

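The net effect of the lib/prism/lex_compat.rb changes above: tokens are no longer wrapped in the Token delegator, and every consumer indexes the Ripper-compatible array directly. A minimal sketch of the field mapping, using plain Ripper.lex output (nothing here is specific to this PR):

require "ripper"

# Each Ripper-compatible token is a plain four-element array:
#   [[lineno, column], event, value, state]
token = Ripper.lex("foo = 1").first

token[0] # => [1, 0]      (was token.location on the removed Token wrapper)
token[1] # => :on_ident   (was token.event)
token[2] # => "foo"       (was token.value)
token[3] # => lexer state (was token.state)

The state-insensitive comparison that IgnoreStateToken used to provide moves out of the library entirely and into the compare_lex helper in rakelib/lex.rake below.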
2 changes: 1 addition & 1 deletion lib/prism/translation/ripper.rb
@@ -88,7 +88,7 @@ def self.lex(src, filename = "-", lineno = 1, raise_errors: false)
# # => ["def", " ", "m", "(", "a", ")", " ", "nil", " ", "end"]
#
def self.tokenize(...)
lex(...).map(&:value)
lex(...).map { |token| token[2] }
end

# This contains a table of all of the parser events and their
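With the wrapper gone, tokenize can no longer map over :value and instead picks the source slice out of each token array by index. The behavior is unchanged; the example from the doc comment above still holds:

require "prism"

# token[2] is the slice of source each token represents.
Prism::Translation::Ripper.tokenize("def m(a) nil end")
# => ["def", " ", "m", "(", "a", ")", " ", "nil", " ", "end"]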
19 changes: 18 additions & 1 deletion rakelib/lex.rake
@@ -30,7 +30,7 @@ module Prism
end

result = Prism.lex_compat(source)
if result.errors.empty? && Ripper.lex(source) == result.value
if result.errors.empty? && compare_lex(Ripper.lex(source), result.value)
@passing_file_count += 1
true
else
@@ -54,6 +54,23 @@
PERCENT=#{(passing_file_count.to_f / (passing_file_count + failing_file_count) * 100).round(2)}%
RESULTS
end

private

def compare_lex(ripper, prism)
[ripper.length, prism.length].max.times do |index|
ripper_token = ripper[index]
prism_token = prism[index]

# There are some tokens that have slightly different state that does not
# affect the parse tree, so they may not match.
if ripper_token && prism_token && ripper_token[1] == prism_token[1] && %i[on_comment on_heredoc_end on_embexpr_end on_sp].include?(ripper_token[1])
ripper_token[3] = prism_token[3] = nil
end
end

ripper == prism
end
end

class << self
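compare_lex keeps the old exact-equality check except for the four event types whose lexer state legitimately diverges between Ripper and prism. A hypothetical pair of comment tokens (the symbol states are stand-ins for real Ripper::Lexer::State objects) shows the normalization:

# Hypothetical tokens that differ only in lexer state:
ripper_token = [[1, 0], :on_comment, "# hi\n", :EXPR_BEG]
prism_token  = [[1, 0], :on_comment, "# hi\n", :EXPR_ARG]

# :on_comment is in the ignore list, so both states are blanked before the
# final array comparison:
ripper_token[3] = prism_token[3] = nil
ripper_token == prism_token # => true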