From 7759acdd26b60c6677391b1ea3884f12537d5ac4 Mon Sep 17 00:00:00 2001
From: Kevin Newton
Date: Tue, 27 Jan 2026 14:55:09 -0500
Subject: [PATCH] Rename line_to_byte_offset -> byte_offset

Also, include the column in here. Hopefully we can do some
additional optimizations later.
---
 lib/prism/lex_compat.rb         |  8 +++--
 lib/prism/parse_result.rb       | 14 ++++----
 sig/prism/parse_result.rbs      |  2 +-
 templates/lib/prism/node.rb.erb |  5 ++-
 test/prism/ruby/source_test.rb  | 60 ++++++++++++++++++---------------
 5 files changed, 47 insertions(+), 42 deletions(-)

diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
index 523ad39586..4960230bcf 100644
--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@@ -816,7 +816,7 @@ def result
       # Manually implemented instead of `sort_by!(&:location)` for performance.
       tokens.sort_by! do |token|
         line, column = token.location
-        source.line_to_byte_offset(line) + column
+        source.byte_offset(line, column)
       end
 
       # Add :on_sp tokens
@@ -833,8 +833,10 @@ def add_on_sp_tokens(tokens, source, data_loc, bom, eof_token)
 
       tokens.each do |token|
         line, column = token.location
-        start_offset = source.line_to_byte_offset(line) + column
-        # Ripper reports columns on line 1 without counting the BOM, so we adjust to get the real offset
+        start_offset = source.byte_offset(line, column)
+
+        # Ripper reports columns on line 1 without counting the BOM, so we
+        # adjust to get the real offset
         start_offset += 3 if line == 1 && bom
 
         if start_offset > prev_token_end
diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb
index 12d19da562..be1c13f97c 100644
--- a/lib/prism/parse_result.rb
+++ b/lib/prism/parse_result.rb
@@ -76,13 +76,13 @@ def slice(byte_offset, length)
       source.byteslice(byte_offset, length) or raise
     end
 
-    # Converts the line number to a byte offset corresponding to the start of that line
-    def line_to_byte_offset(line)
-      l = line - @start_line
-      if l < 0 || l >= offsets.size
-        raise ArgumentError, "line #{line} is out of range"
-      end
-      offsets[l]
+    # Converts the line number and column in bytes to a byte offset.
+    def byte_offset(line, column)
+      normal = line - @start_line
+      raise IndexError if normal < 0
+      offsets.fetch(normal) + column
+    rescue IndexError
+      raise ArgumentError, "line #{line} is out of range"
     end
 
     # Binary search through the offsets to find the line number for the given
diff --git a/sig/prism/parse_result.rbs b/sig/prism/parse_result.rbs
index d878ca2edd..cbcf3fc2f8 100644
--- a/sig/prism/parse_result.rbs
+++ b/sig/prism/parse_result.rbs
@@ -14,7 +14,7 @@ module Prism
     def encoding: () -> Encoding
     def lines: () -> Array[String]
     def slice: (Integer byte_offset, Integer length) -> String
-    def line_to_byte_offset: (Integer line) -> Integer
+    def byte_offset: (Integer line, Integer column) -> Integer
     def line: (Integer byte_offset) -> Integer
     def line_start: (Integer byte_offset) -> Integer
     def line_end: (Integer byte_offset) -> Integer
diff --git a/templates/lib/prism/node.rb.erb b/templates/lib/prism/node.rb.erb
index 8225bfb328..6f8e8b0acc 100644
--- a/templates/lib/prism/node.rb.erb
+++ b/templates/lib/prism/node.rb.erb
@@ -183,14 +183,13 @@ module Prism
     def tunnel(line, column)
       queue = [self] #: Array[Prism::node]
       result = [] #: Array[Prism::node]
-
-      search_offset = source.line_to_byte_offset(line) + column
+      offset = source.byte_offset(line, column)
 
       while (node = queue.shift)
         result << node
 
         node.each_child_node do |child_node|
-          if child_node.start_offset <= search_offset && search_offset < child_node.end_offset
+          if child_node.start_offset <= offset && offset < child_node.end_offset
             queue << child_node
             break
           end
diff --git a/test/prism/ruby/source_test.rb b/test/prism/ruby/source_test.rb
index afd2825765..f7cf4fe83a 100644
--- a/test/prism/ruby/source_test.rb
+++ b/test/prism/ruby/source_test.rb
@@ -4,44 +4,48 @@
 
 module Prism
   class SourceTest < TestCase
-    def test_line_to_byte_offset
-      parse_result = Prism.parse(<<~SRC)
+    def test_byte_offset
+      source = Prism.parse(<<~SRC).source
         abcd
         efgh
         ijkl
       SRC
-      source = parse_result.source
-
-      assert_equal 0, source.line_to_byte_offset(1)
-      assert_equal 5, source.line_to_byte_offset(2)
-      assert_equal 10, source.line_to_byte_offset(3)
-      assert_equal 15, source.line_to_byte_offset(4)
-      e = assert_raise(ArgumentError) { source.line_to_byte_offset(5) }
-      assert_equal "line 5 is out of range", e.message
-      e = assert_raise(ArgumentError) { source.line_to_byte_offset(0) }
-      assert_equal "line 0 is out of range", e.message
-      e = assert_raise(ArgumentError) { source.line_to_byte_offset(-1) }
-      assert_equal "line -1 is out of range", e.message
+
+      assert_equal 0, source.byte_offset(1, 0)
+      assert_equal 5, source.byte_offset(2, 0)
+      assert_equal 10, source.byte_offset(3, 0)
+      assert_equal 15, source.byte_offset(4, 0)
+
+      error = assert_raise(ArgumentError) { source.byte_offset(5, 0) }
+      assert_equal "line 5 is out of range", error.message
+
+      error = assert_raise(ArgumentError) { source.byte_offset(0, 0) }
+      assert_equal "line 0 is out of range", error.message
+
+      error = assert_raise(ArgumentError) { source.byte_offset(-1, 0) }
+      assert_equal "line -1 is out of range", error.message
     end
 
-    def test_line_to_byte_offset_with_start_line
-      parse_result = Prism.parse(<<~SRC, line: 11)
+    def test_byte_offset_with_start_line
+      source = Prism.parse(<<~SRC, line: 11).source
         abcd
         efgh
         ijkl
       SRC
-      source = parse_result.source
-
-      assert_equal 0, source.line_to_byte_offset(11)
-      assert_equal 5, source.line_to_byte_offset(12)
-      assert_equal 10, source.line_to_byte_offset(13)
-      assert_equal 15, source.line_to_byte_offset(14)
-      e = assert_raise(ArgumentError) { source.line_to_byte_offset(15) }
-      assert_equal "line 15 is out of range", e.message
-      e = assert_raise(ArgumentError) { source.line_to_byte_offset(10) }
-      assert_equal "line 10 is out of range", e.message
-      e = assert_raise(ArgumentError) { source.line_to_byte_offset(9) }
-      assert_equal "line 9 is out of range", e.message
+
+      assert_equal 0, source.byte_offset(11, 0)
+      assert_equal 5, source.byte_offset(12, 0)
+      assert_equal 10, source.byte_offset(13, 0)
+      assert_equal 15, source.byte_offset(14, 0)
+
+      error = assert_raise(ArgumentError) { source.byte_offset(15, 0) }
+      assert_equal "line 15 is out of range", error.message
+
+      error = assert_raise(ArgumentError) { source.byte_offset(10, 0) }
+      assert_equal "line 10 is out of range", error.message
+
+      error = assert_raise(ArgumentError) { source.byte_offset(9, 0) }
+      assert_equal "line 9 is out of range", error.message
     end
   end
 end