Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions lib/prism.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,7 @@ def initialize(version)
# Prism::lex_compat(source, **options) -> LexCompat::Result
#
# Returns a parse result whose value is an array of tokens that closely
# resembles the return value of Ripper::lex.
#
# For supported options, see Prism::parse.
def self.lex_compat(source, **options)
Expand All @@ -72,9 +71,8 @@ def self.lex_compat(source, **options)
# :call-seq:
# Prism::lex_ripper(source) -> Array
#
# This wraps the result of Ripper.lex. It produces almost exactly the
# same tokens. Raises SyntaxError if the syntax in source is invalid.
def self.lex_ripper(source)
# Builds a LexRipper for +source+ and returns whatever its #result yields.
# NOTE(review): the trailing `steep:ignore` presumably silences a Steep
# type-checker diagnostic reported on this exact line — confirm before
# splitting or moving the expression.
LexRipper.new(source).result # steep:ignore
end
Expand Down
101 changes: 93 additions & 8 deletions lib/prism/lex_compat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ def state
end

# Tokens where state should be ignored
# used for :on_sp, :on_comment, :on_heredoc_end, :on_embexpr_end
class IgnoreStateToken < Token
def ==(other) # :nodoc:
self[0...-1] == other[0...-1]
Expand Down Expand Up @@ -611,10 +611,10 @@ def self.build(opening)
BOM_FLUSHED = RUBY_VERSION >= "3.3.0"
private_constant :BOM_FLUSHED

attr_reader :source, :options
attr_reader :options

# Remembers the code to lex and the options to lex it with.
#
# code::    the Ruby source text; #result passes it to Prism.lex.
# options:: keyword options, stored untouched and forwarded to Prism.lex.
def initialize(code, **options)
  @code = code
  @options = options
end

Expand All @@ -624,12 +624,14 @@ def result
state = :default
heredoc_stack = [[]] #: Array[Array[Heredoc::PlainHeredoc | Heredoc::DashHeredoc | Heredoc::DedentingHeredoc]]

result = Prism.lex(source, **options)
result = Prism.lex(@code, **options)
source = result.source
result_value = result.value
previous_state = nil #: State?
last_heredoc_end = nil #: Integer?
eof_token = nil

bom = source.byteslice(0..2) == "\xEF\xBB\xBF"
bom = source.slice(0, 3) == "\xEF\xBB\xBF"

result_value.each_with_index do |(token, lex_state), index|
lineno = token.location.start_line
Expand Down Expand Up @@ -741,6 +743,7 @@ def result

Token.new([[lineno, column], event, value, lex_state])
when :on_eof
eof_token = token
previous_token = result_value[index - 1][0]

# If we're at the end of the file and the previous token was a
Expand All @@ -763,7 +766,7 @@ def result
end_offset += 3
end

tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state])
tokens << Token.new([[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state])
end
end

Expand Down Expand Up @@ -857,7 +860,89 @@ def result
# We sort by location to compare against Ripper's output
tokens.sort_by!(&:location)

Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, Source.for(source))
# Add :on_sp tokens
tokens = add_on_sp_tokens(tokens, source, result.data_loc, bom, eof_token)

Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, source)
end

# Returns a copy of +tokens+ with :on_sp tokens inserted for every run of
# bytes that lies between two consecutive tokens, plus one for any bytes left
# between the last token and the end of the file.
#
# tokens::    tokens already sorted by location; each responds to #location
#             ([line, column]), #value and #state.
# source::    object answering #line_to_byte_offset, #line, #column and
#             #slice, all in terms of byte offsets.
# data_loc::  truthy when the source has an __END__ section; no trailing
#             :on_sp is produced in that case.
# bom::       true when the source starts with a 3-byte UTF-8 BOM.
# eof_token:: the EOF token; its location.end_offset bounds the trailing gap.
#
# Every inserted token is an IgnoreStateToken that carries the state of the
# token preceding the gap (EXPR_BEG before the first token).
def add_on_sp_tokens(tokens, source, data_loc, bom, eof_token)
  new_tokens = []

  prev_token_state = Translation::Ripper::Lexer::State.cached(Translation::Ripper::EXPR_BEG)
  # The BOM is never part of a token, so the first gap starts right after it.
  prev_token_end = bom ? 3 : 0

  tokens.each do |token|
    line, column = token.location
    start_offset = source.line_to_byte_offset(line) + column
    # Ripper reports columns on line 1 without counting the BOM, so we adjust to get the real offset
    start_offset += 3 if line == 1 && bom

    if start_offset > prev_token_end
      sp_value = source.slice(prev_token_end, start_offset - prev_token_end)
      sp_line = source.line(prev_token_end)
      sp_column = source.column(prev_token_end)
      # Ripper reports columns on line 1 without counting the BOM
      sp_column -= 3 if sp_line == 1 && bom
      continuation_index = sp_value.byteindex("\\")

      # ripper emits up to three :on_sp tokens when line continuations are used
      if continuation_index
        # Step over the backslash, an optional "\r", and the newline itself.
        next_whitespace_index = continuation_index + 1
        next_whitespace_index += 1 if sp_value.byteslice(next_whitespace_index) == "\r"
        next_whitespace_index += 1

        # All indexes here are byte indexes, so split with byteslice rather
        # than the character-indexed String#[].
        first_whitespace = sp_value.byteslice(0...continuation_index)
        continuation = sp_value.byteslice(continuation_index...next_whitespace_index)
        second_whitespace = sp_value.byteslice(next_whitespace_index..)

        new_tokens << IgnoreStateToken.new([
          [sp_line, sp_column],
          :on_sp,
          first_whitespace,
          prev_token_state
        ]) unless first_whitespace.empty?

        new_tokens << IgnoreStateToken.new([
          [sp_line, sp_column + continuation_index],
          :on_sp,
          continuation,
          prev_token_state
        ])

        new_tokens << IgnoreStateToken.new([
          [sp_line + 1, 0],
          :on_sp,
          second_whitespace,
          prev_token_state
        ]) unless second_whitespace.empty?
      else
        new_tokens << IgnoreStateToken.new([
          [sp_line, sp_column],
          :on_sp,
          sp_value,
          prev_token_state
        ])
      end
    end

    new_tokens << token
    prev_token_state = token.state
    prev_token_end = start_offset + token.value.bytesize
  end

  unless data_loc # no trailing :on_sp with __END__ as it is always preceded by :on_nl
    end_offset = eof_token.location.end_offset
    if prev_token_end < end_offset
      sp_line = source.line(prev_token_end)
      sp_column = source.column(prev_token_end)
      # Same BOM correction as for gaps between tokens: columns on line 1
      # must not count the three BOM bytes.
      sp_column -= 3 if sp_line == 1 && bom

      new_tokens << IgnoreStateToken.new([
        [sp_line, sp_column],
        :on_sp,
        source.slice(prev_token_end, end_offset - prev_token_end),
        prev_token_state
      ])
    end
  end

  new_tokens
end
end

Expand Down
2 changes: 0 additions & 2 deletions lib/prism/lex_ripper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ def result

lex(source).each do |token|
case token[1]
when :on_sp
# skip
when :on_tstring_content
if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
previous[2] << token[2]
Expand Down
7 changes: 5 additions & 2 deletions lib/syntax_suggest/code_line.rb
Original file line number Diff line number Diff line change
Expand Up @@ -180,10 +180,13 @@ def ignore_newline_not_beg?
# EOM
# expect(lines.first.trailing_slash?).to eq(true)
#
if SyntaxSuggest.use_prism_parser?
if SyntaxSuggest.use_prism_parser? && Prism::VERSION <= "1.8.0"
# Older versions of prism didn't correctly emit on_sp
# True when the last token in @lex is an :on_tstring_end, or an :on_sp
# token whose text equals TRAILING_SLASH. Returns false when @lex is empty.
#
# NOTE(review): the guard that selects this definition compares
# Prism::VERSION to "1.8.0" with String#<=, which is a lexical comparison
# ("1.10.0" <= "1.8.0" is true); Gem::Version would compare numerically.
def trailing_slash?
  last = @lex.last
  return false unless last

  last.type == :on_tstring_end || (last.type == :on_sp && last.token == TRAILING_SLASH)
end
else
def trailing_slash?
Expand Down
1 change: 1 addition & 0 deletions test/prism/fixtures/bom_leading_space.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
 p (42)
1 change: 1 addition & 0 deletions test/prism/fixtures/bom_spaces.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
p ( 42 )
12 changes: 8 additions & 4 deletions test/prism/ruby/ripper_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ class RipperTest < TestCase

# Skip these tests that we haven't implemented yet.
omitted_sexp_raw = [
"bom_leading_space.txt",
"bom_spaces.txt",
"dos_endings.txt",
"heredocs_with_fake_newlines.txt",
"heredocs_with_ignored_newlines.txt",
Expand Down Expand Up @@ -92,7 +94,7 @@ def test_lexer
assert_equal(expected, lexer.parse[0].to_a)
assert_equal(lexer.parse[0].to_a, lexer.scan[0].to_a)

assert_equal(%i[on_int on_op], Translation::Ripper::Lexer.new("1 +").lex.map(&:event))
assert_equal(%i[on_int on_sp on_op], Translation::Ripper::Lexer.new("1 +").lex.map(&:event))
assert_raise(SyntaxError) { Translation::Ripper::Lexer.new("1 +").lex(raise_errors: true) }
end

Expand Down Expand Up @@ -121,15 +123,17 @@ def assert_ripper_sexp_raw(source)
def assert_ripper_lex(source)
prism = Translation::Ripper.lex(source)
ripper = Ripper.lex(source)
ripper.reject! { |elem| elem[1] == :on_sp } # Prism doesn't emit on_sp
ripper.sort_by! { |elem| elem[0] } # Prism emits tokens by their order in the code, not in parse order

# Prism emits tokens by their order in the code, not in parse order
ripper.sort_by! { |elem| elem[0] }

[prism.size, ripper.size].max.times do |i|
expected = ripper[i]
actual = prism[i]

# Since tokens related to heredocs are not emitted in the same order,
# the state also doesn't line up.
if expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end
if expected && actual && expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end
expected[3] = actual[3] = nil
end

Expand Down
5 changes: 2 additions & 3 deletions test/psych/test_data.rb
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,11 @@ def test_members_must_be_identical

# completely different members
TestData.send :remove_const, :D
TestData.const_set :D, Data.define(:foo, :bar)
TestData.const_set :D, Data.define(:a, :c)
e = assert_raise(ArgumentError) { Psych.unsafe_load d }
assert_equal 'unknown keywords: :a, :b', e.message
assert_include e.message, 'keyword:'
ensure
TestData.send :remove_const, :D
end
end
end

54 changes: 54 additions & 0 deletions test/ruby/test_zjit.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4417,6 +4417,60 @@ def test
}, call_threshold: 14, num_profiles: 5
end

# `x.is_a?(String)`: after one warm-up call with a String, the method must
# return true for a String and false for an Integer, `false`, a Symbol, an
# Array and a Hash.
def test_is_a_string_special_case
assert_compiles '[true, false, false, false, false, false]', %q{
def test(x)
x.is_a?(String)
end
test("foo")
[test("bar"), test(1), test(false), test(:foo), test([]), test({})]
}
end

# `x.is_a?(Array)`: after one warm-up call with an Array, the method must
# return true for non-empty and empty Arrays and false for an Integer,
# `false`, a Symbol, a String and a Hash.
def test_is_a_array_special_case
assert_compiles '[true, true, false, false, false, false, false]', %q{
def test(x)
x.is_a?(Array)
end
test([])
[test([1,2,3]), test([]), test(1), test(false), test(:foo), test("foo"), test({})]
}
end

# `x.is_a?(Hash)`: after one warm-up call with a Hash, the method must
# return true for non-empty and empty Hashes and false for an Integer,
# `false`, a Symbol, an Array and a String.
def test_is_a_hash_special_case
assert_compiles '[true, true, false, false, false, false, false]', %q{
def test(x)
x.is_a?(Hash)
end
test({})
[test({:a => "b"}), test({}), test(1), test(false), test(:foo), test([]), test("foo")]
}
end

# An instance of a Hash subclass (MyHash) must still satisfy `is_a?(Hash)`
# after the method was first called with a plain Hash.
def test_is_a_hash_subclass
assert_compiles 'true', %q{
class MyHash < Hash
end
def test(x)
x.is_a?(Hash)
end
test({})
test(MyHash.new)
}
end

# `x.is_a?(MyClass)` with a user-defined class: true for a MyClass instance,
# false for a String. The warm-up call passes a String.
def test_is_a_normal_case
assert_compiles '[true, false]', %q{
class MyClass
end
def test(x)
x.is_a?(MyClass)
end
test("a")
[test(MyClass.new), test("a")]
}
end

private

# Assert that every method call in `test_script` can be compiled by ZJIT
Expand Down
41 changes: 40 additions & 1 deletion zjit/src/codegen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1743,7 +1743,46 @@ fn gen_dup_array_include(
}

/// Compile a check of whether `obj` is an instance of `class`.
///
/// When `class` is the constant `rb_cString`, `rb_cArray` or `rb_cHash`, the
/// check is emitted inline by comparing the builtin-type bits of the object's
/// flags word against the matching `RUBY_T_*` value. Any other `class` operand
/// falls back to a C call to `rb_obj_is_kind_of`.
///
/// Returns the operand holding `Qtrue` or `Qfalse` (inline path) or the result
/// of the C call (fallback path).
fn gen_is_a(asm: &mut Assembler, obj: Opnd, class: Opnd) -> lir::Opnd {
    let builtin_type = match class {
        Opnd::Value(value) if value == unsafe { rb_cString } => Some(RUBY_T_STRING),
        Opnd::Value(value) if value == unsafe { rb_cArray } => Some(RUBY_T_ARRAY),
        Opnd::Value(value) if value == unsafe { rb_cHash } => Some(RUBY_T_HASH),
        _ => None
    };

    if let Some(builtin_type) = builtin_type {
        asm_comment!(asm, "IsA by matching builtin type");
        let ret_label = asm.new_label("is_a_ret");
        let false_label = asm.new_label("is_a_false");

        // The flags load below uses `val` as a memory base, so make sure it
        // lives in a register first.
        let val = match obj {
            Opnd::Reg(_) | Opnd::VReg { .. } => obj,
            _ => asm.load(obj),
        };

        // Check special constant: a non-zero result leaves Z clear, so the
        // csel_e at ret_label picks Qfalse.
        asm.test(val, Opnd::UImm(RUBY_IMMEDIATE_MASK as u64));
        asm.jnz(ret_label.clone());

        // Check false: the compare sets Z on a match, so this case needs its
        // own label where Z is cleared again before the select.
        asm.cmp(val, Qfalse.into());
        asm.je(false_label.clone());

        // Compare the builtin-type bits of the flags word with the expected
        // type; Z is set exactly when they match.
        let flags = asm.load(Opnd::mem(VALUE_BITS, val, RUBY_OFFSET_RBASIC_FLAGS));
        let obj_builtin_type = asm.and(flags, Opnd::UImm(RUBY_T_MASK as u64));
        asm.cmp(obj_builtin_type, Opnd::UImm(builtin_type as u64));
        asm.jmp(ret_label.clone());

        // If we get here then the value was false, unset the Z flag
        // so that csel_e will select false instead of true
        asm.write_label(false_label);
        asm.test(Opnd::UImm(1), Opnd::UImm(1));

        asm.write_label(ret_label);
        asm.csel_e(Qtrue.into(), Qfalse.into())
    } else {
        asm_ccall!(asm, rb_obj_is_kind_of, obj, class)
    }
}

/// Compile a new hash instruction
Expand Down