diff --git a/.github/workflows/java-wasm-bindings.yml b/.github/workflows/java-wasm-bindings.yml
index 7fe24455a7..de90a4173c 100644
--- a/.github/workflows/java-wasm-bindings.yml
+++ b/.github/workflows/java-wasm-bindings.yml
@@ -27,7 +27,7 @@ jobs:
bundler-cache: true
- name: rake templates
- run: PRISM_EXCLUDE_PRETTYPRINT=1 PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS=1 bundle exec rake templates
+ run: PRISM_EXCLUDE_PRETTYPRINT=1 PRISM_OMIT_NODE_ID=1 bundle exec rake templates
- name: Set up WASI-SDK
run: |
diff --git a/Makefile b/Makefile
index 0f6f5264d1..5e83dfa146 100644
--- a/Makefile
+++ b/Makefile
@@ -31,7 +31,7 @@ all: shared static
shared: build/libprism.$(SOEXT)
static: build/libprism.a
wasm: javascript/src/prism.wasm
-java-wasm: java/wasm/src/main/wasm/prism.wasm
+java-wasm: java/wasm/src/main/wasm/prism.wasm java/wasm-full/src/main/wasm/prism-full.wasm
build/libprism.$(SOEXT): $(SHARED_OBJECTS)
$(ECHO) "linking $@ with $(CC)"
@@ -52,6 +52,17 @@ javascript/src/prism.wasm: Makefile $(SOURCES) $(HEADERS)
-o $@ $(SOURCES)
+# Semantic-only Java WASM module; the full variant is java/wasm-full/src/main/wasm/prism-full.wasm.
java/wasm/src/main/wasm/prism.wasm: Makefile $(SOURCES) $(HEADERS)
+	$(ECHO) "building $@"
+	$(Q) $(MAKEDIRS) $(@D)
+	$(Q) $(WASI_SDK_PATH)/bin/clang \
+		$(DEBUG_FLAGS) \
+		-DPRISM_EXCLUDE_PRETTYPRINT -DPRISM_EXPORT_SYMBOLS -D_WASI_EMULATED_MMAN -DPRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS=1 \
+		-lwasi-emulated-mman $(CPPFLAGS) $(JAVA_WASM_CFLAGS) \
+		-Wl,--export-all -Wl,--no-entry -mexec-model=reactor -lc++ -lc++abi \
+		-o $@ $(SOURCES)
+
+java/wasm-full/src/main/wasm/prism-full.wasm: Makefile $(SOURCES) $(HEADERS)
$(ECHO) "building $@"
$(Q) $(MAKEDIRS) $(@D)
$(Q) $(WASI_SDK_PATH)/bin/clang \
diff --git a/Rakefile b/Rakefile
index eb96249985..998640eede 100644
--- a/Rakefile
+++ b/Rakefile
@@ -55,6 +55,7 @@ CLOBBER.concat(Prism::Template::TEMPLATES)
CLOBBER.concat(["build"])
CLOBBER << "lib/prism/prism.#{RbConfig::CONFIG["DLEXT"]}"
CLOBBER << "java/wasm/src/main/resources/prism.wasm"
+CLOBBER << "java/wasm-full/src/main/resources/prism-full.wasm" # NOTE(review): the Makefile writes java/wasm-full/src/main/wasm/prism-full.wasm -- confirm this path
Prism::Template::TEMPLATES.each do |filepath|
desc "Generate #{filepath}"
diff --git a/Steepfile b/Steepfile
index e6e1a8efb5..9db20dfa1e 100644
--- a/Steepfile
+++ b/Steepfile
@@ -15,4 +15,7 @@ target :lib do
# Ignored because we do not want to overlap with the C extension.
ignore "lib/prism/ffi.rb"
+ ignore "lib/prism/ffi/common.rb"
+ ignore "lib/prism/ffi/native_ffi.rb"
+ ignore "lib/prism/ffi/wasm_ffi.rb"
end
diff --git a/java/README.md b/java/README.md
index 0dd4215777..bd06639f67 100644
--- a/java/README.md
+++ b/java/README.md
@@ -15,7 +15,7 @@ Some files need to be generated before the Maven artifacts can build:
Sources under `api` are generated from templates in `../templates`. Those sources are generated using the follow command line:
```
-$ PRISM_EXCLUDE_PRETTYPRINT=1 PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS=1 bundle exec rake templates
+$ PRISM_EXCLUDE_PRETTYPRINT=1 bundle exec rake templates
```
The files are generated under `api/src/main/java-templates` and will not be removed with `mvn clean`.
diff --git a/java/api/pom.xml b/java/api/pom.xml
index 396fa33161..4fe0252d08 100644
--- a/java/api/pom.xml
+++ b/java/api/pom.xml
@@ -9,7 +9,7 @@
prism-parser-api
- Java Prism
+ Java Prism API
Java API for the Prism Ruby language parser
https://github.com/ruby/prism
diff --git a/java/pom.xml b/java/pom.xml
index f405af5304..8b71d4efee 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@
prism-parser
0.0.2-SNAPSHOT
pom
- Java Prism
+ Java Prism parent
Java API for the Prism Ruby language parser
https://github.com/ruby/prism
@@ -51,6 +51,7 @@
api
wasm
+ wasm-full
diff --git a/java/wasm-full/pom.xml b/java/wasm-full/pom.xml
new file mode 100644
index 0000000000..e70172d838
--- /dev/null
+++ b/java/wasm-full/pom.xml
@@ -0,0 +1,80 @@
+
+
+ 4.0.0
+
+
+ org.ruby-lang
+ prism-parser
+ 0.0.2-SNAPSHOT
+
+
+ prism-parser-wasm-full
+ Java Prism WASM with full parsed content
+ Java WASM bindings for the Prism parser shared library
+ https://github.com/ruby/prism
+
+
+ 1.7.5
+
+
+
+
+
+ com.dylibso.chicory
+ bom
+ ${chicory.version}
+ pom
+ import
+
+
+
+
+
+
+ com.dylibso.chicory
+ runtime
+
+
+ com.dylibso.chicory
+ log
+
+
+ com.dylibso.chicory
+ wasi
+
+
+ com.dylibso.chicory
+ wasm
+
+
+ org.junit.jupiter
+ junit-jupiter-engine
+ ${junit.version}
+ test
+
+
+
+
+
+
+ com.dylibso.chicory
+ chicory-compiler-maven-plugin
+ ${chicory.version}
+
+
+ prism
+
+ compile
+
+
+ org.ruby_lang.prism.wasm.full.PrismParser
+ org.ruby_lang.prism.wasm.full.Prism
+ src/main/wasm/prism-full.wasm
+
+
+
+
+
+
+
+
diff --git a/java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java b/java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java
new file mode 100644
index 0000000000..68c9707f7a
--- /dev/null
+++ b/java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java
@@ -0,0 +1,232 @@
+package org.ruby_lang.prism.wasm.full;
+
+import com.dylibso.chicory.runtime.ByteArrayMemory;
+import com.dylibso.chicory.runtime.ImportValues;
+import com.dylibso.chicory.runtime.Instance;
+import com.dylibso.chicory.wasi.WasiOptions;
+import com.dylibso.chicory.wasi.WasiPreview1;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Java entry point for the Prism parser compiled to WebAssembly and run through
+ * the Chicory runtime. This is the "full" variant, backed by prism-full.wasm.
+ *
+ * Inputs are copied into the module's linear memory, an exported serialize
+ * function is called, and the serialized result is read back out as bytes.
+ */
+public class Prism implements AutoCloseable {
+    private final WasiPreview1 wasi;
+    protected final Prism_ModuleExports exports;
+    private final Instance instance;
+
+    /** Creates a parser instance using default WASI options. */
+    public Prism() {
+        this(WasiOptions.builder().build());
+    }
+
+    /**
+     * Instantiates the compiled Prism module.
+     *
+     * @param wasiOpts options used to build the WASI preview 1 host functions
+     */
+    public Prism(WasiOptions wasiOpts) {
+        wasi = WasiPreview1.builder().withOptions(wasiOpts).build();
+        instance = Instance.builder(PrismParser.load())
+                .withMemoryFactory(ByteArrayMemory::new)
+                .withMachineFactory(PrismParser::create)
+                .withImportValues(ImportValues.builder().addFunction(wasi.toHostFunctions()).build())
+                .build();
+        exports = new Prism_ModuleExports(instance);
+    }
+
+    /**
+     * Reads the version string exposed by the module.
+     *
+     * @return the Prism version
+     */
+    public String version() {
+        int versionPointer = exports.pmVersion();
+        // strchr(ptr, 0) yields the address of the terminating NUL, so the
+        // difference between the two addresses is the length in bytes.
+        int terminator = exports.strchr(versionPointer, 0);
+        byte[] raw = instance.memory().readBytes(versionPointer, terminator - versionPointer);
+
+        // Decode with an explicit charset: new String(byte[]) uses the platform
+        // default, which makes the result depend on the host JVM configuration.
+        return new String(raw, StandardCharsets.UTF_8);
+    }
+
+    /**
+     * Parses all of sourceBytes and returns the serialized result.
+     *
+     * @param sourceBytes   source code bytes
+     * @param packedOptions serialized options, copied verbatim into module memory
+     * @return the bytes left in the output buffer by the parse call
+     */
+    public byte[] parse(byte[] sourceBytes, byte[] packedOptions) {
+        try (
+                Buffer buffer = new Buffer();
+                Source source = new Source(sourceBytes, 0, sourceBytes.length);
+                Options options = new Options(packedOptions)) {
+
+            return parse(buffer, source, options);
+        }
+    }
+
+    /**
+     * Lexes all of sourceBytes and returns the serialized result.
+     *
+     * @param sourceBytes   source code bytes
+     * @param packedOptions serialized options, copied verbatim into module memory
+     * @return the bytes left in the output buffer by the lex call
+     */
+    public byte[] lex(byte[] sourceBytes, byte[] packedOptions) {
+        try (
+                Buffer buffer = new Buffer();
+                Source source = new Source(sourceBytes, 0, sourceBytes.length);
+                Options options = new Options(packedOptions)) {
+
+            return lex(buffer, source, options);
+        }
+    }
+
+    /**
+     * Parses a slice of sourceBytes and returns the serialized result.
+     *
+     * @param sourceBytes   array holding the source code
+     * @param sourceOffset  index of the first byte to parse
+     * @param sourceLength  number of bytes to copy starting at sourceOffset
+     * @param packedOptions serialized options, copied verbatim into module memory
+     * @return the bytes left in the output buffer by the parse call
+     */
+    public byte[] parse(byte[] sourceBytes, int sourceOffset, int sourceLength, byte[] packedOptions) {
+        try (
+                Buffer buffer = new Buffer();
+                Source source = new Source(sourceBytes, sourceOffset, sourceLength);
+                Options options = new Options(packedOptions)) {
+
+            return parse(buffer, source, options);
+        }
+    }
+
+    /**
+     * Runs the parse export against already-allocated resources. This method
+     * does not close any of its arguments; that is left to the caller.
+     *
+     * @return the buffer contents read after the call
+     */
+    public byte[] parse(Buffer buffer, Source source, Options options) {
+        exports.pmSerializeParse(
+                buffer.pointer, source.pointer, source.length, options.pointer);
+
+        return buffer.read();
+    }
+
+    /**
+     * Runs the lex export against already-allocated resources. This method
+     * does not close any of its arguments; that is left to the caller.
+     *
+     * @return the buffer contents read after the call
+     */
+    public byte[] lex(Buffer buffer, Source source, Options options) {
+        exports.pmSerializeLex(
+                buffer.pointer, source.pointer, source.length, options.pointer);
+
+        return buffer.read();
+    }
+
+    /** Output buffer allocated inside the module; freed on close. */
+    public class Buffer implements AutoCloseable {
+        final int pointer;
+
+        Buffer() {
+            pointer = exports.pmBufferNew();
+            clear();
+        }
+
+        /** Clears the buffer via the module's buffer-clear export. */
+        public void clear() {
+            exports.pmBufferClear(pointer);
+        }
+
+        @Override
+        public void close() {
+            exports.pmBufferFree(pointer);
+        }
+
+        /** Reads the buffer's current value out of module memory. */
+        public byte[] read() {
+            return instance.memory().readBytes(
+                    exports.pmBufferValue(pointer),
+                    exports.pmBufferLength(pointer));
+        }
+    }
+
+    /** Source bytes copied into memory allocated inside the module; freed on close. */
+    public class Source implements AutoCloseable {
+        final int pointer;
+        final int length;
+
+        /** Allocates length zeroed bytes without writing any content. */
+        public Source(int length) {
+            pointer = exports.calloc(1, length);
+            this.length = length;
+        }
+
+        /**
+         * Copies length bytes of bytes, starting at offset, followed by a NUL.
+         *
+         * NOTE(review): the allocation is length + 1 bytes and that larger value
+         * is what the length field holds, so the serialize exports are handed a
+         * size that includes the trailing NUL. Confirm this is intended.
+         */
+        public Source(byte[] bytes, int offset, int length) {
+            this(length + 1);
+            write(bytes, offset, length);
+        }
+
+        /** Copies all of bytes, followed by a NUL. */
+        public Source(byte[] bytes) {
+            this(bytes, 0, bytes.length);
+        }
+
+        /**
+         * Writes length bytes at the start of the allocation and NUL-terminates
+         * them. The allocation must have room for the extra terminator byte.
+         */
+        public void write(byte[] bytes, int offset, int length) {
+            assert length + 1 <= this.length;
+            instance.memory().write(pointer, bytes, offset, length);
+            instance.memory().writeByte(pointer + length, (byte) 0);
+        }
+
+        @Override
+        public void close() {
+            exports.free(pointer);
+        }
+    }
+
+    /** Serialized options copied into memory allocated inside the module; freed on close. */
+    class Options implements AutoCloseable {
+        final int pointer;
+
+        Options(byte[] packedOptions) {
+            pointer = exports.calloc(1, packedOptions.length);
+            instance.memory().write(pointer, packedOptions);
+        }
+
+        @Override
+        public void close() {
+            exports.free(pointer);
+        }
+    }
+
+    /** Closes the WASI context created for this instance. */
+    @Override
+    public void close() {
+        if (wasi != null) {
+            wasi.close();
+        }
+    }
+}
diff --git a/java/wasm-full/src/main/wasm/prism-full.wasm b/java/wasm-full/src/main/wasm/prism-full.wasm
new file mode 100755
index 0000000000..5af8299c37
Binary files /dev/null and b/java/wasm-full/src/main/wasm/prism-full.wasm differ
diff --git a/java/wasm/pom.xml b/java/wasm/pom.xml
index b389c57ed1..c0772a7269 100644
--- a/java/wasm/pom.xml
+++ b/java/wasm/pom.xml
@@ -9,7 +9,7 @@
prism-parser-wasm
- Java Prism WASM
+ Java Prism WASM with semantic-only content
Java WASM bindings for the Prism parser shared library
https://github.com/ruby/prism
diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb
index 6b9bde51ea..93efcd0836 100644
--- a/lib/prism/ffi.rb
+++ b/lib/prism/ffi.rb
@@ -1,286 +1,57 @@
# frozen_string_literal: true
# :markup: markdown
+# --
# typed: ignore
-# This file is responsible for mirroring the API provided by the C extension by
-# using FFI to call into the shared library.
-
-require "rbconfig"
-require "ffi"
-
-# We want to eagerly load this file if there are Ractors so that it does not get
-# autoloaded from within a non-main Ractor.
-require "prism/serialize" if defined?(Ractor)
-
-module Prism # :nodoc:
- module LibRubyParser # :nodoc:
- extend FFI::Library
-
- # Define the library that we will be pulling functions from. Note that this
- # must align with the build shared library from make/rake.
- libprism_in_build = File.expand_path("../../build/libprism.#{RbConfig::CONFIG["SOEXT"]}", __dir__)
- libprism_in_libdir = "#{RbConfig::CONFIG["libdir"]}/prism/libprism.#{RbConfig::CONFIG["SOEXT"]}"
-
- if File.exist?(libprism_in_build)
- INCLUDE_DIR = File.expand_path("../../include", __dir__)
- ffi_lib libprism_in_build
- else
- INCLUDE_DIR = "#{RbConfig::CONFIG["libdir"]}/prism/include"
- ffi_lib libprism_in_libdir
- end
-
- # Convert a native C type declaration into a symbol that FFI understands.
- # For example:
- #
- # const char * -> :pointer
- # bool -> :bool
- # size_t -> :size_t
- # void -> :void
- #
- def self.resolve_type(type, callbacks)
- type = type.strip
-
- if !type.end_with?("*")
- type.delete_prefix("const ").to_sym
- else
- type = type.delete_suffix("*").rstrip
- callbacks.include?(type.to_sym) ? type.to_sym : :pointer
- end
- end
-
- # Read through the given header file and find the declaration of each of the
- # given functions. For each one, define a function with the same name and
- # signature as the C function.
- def self.load_exported_functions_from(header, *functions, callbacks)
- File.foreach("#{INCLUDE_DIR}/#{header}") do |line|
- # We only want to attempt to load exported functions.
- next unless line.start_with?("PRISM_EXPORTED_FUNCTION ")
-
- # We only want to load the functions that we are interested in.
- next unless functions.any? { |function| line.include?(function) }
-
- # Strip trailing attributes (PRISM_NODISCARD, PRISM_NONNULL(...), etc.)
- line = line.sub(/\)(\s+PRISM_\w+(?:\([^)]*\))?)+\s*;/, ");")
-
- # Parse the function declaration.
- unless /^PRISM_EXPORTED_FUNCTION (?.+) (?\w+)\((?.+)\);$/ =~ line
- raise "Could not parse #{line}"
- end
-
- # Delete the function from the list of functions we are looking for to
- # mark it as having been found.
- functions.delete(name)
-
- # Split up the argument types into an array, ensure we handle the case
- # where there are no arguments (by explicit void).
- arg_types = arg_types.split(",").map(&:strip)
- arg_types = [] if arg_types == %w[void]
-
- # Resolve the type of the argument by dropping the name of the argument
- # first if it is present.
- arg_types.map! { |type| resolve_type(type.sub(/\w+$/, ""), callbacks) }
-
- # Attach the function using the FFI library.
- attach_function name, arg_types, resolve_type(return_type, [])
- end
-
- # If we didn't find all of the functions, raise an error.
- raise "Could not find functions #{functions.inspect}" unless functions.empty?
- end
-
- callback :pm_source_stream_fgets_t, [:pointer, :int, :pointer], :pointer
- callback :pm_source_stream_feof_t, [:pointer], :int
- pm_source_init_result_values = %i[PM_SOURCE_INIT_SUCCESS PM_SOURCE_INIT_ERROR_GENERIC PM_SOURCE_INIT_ERROR_DIRECTORY PM_SOURCE_INIT_ERROR_NON_REGULAR]
- enum :pm_source_init_result_t, pm_source_init_result_values
- enum :pm_string_query_t, [:PM_STRING_QUERY_ERROR, -1, :PM_STRING_QUERY_FALSE, :PM_STRING_QUERY_TRUE]
-
- # Ractor-safe lookup table for pm_source_init_result_t, since FFI's
- # enum_type accesses module instance variables that are not shareable.
- SOURCE_INIT_RESULT = pm_source_init_result_values.freeze
-
- load_exported_functions_from(
- "prism/version.h",
- "pm_version",
- []
- )
-
- load_exported_functions_from(
- "prism/serialize.h",
- "pm_serialize_parse",
- "pm_serialize_parse_stream",
- "pm_serialize_parse_comments",
- "pm_serialize_lex",
- "pm_serialize_parse_lex",
- "pm_serialize_parse_success_p",
- []
- )
-
- load_exported_functions_from(
- "prism/string_query.h",
- "pm_string_query_local",
- "pm_string_query_constant",
- "pm_string_query_method_name",
- []
- )
-
- load_exported_functions_from(
- "prism/buffer.h",
- "pm_buffer_new",
- "pm_buffer_value",
- "pm_buffer_length",
- "pm_buffer_free",
- []
- )
-
- load_exported_functions_from(
- "prism/source.h",
- "pm_source_file_new",
- "pm_source_mapped_new",
- "pm_source_stream_new",
- "pm_source_free",
- "pm_source_source",
- "pm_source_length",
- [:pm_source_stream_fgets_t, :pm_source_stream_feof_t]
- )
-
- # This object represents a pm_buffer_t. We only use it as an opaque pointer,
- # so it doesn't need to know the fields of pm_buffer_t.
- class PrismBuffer # :nodoc:
- attr_reader :pointer
-
- def initialize(pointer)
- @pointer = pointer
- end
-
- def value
- LibRubyParser.pm_buffer_value(pointer)
- end
-
- def length
- LibRubyParser.pm_buffer_length(pointer)
- end
-
- def read
- value.read_string(length)
- end
-
- # Initialize a new buffer and yield it to the block. The buffer will be
- # automatically freed when the block returns.
- def self.with
- buffer = LibRubyParser.pm_buffer_new
- raise unless buffer
-
- begin
- yield new(buffer)
- ensure
- LibRubyParser.pm_buffer_free(buffer)
- end
- end
- end
-
- # This object represents source code to be parsed. For strings it wraps a
- # pointer directly; for files it uses a pm_source_t under the hood.
- class PrismSource # :nodoc:
- PLATFORM_EXPECTS_UTF8 =
- RbConfig::CONFIG["host_os"].match?(/bccwin|cygwin|djgpp|mingw|mswin|wince|darwin/i)
-
- attr_reader :pointer, :length
-
- def initialize(pointer, length, from_string)
- @pointer = pointer
- @length = length
- @from_string = from_string
- end
-
- def read
- raise "should use the original String instead" if @from_string
- @pointer.read_string(@length)
- end
-
- # Yields a PrismSource backed by the given string to the block.
- def self.with_string(string)
- raise TypeError unless string.is_a?(String)
-
- length = string.bytesize
- # + 1 to never get an address of 0, which pm_parser_init() asserts
- FFI::MemoryPointer.new(:char, length + 1, false) do |pointer|
- pointer.write_string(string)
- # since we have the extra byte we might as well \0-terminate
- pointer.put_char(length, 0)
- return yield new(pointer, length, true)
- end
- end
-
- # Yields a PrismSource to the given block, backed by a pm_source_t.
- def self.with_file(filepath)
- raise TypeError unless filepath.is_a?(String)
-
- # On Windows and Mac, it's expected that filepaths will be encoded in
- # UTF-8. If they are not, we need to convert them to UTF-8 before
- # passing them into pm_source_mapped_new.
- if PLATFORM_EXPECTS_UTF8 && (encoding = filepath.encoding) != Encoding::ASCII_8BIT && encoding != Encoding::UTF_8
- filepath = filepath.encode(Encoding::UTF_8)
- end
-
- FFI::MemoryPointer.new(:int) do |result_ptr|
- pm_source = LibRubyParser.pm_source_mapped_new(filepath, 0, result_ptr)
-
- case SOURCE_INIT_RESULT[result_ptr.read_int]
- when :PM_SOURCE_INIT_SUCCESS
- pointer = LibRubyParser.pm_source_source(pm_source)
- length = LibRubyParser.pm_source_length(pm_source)
- return yield new(pointer, length, false)
- when :PM_SOURCE_INIT_ERROR_GENERIC
- raise SystemCallError.new(filepath, FFI.errno)
- when :PM_SOURCE_INIT_ERROR_DIRECTORY
- raise Errno::EISDIR.new(filepath)
- when :PM_SOURCE_INIT_ERROR_NON_REGULAR
- # Fall back to reading the file through Ruby IO for non-regular
- # files (pipes, character devices, etc.)
- return with_string(File.read(filepath)) { |string| yield string }
- else
- raise "Unknown error initializing pm_source_t: #{result_ptr.read_int}"
- end
- ensure
- LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null?
- end
- end
- end
+# This file is responsible for mirroring the API provided by the C extension. There
+# are two backends:
+#
+# * Native FFI based on the 'ffi' gem
+# * WASM compiled to JVM bytecode (JRuby only)
+
+require_relative "ffi/common"
+
+begin
+ require_relative "ffi/native_ffi.rb"
+rescue LoadError
+ if RUBY_ENGINE == "jruby"
+ require_relative "ffi/wasm_ffi.rb"
+ else
+ raise
end
+end
- # Mark the LibRubyParser module as private as it should only be called through
- # the prism module.
- private_constant :LibRubyParser
+module Prism # :nodoc:
# The version constant is set by reading the result of calling pm_version.
- VERSION = LibRubyParser.pm_version.read_string.freeze
+ VERSION = FFICommon.version
class << self
# Mirror the Prism.dump API by using the serialization API.
def dump(source, **options)
- LibRubyParser::PrismSource.with_string(source) { |string| dump_common(string, options) }
+ FFICommon.with_string(source) { |string| FFICommon.dump(string, options) }
end
# Mirror the Prism.dump_file API by using the serialization API.
def dump_file(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismSource.with_file(filepath) { |string| dump_common(string, options) }
+ FFICommon.with_file(filepath) { |string| FFICommon.dump(string, options) }
end
# Mirror the Prism.lex API by using the serialization API.
def lex(code, **options)
- LibRubyParser::PrismSource.with_string(code) { |string| lex_common(string, code, options) }
+ FFICommon.with_string(code) { |string| FFICommon.lex(string, code, options) }
end
# Mirror the Prism.lex_file API by using the serialization API.
def lex_file(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismSource.with_file(filepath) { |string| lex_common(string, string.read, options) }
+ FFICommon.with_file(filepath) { |string| FFICommon.lex(string, string.read, options) }
end
# Mirror the Prism.parse API by using the serialization API.
def parse(code, **options)
- LibRubyParser::PrismSource.with_string(code) { |string| parse_common(string, code, options) }
+ FFICommon.with_string(code) { |string| FFICommon.parse(string, code, options) }
end
# Mirror the Prism.parse_file API by using the serialization API. This uses
@@ -288,12 +59,12 @@ def parse(code, **options)
# when it is available.
def parse_file(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismSource.with_file(filepath) { |string| parse_common(string, string.read, options) }
+ FFICommon.with_file(filepath) { |string| FFICommon.parse(string, string.read, options) }
end
# Mirror the Prism.parse_stream API by using the serialization API.
def parse_stream(stream, **options)
- LibRubyParser::PrismBuffer.with do |buffer|
+ FFICommon.with_buffer do |buffer|
source = +""
callback = -> (string, size, _) {
raise "Expected size to be >= 0, got: #{size}" if size <= 0
@@ -306,19 +77,13 @@ def parse_stream(stream, **options)
eof_callback = -> (_) { stream.eof? }
- pm_source = LibRubyParser.pm_source_stream_new(nil, callback, eof_callback)
- begin
- LibRubyParser.pm_serialize_parse_stream(buffer.pointer, pm_source, dump_options(options))
- Prism.load(source, buffer.read, options.fetch(:freeze, false))
- ensure
- LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null?
- end
+ FFICommon.parse_stream(buffer, callback, eof_callback, options, source)
end
end
# Mirror the Prism.parse_comments API by using the serialization API.
def parse_comments(code, **options)
- LibRubyParser::PrismSource.with_string(code) { |string| parse_comments_common(string, code, options) }
+ FFICommon.with_string(code) { |string| FFICommon.parse_comments(string, code, options) }
end
# Mirror the Prism.parse_file_comments API by using the serialization
@@ -326,23 +91,23 @@ def parse_comments(code, **options)
# to use mmap when it is available.
def parse_file_comments(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismSource.with_file(filepath) { |string| parse_comments_common(string, string.read, options) }
+ FFICommon.with_file(filepath) { |string| FFICommon.parse_comments(string, string.read, options) }
end
# Mirror the Prism.parse_lex API by using the serialization API.
def parse_lex(code, **options)
- LibRubyParser::PrismSource.with_string(code) { |string| parse_lex_common(string, code, options) }
+ FFICommon.with_string(code) { |string| FFICommon.parse_lex(string, code, options) }
end
# Mirror the Prism.parse_lex_file API by using the serialization API.
def parse_lex_file(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismSource.with_file(filepath) { |string| parse_lex_common(string, string.read, options) }
+ FFICommon.with_file(filepath) { |string| FFICommon.parse_lex(string, string.read, options) }
end
# Mirror the Prism.parse_success? API by using the serialization API.
def parse_success?(code, **options)
- LibRubyParser::PrismSource.with_string(code) { |string| parse_file_success_common(string, options) }
+ FFICommon.with_string(code) { |string| FFICommon.parse_file_success(string, options) }
end
# Mirror the Prism.parse_failure? API by using the serialization API.
@@ -353,7 +118,7 @@ def parse_failure?(code, **options)
# Mirror the Prism.parse_file_success? API by using the serialization API.
def parse_file_success?(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismSource.with_file(filepath) { |string| parse_file_success_common(string, options) }
+ FFICommon.with_file(filepath) { |string| FFICommon.parse_file_success(string, options) }
end
# Mirror the Prism.parse_file_failure? API by using the serialization API.
@@ -363,9 +128,9 @@ def parse_file_failure?(filepath, **options)
# Mirror the Prism.profile API by using the serialization API.
def profile(source, **options)
- LibRubyParser::PrismSource.with_string(source) do |string|
- LibRubyParser::PrismBuffer.with do |buffer|
- LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options))
+ FFICommon.with_string(source) do |string|
+ FFICommon.with_buffer do |buffer|
+ FFICommon.parse_only(buffer, string, options)
nil
end
end
@@ -373,205 +138,15 @@ def profile(source, **options)
# Mirror the Prism.profile_file API by using the serialization API.
def profile_file(filepath, **options)
- LibRubyParser::PrismSource.with_file(filepath) do |string|
- LibRubyParser::PrismBuffer.with do |buffer|
+ FFICommon.with_file(filepath) do |string|
+ FFICommon.with_buffer do |buffer|
options[:filepath] = filepath
- LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options))
+ FFICommon.parse_only(buffer, string, options)
nil
end
end
end
- private
-
- def dump_common(string, options) # :nodoc:
- LibRubyParser::PrismBuffer.with do |buffer|
- LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options))
-
- dumped = buffer.read
- dumped.freeze if options.fetch(:freeze, false)
-
- dumped
- end
- end
-
- def lex_common(string, code, options) # :nodoc:
- LibRubyParser::PrismBuffer.with do |buffer|
- LibRubyParser.pm_serialize_lex(buffer.pointer, string.pointer, string.length, dump_options(options))
- Serialize.load_lex(code, buffer.read, options.fetch(:freeze, false))
- end
- end
-
- def parse_common(string, code, options) # :nodoc:
- serialized = dump_common(string, options)
- Serialize.load_parse(code, serialized, options.fetch(:freeze, false))
- end
-
- def parse_comments_common(string, code, options) # :nodoc:
- LibRubyParser::PrismBuffer.with do |buffer|
- LibRubyParser.pm_serialize_parse_comments(buffer.pointer, string.pointer, string.length, dump_options(options))
- Serialize.load_parse_comments(code, buffer.read, options.fetch(:freeze, false))
- end
- end
-
- def parse_lex_common(string, code, options) # :nodoc:
- LibRubyParser::PrismBuffer.with do |buffer|
- LibRubyParser.pm_serialize_parse_lex(buffer.pointer, string.pointer, string.length, dump_options(options))
- Serialize.load_parse_lex(code, buffer.read, options.fetch(:freeze, false))
- end
- end
-
- def parse_file_success_common(string, options) # :nodoc:
- LibRubyParser.pm_serialize_parse_success_p(string.pointer, string.length, dump_options(options))
- end
-
- # Return the value that should be dumped for the command_line option.
- def dump_options_command_line(options)
- command_line = options.fetch(:command_line, "")
- raise ArgumentError, "command_line must be a string" unless command_line.is_a?(String)
-
- command_line.each_char.inject(0) do |value, char|
- case char
- when "a" then value | 0b000001
- when "e" then value | 0b000010
- when "l" then value | 0b000100
- when "n" then value | 0b001000
- when "p" then value | 0b010000
- when "x" then value | 0b100000
- else raise ArgumentError, "invalid command_line option: #{char}"
- end
- end
- end
-
- # Return the value that should be dumped for the version option.
- def dump_options_version(version)
- case version
- when "current"
- version_string_to_number(RUBY_VERSION) || raise(CurrentVersionError, RUBY_VERSION)
- when "latest", nil
- 0 # Handled in pm_parser_init
- when "nearest"
- dump = version_string_to_number(RUBY_VERSION)
- return dump if dump
- if RUBY_VERSION < "3.3"
- version_string_to_number("3.3")
- else
- 0 # Handled in pm_parser_init
- end
- else
- version_string_to_number(version) || raise(ArgumentError, "invalid version: #{version}")
- end
- end
-
- # Converts a version string like "4.0.0" or "4.0" into a number.
- # Returns nil if the version is unknown.
- def version_string_to_number(version)
- case version
- when /\A3\.3(\.\d+)?\z/
- 1
- when /\A3\.4(\.\d+)?\z/
- 2
- when /\A3\.5(\.\d+)?\z/, /\A4\.0(\.\d+)?\z/
- 3
- when /\A4\.1(\.\d+)?\z/
- 4
- end
- end
-
- # Convert the given options into a serialized options string.
- def dump_options(options)
- template = +""
- values = []
-
- template << "L"
- if (filepath = options[:filepath])
- values.push(filepath.bytesize, filepath.b)
- template << "A*"
- else
- values << 0
- end
-
- template << "l"
- values << options.fetch(:line, 1)
-
- template << "L"
- if (encoding = options[:encoding])
- name = encoding.is_a?(Encoding) ? encoding.name : encoding
- values.push(name.bytesize, name.b)
- template << "A*"
- else
- values << 0
- end
-
- template << "C"
- values << (options.fetch(:frozen_string_literal, false) ? 1 : 0)
-
- template << "C"
- values << dump_options_command_line(options)
-
- template << "C"
- values << dump_options_version(options[:version])
-
- template << "C"
- values << (options[:encoding] == false ? 1 : 0)
-
- template << "C"
- values << (options.fetch(:main_script, false) ? 1 : 0)
-
- template << "C"
- values << (options.fetch(:partial_script, false) ? 1 : 0)
-
- template << "C"
- values << (options.fetch(:freeze, false) ? 1 : 0)
-
- template << "L"
- if (scopes = options[:scopes])
- values << scopes.length
-
- scopes.each do |scope|
- locals = nil
- forwarding = 0
-
- case scope
- when Array
- locals = scope
- when Scope
- locals = scope.locals
-
- scope.forwarding.each do |forward|
- case forward
- when :* then forwarding |= 0x1
- when :** then forwarding |= 0x2
- when :& then forwarding |= 0x4
- when :"..." then forwarding |= 0x8
- else raise ArgumentError, "invalid forwarding value: #{forward}"
- end
- end
- else
- raise TypeError, "wrong argument type #{scope.class.inspect} (expected Array or Prism::Scope)"
- end
-
- template << "L"
- values << locals.length
-
- template << "C"
- values << forwarding
-
- locals.each do |local|
- name = local.name
- template << "L"
- values << name.bytesize
-
- template << "A*"
- values << name.b
- end
- end
- else
- values << 0
- end
-
- values.pack(template)
- end
end
# Here we are going to patch StringQuery to put in the class-level methods so
@@ -580,17 +155,17 @@ class StringQuery # :nodoc:
class << self
# Mirrors the C extension's StringQuery::local? method.
def local?(string)
- query(LibRubyParser.pm_string_query_local(string, string.bytesize, string.encoding.name))
+ query(FFICommon.string_query_local(string))
end
# Mirrors the C extension's StringQuery::constant? method.
def constant?(string)
- query(LibRubyParser.pm_string_query_constant(string, string.bytesize, string.encoding.name))
+ query(FFICommon.string_query_constant(string))
end
# Mirrors the C extension's StringQuery::method_name? method.
def method_name?(string)
- query(LibRubyParser.pm_string_query_method_name(string, string.bytesize, string.encoding.name))
+ query(FFICommon.string_query_method_name(string))
end
private
diff --git a/lib/prism/ffi/common.rb b/lib/prism/ffi/common.rb
new file mode 100644
index 0000000000..552810c059
--- /dev/null
+++ b/lib/prism/ffi/common.rb
@@ -0,0 +1,233 @@
+# frozen_string_literal: true
+# :markup: markdown
+# typed: ignore
+
+module Prism
+
+ class Common # :nodoc:
+    def dump(string, options) # :nodoc:
+      # Serialize the parse into a scratch buffer and return its bytes.
+      with_buffer do |buffer|
+        parse_only(buffer, string, options)
+
+        buffer.read.tap do |serialized|
+          serialized.freeze if options.fetch(:freeze, false)
+        end
+      end
+    end
+
+    def parse(string, code, options) # :nodoc:
+      frozen = options.fetch(:freeze, false)
+      Serialize.load_parse(code, dump(string, options), frozen)
+    end
+
+    def lex(string, code, options) # :nodoc:
+      with_buffer do |output| # lex_only writes the serialized tokens into output
+        lex_only(output, string, options)
+        Serialize.load_lex(code, output.read, options.fetch(:freeze, false))
+      end
+    end
+
+    # Fold the single-letter switches in options[:command_line] into one bit mask.
+    def dump_options_command_line(options)
+      command_line = options.fetch(:command_line, "")
+      raise ArgumentError, "command_line must be a string" unless command_line.is_a?(String)
+
+      # One bit per recognised switch; any other character is rejected below.
+      bits = {
+        "a" => 0b000001, "e" => 0b000010, "l" => 0b000100,
+        "n" => 0b001000, "p" => 0b010000, "x" => 0b100000
+      }
+
+      command_line.each_char.reduce(0) do |mask, switch|
+        bit = bits[switch]
+        raise ArgumentError, "invalid command_line option: #{switch}" unless bit
+        mask | bit
+      end
+    end
+
+    # Translate the :version option into the number written to the options blob.
+    def dump_options_version(version)
+      if version == "current"
+        number = version_string_to_number(RUBY_VERSION)
+        raise(CurrentVersionError, RUBY_VERSION) unless number
+        number
+      elsif version == "latest" || version.nil?
+        0 # Handled in pm_parser_init
+      elsif version == "nearest"
+        # Prefer the running Ruby; older Rubies map to 3.3, unknown newer ones to 0.
+        known = version_string_to_number(RUBY_VERSION)
+        next_best = RUBY_VERSION < "3.3" ? version_string_to_number("3.3") : 0
+        known || next_best
+      else
+        number = version_string_to_number(version)
+        raise(ArgumentError, "invalid version: #{version}") unless number
+        number
+      end
+    end
+
+    # Map a version string such as "4.0.0" or "4.0" onto its version number.
+    # Unknown versions yield nil.
+    def version_string_to_number(version)
+      numbers = {
+        /\A3\.3(\.\d+)?\z/ => 1,
+        /\A3\.4(\.\d+)?\z/ => 2,
+        /\A3\.5(\.\d+)?\z/ => 3,
+        /\A4\.0(\.\d+)?\z/ => 3,
+        /\A4\.1(\.\d+)?\z/ => 4
+      }
+      # The first pattern that matches wins; non-matching input falls through.
+      numbers.each { |pattern, number| return number if pattern === version }
+      nil
+    end
+
+ # Pack the given options hash into the binary options string handed to the serializer.
+ def dump_options(options)
+ template = +"" # Array#pack directives, appended in step with values
+ values = [] # one entry per directive in template
+
+ template << "L" # uint32 (native endian): filepath byte length, 0 when absent
+ if (filepath = options[:filepath])
+ values.push(filepath.bytesize, filepath.b)
+ template << "A*" # raw filepath bytes
+ else
+ values << 0
+ end
+
+ template << "l" # int32: starting line, defaults to 1
+ values << options.fetch(:line, 1)
+
+ template << "L" # uint32: encoding name byte length, 0 when absent
+ if (encoding = options[:encoding])
+ name = encoding.is_a?(Encoding) ? encoding.name : encoding
+ values.push(name.bytesize, name.b)
+ template << "A*" # raw encoding name bytes
+ else
+ values << 0
+ end
+
+ template << "C" # uint8 flag: frozen_string_literal
+ values << (options.fetch(:frozen_string_literal, false) ? 1 : 0)
+
+ template << "C" # uint8: command_line bit mask
+ values << dump_options_command_line(options)
+
+ template << "C" # uint8: version number
+ values << dump_options_version(options[:version])
+
+ template << "C" # uint8 flag: set only when encoding: false was passed
+ values << (options[:encoding] == false ? 1 : 0)
+
+ template << "C" # uint8 flag: main_script
+ values << (options.fetch(:main_script, false) ? 1 : 0)
+
+ template << "C" # uint8 flag: partial_script
+ values << (options.fetch(:partial_script, false) ? 1 : 0)
+
+ template << "C" # uint8 flag: freeze
+ values << (options.fetch(:freeze, false) ? 1 : 0)
+
+ template << "L" # uint32: number of scopes, 0 when none were given
+ if (scopes = options[:scopes])
+ values << scopes.length
+
+ scopes.each do |scope|
+ locals = nil
+ forwarding = 0 # stays 0 for plain Array scopes
+
+ case scope
+ when Array
+ locals = scope
+ when Scope
+ locals = scope.locals
+
+ scope.forwarding.each do |forward|
+ case forward
+ when :* then forwarding |= 0x1
+ when :** then forwarding |= 0x2
+ when :& then forwarding |= 0x4
+ when :"..." then forwarding |= 0x8
+ else raise ArgumentError, "invalid forwarding value: #{forward}"
+ end
+ end
+ else
+ raise TypeError, "wrong argument type #{scope.class.inspect} (expected Array or Prism::Scope)"
+ end
+
+ template << "L" # uint32: number of locals in this scope
+ values << locals.length
+
+ template << "C" # uint8: forwarding bit mask built above
+ values << forwarding
+
+ locals.each do |local|
+ name = local.name
+ template << "L" # uint32: byte length of the local's name
+ values << name.bytesize
+
+ template << "A*" # raw bytes of the local's name
+ values << name.b
+ end
+ end
+ else
+ values << 0
+ end
+
+ values.pack(template)
+ end
+
+ # Required APIs below: each backend subclass is expected to override these; the defaults raise NotImplementedError.
+
+ def version # :nodoc:
+ raise NotImplementedError
+ end
+
+ def with_buffer(&b) # :nodoc:
+ raise NotImplementedError
+ end
+
+ def with_string(string, &b) # :nodoc:
+ raise NotImplementedError
+ end
+
+ def with_file(string, &b) # :nodoc:
+ raise NotImplementedError
+ end
+
+ def lex_only(buffer, string, options) # :nodoc:
+ raise NotImplementedError
+ end
+
+ def parse_only(buffer, string, options) # :nodoc:
+ raise NotImplementedError
+ end
+
+ def parse_stream(buffer, callback, eof_callback, options, source) # :nodoc:
+ raise NotImplementedError
+ end
+
+ def parse_comments(string, code, options) # :nodoc:
+ raise NotImplementedError
+ end
+
+ def parse_lex(string, code, options) # :nodoc:
+ raise NotImplementedError
+ end
+
+ def parse_file_success(string, options) # :nodoc:
+ raise NotImplementedError
+ end
+
+ def string_query_method_name(string) # :nodoc:
+ raise NotImplementedError
+ end
+
+ def string_query_constant(string) # :nodoc:
+ raise NotImplementedError
+ end
+
+ def string_query_local(string) # :nodoc:
+ raise NotImplementedError
+ end
+ end
+end
diff --git a/lib/prism/ffi/native_ffi.rb b/lib/prism/ffi/native_ffi.rb
new file mode 100644
index 0000000000..c4fbbcb660
--- /dev/null
+++ b/lib/prism/ffi/native_ffi.rb
@@ -0,0 +1,323 @@
+# frozen_string_literal: true
+# :markup: markdown
+# --
+# typed: ignore
+
+require "rbconfig"
+require "ffi"
+
+# We want to eagerly load this file if there are Ractors so that it does not get
+# autoloaded from within a non-main Ractor.
+require "prism/serialize" if defined?(Ractor)
+
+module Prism # :nodoc:
+ module LibRubyParser # :nodoc:
+ extend FFI::Library
+
+ # Define the library that we will be pulling functions from. Note that this
+ # must align with the build shared library from make/rake.
+ libprism_in_build = File.expand_path("../../../build/libprism.#{RbConfig::CONFIG["SOEXT"]}", __dir__)
+ libprism_in_libdir = "#{RbConfig::CONFIG["libdir"]}/prism/libprism.#{RbConfig::CONFIG["SOEXT"]}"
+
+ if File.exist?(libprism_in_build)
+ INCLUDE_DIR = File.expand_path("../../../include", __dir__)
+ ffi_lib libprism_in_build
+ else
+ INCLUDE_DIR = "#{RbConfig::CONFIG["libdir"]}/prism/include"
+ ffi_lib libprism_in_libdir
+ end
+
+    # Translate a native C type declaration into the symbol FFI expects, e.g.:
+    #
+    #     const char * -> :pointer
+    #     bool -> :bool
+    #     size_t -> :size_t
+    #     void -> :void
+    #
+    # A pointer whose pointee names one of `callbacks` resolves to that callback.
+    def self.resolve_type(type, callbacks)
+      declaration = type.strip
+
+      # Anything that is not a pointer maps directly, minus a leading const.
+      return declaration.delete_prefix("const ").to_sym unless declaration.end_with?("*")
+
+      # Otherwise drop the trailing star and check the pointee against callbacks.
+      pointee = declaration.delete_suffix("*").rstrip.to_sym
+      callbacks.include?(pointee) ? pointee : :pointer
+    end
+
+    # Scan the given header for the declarations of the requested functions and
+    # attach each one through FFI under the same name and signature as the C
+    # function. The final argument lists the callback types pointers may resolve to.
+    def self.load_exported_functions_from(header, *functions, callbacks)
+      File.foreach("#{INCLUDE_DIR}/#{header}") do |line|
+        # We only want to attempt to load exported functions.
+        next unless line.start_with?("PRISM_EXPORTED_FUNCTION ")
+
+        # We only want to load the functions that we are interested in.
+        next unless functions.any? { |function| line.include?(function) }
+
+        # Strip trailing attributes (PRISM_NODISCARD, PRISM_NONNULL(...), etc.)
+        line = line.sub(/\)(\s+PRISM_\w+(?:\([^)]*\))?)+\s*;/, ");")
+
+        # Parse the function declaration. The named groups become the locals
+        # return_type, name and arg_types that the rest of the loop relies on.
+        unless /^PRISM_EXPORTED_FUNCTION (?<return_type>.+) (?<name>\w+)\((?<arg_types>.+)\);$/ =~ line
+          raise "Could not parse #{line}"
+        end
+
+        # Mark the function as found by removing it from the wanted list.
+        functions.delete(name)
+
+        # Split up the argument types into an array, ensure we handle the case
+        # where there are no arguments (by explicit void).
+        arg_types = arg_types.split(",").map(&:strip)
+        arg_types = [] if arg_types == %w[void]
+
+        # Resolve the type of the argument by dropping the name of the argument
+        # first if it is present.
+        arg_types.map! { |type| resolve_type(type.sub(/\w+$/, ""), callbacks) }
+
+        # Attach the function using the FFI library.
+        attach_function name, arg_types, resolve_type(return_type, [])
+      end
+
+      # If we didn't find all of the functions, raise an error.
+      raise "Could not find functions #{functions.inspect}" unless functions.empty?
+    end
+
+ callback :pm_source_stream_fgets_t, [:pointer, :int, :pointer], :pointer
+ callback :pm_source_stream_feof_t, [:pointer], :int
+ pm_source_init_result_values = %i[PM_SOURCE_INIT_SUCCESS PM_SOURCE_INIT_ERROR_GENERIC PM_SOURCE_INIT_ERROR_DIRECTORY PM_SOURCE_INIT_ERROR_NON_REGULAR]
+ enum :pm_source_init_result_t, pm_source_init_result_values
+ enum :pm_string_query_t, [:PM_STRING_QUERY_ERROR, -1, :PM_STRING_QUERY_FALSE, :PM_STRING_QUERY_TRUE]
+
+ # Ractor-safe lookup table for pm_source_init_result_t, since FFI's
+ # enum_type accesses module instance variables that are not shareable.
+ SOURCE_INIT_RESULT = pm_source_init_result_values.freeze
+
+ load_exported_functions_from(
+ "prism/version.h",
+ "pm_version",
+ []
+ )
+
+ load_exported_functions_from(
+ "prism/serialize.h",
+ "pm_serialize_parse",
+ "pm_serialize_parse_stream",
+ "pm_serialize_parse_comments",
+ "pm_serialize_lex",
+ "pm_serialize_parse_lex",
+ "pm_serialize_parse_success_p",
+ []
+ )
+
+ load_exported_functions_from(
+ "prism/string_query.h",
+ "pm_string_query_local",
+ "pm_string_query_constant",
+ "pm_string_query_method_name",
+ []
+ )
+
+ load_exported_functions_from(
+ "prism/buffer.h",
+ "pm_buffer_new",
+ "pm_buffer_value",
+ "pm_buffer_length",
+ "pm_buffer_free",
+ []
+ )
+
+ load_exported_functions_from(
+ "prism/source.h",
+ "pm_source_file_new",
+ "pm_source_mapped_new",
+ "pm_source_stream_new",
+ "pm_source_free",
+ "pm_source_source",
+ "pm_source_length",
+ [:pm_source_stream_fgets_t, :pm_source_stream_feof_t]
+ )
+
+ # This object represents a pm_buffer_t. We only use it as an opaque pointer,
+ # so it doesn't need to know the fields of pm_buffer_t.
+ class NativeBuffer # :nodoc:
+ attr_reader :pointer
+
+ def initialize(pointer)
+ @pointer = pointer
+ end
+
+ def value
+ LibRubyParser.pm_buffer_value(pointer)
+ end
+
+ def length
+ LibRubyParser.pm_buffer_length(pointer)
+ end
+
+ def read
+ value.read_string(length)
+ end
+
+      # Allocate a pm_buffer_t, yield it wrapped in a NativeBuffer, and free it
+      # once the block returns or raises.
+      def self.with
+        buffer = LibRubyParser.pm_buffer_new
+        # A NULL return arrives as a null FFI::Pointer, which is truthy in Ruby.
+        raise "pm_buffer_new failed to allocate a buffer" if buffer.nil? || buffer.null?
+        begin
+          yield new(buffer)
+        ensure
+          LibRubyParser.pm_buffer_free(buffer)
+        end
+      end
+ end
+
+ # This object represents source code to be parsed. For strings it wraps a
+ # pointer directly; for files it uses a pm_source_t under the hood.
+ class NativeSource # :nodoc:
+ PLATFORM_EXPECTS_UTF8 =
+ RbConfig::CONFIG["host_os"].match?(/bccwin|cygwin|djgpp|mingw|mswin|wince|darwin/i)
+
+ attr_reader :pointer, :length
+
+ def initialize(pointer, length, from_string)
+ @pointer = pointer
+ @length = length
+ @from_string = from_string
+ end
+
+ def read
+ raise "should use the original String instead" if @from_string
+ @pointer.read_string(@length)
+ end
+
+ # Yields a NativeSource backed by a copy of the given string to the block.
+ def self.with_string(string)
+ raise TypeError unless string.is_a?(String)
+
+ length = string.bytesize
+ # + 1 to never get an address of 0, which pm_parser_init() asserts
+ FFI::MemoryPointer.new(:char, length + 1, false) do |pointer|
+ pointer.write_string(string)
+ # since we have the extra byte we might as well \0-terminate
+ pointer.put_char(length, 0)
+ return yield new(pointer, length, true)
+ end
+ end
+
+ # Yields a NativeSource to the given block, normally backed by a pm_source_t.
+ def self.with_file(filepath)
+ raise TypeError unless filepath.is_a?(String)
+
+ # On Windows and Mac, it's expected that filepaths will be encoded in
+ # UTF-8. If they are not, we need to convert them to UTF-8 before
+ # passing them into pm_source_mapped_new.
+ if PLATFORM_EXPECTS_UTF8 && (encoding = filepath.encoding) != Encoding::ASCII_8BIT && encoding != Encoding::UTF_8
+ filepath = filepath.encode(Encoding::UTF_8)
+ end
+
+ FFI::MemoryPointer.new(:int) do |result_ptr| # out-parameter receiving the init result code
+ pm_source = LibRubyParser.pm_source_mapped_new(filepath, 0, result_ptr)
+
+ case SOURCE_INIT_RESULT[result_ptr.read_int] # plain array lookup rather than FFI's enum_type
+ when :PM_SOURCE_INIT_SUCCESS
+ pointer = LibRubyParser.pm_source_source(pm_source)
+ length = LibRubyParser.pm_source_length(pm_source)
+ return yield new(pointer, length, false) # returns the block's value from with_file; the ensure below still runs
+ when :PM_SOURCE_INIT_ERROR_GENERIC
+ raise SystemCallError.new(filepath, FFI.errno)
+ when :PM_SOURCE_INIT_ERROR_DIRECTORY
+ raise Errno::EISDIR.new(filepath)
+ when :PM_SOURCE_INIT_ERROR_NON_REGULAR
+ # Fall back to reading the file through Ruby IO for non-regular
+ # files (pipes, character devices, etc.)
+ return with_string(File.read(filepath)) { |string| yield string }
+ else
+ raise "Unknown error initializing pm_source_t: #{result_ptr.read_int}"
+ end
+ ensure
+ LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null? # skipped when no non-null source was returned
+ end
+ end
+ end
+ end
+
+ # Mark the LibRubyParser module as private as it should only be called through
+ # the prism module.
+ private_constant :LibRubyParser
+
+ class NativeCommon < Common # :nodoc:
+
+ # Returns the frozen version string read from the result of calling pm_version.
+ def version
+ LibRubyParser.pm_version.read_string.freeze
+ end
+
+ def with_buffer(&b) # :nodoc:
+ LibRubyParser::NativeBuffer.with(&b)
+ end
+
+ def with_string(string, &b) # :nodoc:
+ LibRubyParser::NativeSource.with_string(string, &b)
+ end
+
+ def with_file(string, &b) # :nodoc:
+ LibRubyParser::NativeSource.with_file(string, &b)
+ end
+
+ def lex_only(buffer, string, options) # :nodoc:
+ LibRubyParser.pm_serialize_lex(buffer.pointer, string.pointer, string.length, dump_options(options))
+ end
+
+ def parse_only(buffer, string, options) # :nodoc:
+ LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options))
+ end
+
+    def parse_stream(buffer, callback, eof_callback, options, source) # :nodoc:
+      stream = LibRubyParser.pm_source_stream_new(nil, callback, eof_callback)
+      packed_options = dump_options(options)
+      LibRubyParser.pm_serialize_parse_stream(buffer.pointer, stream, packed_options)
+      Prism.load(source, buffer.read, options.fetch(:freeze, false))
+    ensure
+      # The stream source is released whether serialization succeeds or raises.
+      LibRubyParser.pm_source_free(stream) if stream && !stream.null?
+    end
+
+ def parse_comments(string, code, options) # :nodoc:
+ with_buffer do |buffer|
+ LibRubyParser.pm_serialize_parse_comments(buffer.pointer, string.pointer, string.length, dump_options(options))
+ Serialize.load_parse_comments(code, buffer.read, options.fetch(:freeze, false))
+ end
+ end
+
+ def parse_lex(string, code, options) # :nodoc:
+ with_buffer do |buffer|
+ LibRubyParser.pm_serialize_parse_lex(buffer.pointer, string.pointer, string.length, dump_options(options))
+ Serialize.load_parse_lex(code, buffer.read, options.fetch(:freeze, false))
+ end
+ end
+
+ def parse_file_success(string, options) # :nodoc:
+ LibRubyParser.pm_serialize_parse_success_p(string.pointer, string.length, dump_options(options))
+ end
+
+ def string_query_method_name(string) # :nodoc:
+ LibRubyParser.pm_string_query_method_name(string, string.bytesize, string.encoding.name)
+ end
+
+ def string_query_constant(string) # :nodoc:
+ LibRubyParser.pm_string_query_constant(string, string.bytesize, string.encoding.name)
+ end
+
+ def string_query_local(string) # :nodoc:
+ LibRubyParser.pm_string_query_local(string, string.bytesize, string.encoding.name)
+ end
+ end
+
+ FFICommon = NativeCommon.new.freeze
+ private_constant(:FFICommon)
+end
diff --git a/lib/prism/ffi/wasm_ffi.rb b/lib/prism/ffi/wasm_ffi.rb
new file mode 100644
index 0000000000..560001fcb0
--- /dev/null
+++ b/lib/prism/ffi/wasm_ffi.rb
@@ -0,0 +1,94 @@
+# frozen_string_literal: true
+# :markup: markdown
+# --
+# typed: ignore
+
+require "rbconfig"
+require "ffi"
+
+# We want to eagerly load this file if there are Ractors so that it does not get
+# autoloaded from within a non-main Ractor.
+require "prism/serialize" if defined?(Ractor)
+
+# Load the prism-parser-wasm jar
+require 'jar-dependencies'
+require_jar('org.ruby-lang', 'prism-parser-wasm-full', '0.0.2-SNAPSHOT')
+require_jar('com.dylibso.chicory', 'runtime', '1.6.1')
+require_jar('com.dylibso.chicory', 'wasi', '1.6.1')
+require_jar('com.dylibso.chicory', 'wasm', '1.6.1')
+require_jar('com.dylibso.chicory', 'log', '1.6.1')
+
+module Prism # :nodoc:
+ class WASMCommon < Common # :nodoc:
+ java_import org.ruby_lang.prism.wasm.full.Prism
+
+ # TODO: concurrency
+ PRISM = org.ruby_lang.prism.wasm.full.Prism.new
+
+ def version
+ # The version constant is set by reading the result of calling pm_version.
+ PRISM.version
+ end
+
+ def with_buffer(&b) # :nodoc:
+ buffer = Prism::Buffer.new
+ begin
+ b.call(buffer)
+ ensure
+ buffer.close
+ end
+ end
+
+ def with_string(string, &b) # :nodoc:
+ source = Prism::Source.new(string.to_java_bytes)
+ begin
+ b.call(source)
+ ensure
+ source.close
+ end
+ end
+
+ def with_file(string, &b) # :nodoc:
+ raise NotImplementedError
+ end
+
+ def lex_only(buffer, string, options) # :nodoc:
+ String.from_java_bytes(Prism.lex(buffer, string, dump_options(options)))
+ end
+
+ def parse_only(buffer, string, options) # :nodoc:
+ String.from_java_bytes(Prism.lex(buffer, string, dump_options(options))) # NOTE(review): same Prism.lex call as lex_only, which looks like a copy-paste; confirm the parse entry point of the Java Prism class
+ end
+
+ def parse_stream(buffer, callback, eof_callback, options, source) # :nodoc:
+ raise NotImplementedError
+ end
+
+ def parse_comments(string, code, options) # :nodoc:
+ raise NotImplementedError
+ end
+
+ def parse_lex(string, code, options) # :nodoc:
+ raise NotImplementedError
+ end
+
+ def parse_file_success(string, options) # :nodoc:
+ raise NotImplementedError
+ end
+
+ def string_query_method_name(string) # :nodoc:
+ raise NotImplementedError
+ end
+
+ def string_query_constant(string) # :nodoc:
+ raise NotImplementedError
+ end
+
+ def string_query_local(string) # :nodoc:
+ raise NotImplementedError
+ end
+ end
+
+ FFICommon = WASMCommon.new.freeze
+ private_constant(:FFICommon)
+end
diff --git a/prism.gemspec b/prism.gemspec
index aac056b3f8..449b9c052c 100644
--- a/prism.gemspec
+++ b/prism.gemspec
@@ -113,6 +113,9 @@ Gem::Specification.new do |spec|
"lib/prism/dot_visitor.rb",
"lib/prism/dsl.rb",
"lib/prism/ffi.rb",
+ "lib/prism/ffi/common.rb",
+ "lib/prism/ffi/native_ffi.rb",
+ "lib/prism/ffi/wasm_ffi.rb",
"lib/prism/inspect_visitor.rb",
"lib/prism/lex_compat.rb",
"lib/prism/mutation_compiler.rb",
diff --git a/rakelib/serialization.rake b/rakelib/serialization.rake
index 516e8fe5ba..65c382fcc3 100644
--- a/rakelib/serialization.rake
+++ b/rakelib/serialization.rake
@@ -1,12 +1,9 @@
# frozen_string_literal: true
task "test:java_loader" do
- # Recompile with PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS=1
- # Due to some JRuby bug this does not get propagated to the compile task, so require the caller to set the env var
- # ENV["PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS"] = "1"
- raise "this task requires $SERIALIZE_ONLY_SEMANTICS_FIELDS to be set" unless ENV["PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS"]
-
Rake::Task["clobber"].invoke
+ # All Java API consumers want semantic-only build; keep any CFLAGS the caller already exported
+ ENV["CFLAGS"] = [ENV["CFLAGS"], "-DPRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS=1"].compact.join(" ")
Rake::Task["test:java_loader:internal"].invoke
end
diff --git a/src/prism.c b/src/prism.c
index 72c49da6f2..61233b5d43 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -22843,7 +22843,11 @@ pm_serialize_header(pm_buffer_t *buffer) {
pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
- pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
+ #ifdef PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
+ pm_buffer_append_byte(buffer, 1);
+ #else
+ pm_buffer_append_byte(buffer, 0);
+ #endif
}
/**
diff --git a/templates/include/prism/ast.h.erb b/templates/include/prism/ast.h.erb
index 3b3be25e76..5306c0abe4 100644
--- a/templates/include/prism/ast.h.erb
+++ b/templates/include/prism/ast.h.erb
@@ -267,12 +267,4 @@ typedef enum pm_<%= flag.human %> {
PRISM_EXPORTED_FUNCTION pm_<%= node.human %>_t * pm_<%= node.human %>_new(pm_arena_t *arena, uint32_t node_id, pm_node_flags_t flags, pm_location_t location<%= params.empty? ? "" : ", #{params.join(", ")}" %>);
<%- end -%>
-/**
- * When we're serializing to Java, we want to skip serializing the location
- * fields as they won't be used by JRuby or TruffleRuby. This boolean allows us
- * to specify that through the environment. It will never be true except for in
- * those build systems.
- */
-#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS <%= Prism::Template::SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0 %>
-
#endif
diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb
index 3d9811e5db..2e2fa65251 100644
--- a/templates/src/serialize.c.erb
+++ b/templates/src/serialize.c.erb
@@ -91,8 +91,12 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
size_t length_offset = buffer->length;
pm_buffer_append_string(buffer, "\0\0\0\0", 4); /* consume 4 bytes, updated below */
<%- end -%>
- <%- unless Prism::Template::SERIALIZE_ONLY_SEMANTICS_FIELDS && !node.flags -%>
+ <%- if node.flags -%>
pm_buffer_append_varuint(buffer, (uint32_t) node->flags);
+ <%- else -%>
+ #ifndef PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
+ pm_buffer_append_varuint(buffer, (uint32_t) node->flags);
+ #endif
<%- end -%>
<%- node.fields.each do |field| -%>
<%- case field -%>
@@ -121,17 +125,25 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_<%= node.human %>_t *)node)-><%= field.name %>.ids[index]));
}
<%- when Prism::Template::LocationField -%>
- <%- if field.should_be_serialized? -%>
+ <%- unless field.semantic_field? -%>
+ #ifndef PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
+ <%- end -%>
pm_serialize_location(&((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
+ <%- unless field.semantic_field? -%>
+ #endif
<%- end -%>
<%- when Prism::Template::OptionalLocationField -%>
- <%- if field.should_be_serialized? -%>
+ <%- unless field.semantic_field? -%>
+ #ifndef PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
+ <%- end -%>
if (((pm_<%= node.human %>_t *)node)-><%= field.name %>.length == 0) {
pm_buffer_append_byte(buffer, 0);
} else {
pm_buffer_append_byte(buffer, 1);
pm_serialize_location(&((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
}
+ <%- unless field.semantic_field? -%>
+ #endif
<%- end -%>
<%- when Prism::Template::UInt8Field -%>
pm_buffer_append_byte(buffer, ((pm_<%= node.human %>_t *)node)-><%= field.name %>);
@@ -261,9 +273,9 @@ pm_serialize_metadata(pm_parser_t *parser, pm_buffer_t *buffer) {
pm_serialize_encoding(parser->encoding, buffer);
pm_buffer_append_varsint(buffer, parser->start_line);
pm_serialize_line_offset_list(&parser->line_offsets, buffer);
-<%- unless Prism::Template::SERIALIZE_ONLY_SEMANTICS_FIELDS -%>
- pm_serialize_comment_list(&parser->comment_list, buffer);
-<%- end -%>
+ #ifndef PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
+ pm_serialize_comment_list(&parser->comment_list, buffer);
+ #endif
pm_serialize_magic_comment_list(&parser->magic_comment_list, buffer);
pm_serialize_data_loc(parser, buffer);
pm_serialize_diagnostic_list(&parser->error_list, buffer);
diff --git a/templates/template.rb b/templates/template.rb
index 7638c9c058..92140851a3 100755
--- a/templates/template.rb
+++ b/templates/template.rb
@@ -7,12 +7,11 @@
module Prism
module Template # :nodoc: all
- SERIALIZE_ONLY_SEMANTICS_FIELDS = ENV.fetch("PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS", false)
CHECK_FIELD_KIND = ENV.fetch("CHECK_FIELD_KIND", false)
JAVA_BACKEND = ENV["PRISM_JAVA_BACKEND"] || "default"
JAVA_IDENTIFIER_TYPE = JAVA_BACKEND == "truffleruby" ? "String" : "byte[]"
- INCLUDE_NODE_ID = !SERIALIZE_ONLY_SEMANTICS_FIELDS || JAVA_BACKEND == "jruby"
+ INCLUDE_NODE_ID = ENV.fetch("PRISM_INCLUDE_NODE_ID", "true") != "false" # NOTE(review): the java-wasm-bindings workflow exports PRISM_OMIT_NODE_ID=1, which this line does not read; confirm which variable name is intended
COMMON_FLAGS_COUNT = 2
@@ -95,10 +94,6 @@ def each_comment_java_line(&block)
def semantic_field?
true
end
-
- def should_be_serialized?
- SERIALIZE_ONLY_SEMANTICS_FIELDS ? semantic_field? : true
- end
end
# Some node fields can be specialized if they point to a specific kind of