From 89fce054c7ae19f9c47b865a9718c2f4e9b32b21 Mon Sep 17 00:00:00 2001 From: Seong Yong-ju Date: Wed, 28 Jan 2026 21:45:00 +0900 Subject: [PATCH] refactor: modularize rust bindings following Ruby's structure Reorganize ruby-prism crate to mirror lib/prism/ structure: - node.rs: NodeList, ConstantList, Integer (like node.rb) - parse_result/mod.rs: ParseResult, Location (like parse_result.rb) - parse_result/comments.rs: Comment, MagicComment (like parse_result/comments.rb) - parse_result/diagnostics.rs: Diagnostic (like parse_result/errors.rb) Co-Authored-By: Claude Opus 4.5 --- rust/ruby-prism/src/lib.rs | 651 +----------------- rust/ruby-prism/src/node.rs | 303 ++++++++ rust/ruby-prism/src/parse_result/comments.rs | 143 ++++ .../src/parse_result/diagnostics.rs | 71 ++ rust/ruby-prism/src/parse_result/mod.rs | 188 +++++ 5 files changed, 712 insertions(+), 644 deletions(-) create mode 100644 rust/ruby-prism/src/node.rs create mode 100644 rust/ruby-prism/src/parse_result/comments.rs create mode 100644 rust/ruby-prism/src/parse_result/diagnostics.rs create mode 100644 rust/ruby-prism/src/parse_result/mod.rs diff --git a/rust/ruby-prism/src/lib.rs b/rust/ruby-prism/src/lib.rs index c96d66a956..6824768193 100644 --- a/rust/ruby-prism/src/lib.rs +++ b/rust/ruby-prism/src/lib.rs @@ -13,654 +13,17 @@ mod bindings { include!(concat!(env!("OUT_DIR"), "/bindings.rs")); } -use std::ffi::{c_char, CStr}; -use std::marker::PhantomData; +mod node; +mod parse_result; + use std::mem::MaybeUninit; use std::ptr::NonNull; pub use self::bindings::*; -use ruby_prism_sys::{pm_comment_t, pm_comment_type_t, pm_constant_id_list_t, pm_constant_id_t, pm_diagnostic_t, pm_integer_t, pm_location_t, pm_magic_comment_t, pm_node_destroy, pm_node_list, pm_node_t, pm_parse, pm_parser_free, pm_parser_init, pm_parser_t}; - -/// A range in the source file, represented as a start offset and length. 
-pub struct Location<'pr> { - parser: NonNull, - pub(crate) start: u32, - pub(crate) length: u32, - marker: PhantomData<&'pr [u8]>, -} - -impl<'pr> Location<'pr> { - /// Returns a byte slice for the range. - #[must_use] - pub fn as_slice(&self) -> &'pr [u8] { - unsafe { - let parser_start = (*self.parser.as_ptr()).start; - std::slice::from_raw_parts(parser_start.add(self.start as usize), self.length as usize) - } - } - - /// Return a Location from the given `pm_location_t`. - #[must_use] - pub(crate) const fn new(parser: NonNull, location: &'pr pm_location_t) -> Self { - Location { - parser, - start: location.start, - length: location.length, - marker: PhantomData, - } - } - - /// Returns the end offset from the beginning of the parsed source. - #[must_use] - pub const fn end(&self) -> u32 { - self.start + self.length - } - - /// Return a Location starting at self and ending at the end of other. - /// Returns None if both locations did not originate from the same parser, - /// or if self starts after other. - #[must_use] - pub fn join(&self, other: &Self) -> Option { - if self.parser != other.parser || self.start > other.start { - None - } else { - Some(Location { - parser: self.parser, - start: self.start, - length: other.end() - self.start, - marker: PhantomData, - }) - } - } -} - -impl std::fmt::Debug for Location<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let slice: &[u8] = self.as_slice(); - - let mut visible = String::new(); - visible.push('"'); - - for &byte in slice { - let part: Vec = std::ascii::escape_default(byte).collect(); - visible.push_str(std::str::from_utf8(&part).unwrap()); - } - - visible.push('"'); - write!(f, "{visible}") - } -} - -/// An iterator over the nodes in a list. 
-pub struct NodeListIter<'pr> { - parser: NonNull, - pointer: NonNull, - index: usize, - marker: PhantomData<&'pr mut pm_node_list>, -} - -impl<'pr> Iterator for NodeListIter<'pr> { - type Item = Node<'pr>; - - fn next(&mut self) -> Option { - if self.index >= unsafe { self.pointer.as_ref().size } { - None - } else { - let node: *mut pm_node_t = unsafe { *(self.pointer.as_ref().nodes.add(self.index)) }; - self.index += 1; - Some(Node::new(self.parser, node)) - } - } -} - -/// A list of nodes. -pub struct NodeList<'pr> { - parser: NonNull, - pointer: NonNull, - marker: PhantomData<&'pr mut pm_node_list>, -} - -impl<'pr> NodeList<'pr> { - unsafe fn at(&self, index: usize) -> Node<'pr> { - let node: *mut pm_node_t = *(self.pointer.as_ref().nodes.add(index)); - Node::new(self.parser, node) - } - - /// Returns an iterator over the nodes. - #[must_use] - pub const fn iter(&self) -> NodeListIter<'pr> { - NodeListIter { - parser: self.parser, - pointer: self.pointer, - index: 0, - marker: PhantomData, - } - } - - /// Returns the length of the list. - #[must_use] - pub const fn len(&self) -> usize { - unsafe { self.pointer.as_ref().size } - } - - /// Returns whether the list is empty. - #[must_use] - pub const fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Returns the first element of the list, or `None` if it is empty. - #[must_use] - pub fn first(&self) -> Option> { - if self.is_empty() { - None - } else { - Some(unsafe { self.at(0) }) - } - } - - /// Returns the last element of the list, or `None` if it is empty. 
- #[must_use] - pub fn last(&self) -> Option> { - if self.is_empty() { - None - } else { - Some(unsafe { self.at(self.len() - 1) }) - } - } -} - -impl<'pr> IntoIterator for &NodeList<'pr> { - type Item = Node<'pr>; - type IntoIter = NodeListIter<'pr>; - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} +pub use self::node::{ConstantId, ConstantList, ConstantListIter, Integer, NodeList, NodeListIter}; +pub use self::parse_result::{Comment, CommentType, Comments, Diagnostic, Diagnostics, Location, MagicComment, MagicComments, ParseResult}; -impl std::fmt::Debug for NodeList<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self.iter().collect::>()) - } -} - -/// A handle for a constant ID. -pub struct ConstantId<'pr> { - parser: NonNull, - id: pm_constant_id_t, - marker: PhantomData<&'pr mut pm_constant_id_t>, -} - -impl<'pr> ConstantId<'pr> { - const fn new(parser: NonNull, id: pm_constant_id_t) -> Self { - ConstantId { parser, id, marker: PhantomData } - } - - /// Returns a byte slice for the constant ID. - /// - /// # Panics - /// - /// Panics if the constant ID is not found in the constant pool. - #[must_use] - pub fn as_slice(&self) -> &'pr [u8] { - unsafe { - let pool = &(*self.parser.as_ptr()).constant_pool; - let constant = &(*pool.constants.add((self.id - 1).try_into().unwrap())); - std::slice::from_raw_parts(constant.start, constant.length) - } - } -} - -impl std::fmt::Debug for ConstantId<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self.id) - } -} - -/// An iterator over the constants in a list. 
-pub struct ConstantListIter<'pr> { - parser: NonNull, - pointer: NonNull, - index: usize, - marker: PhantomData<&'pr mut pm_constant_id_list_t>, -} - -impl<'pr> Iterator for ConstantListIter<'pr> { - type Item = ConstantId<'pr>; - - fn next(&mut self) -> Option { - if self.index >= unsafe { self.pointer.as_ref().size } { - None - } else { - let constant_id: pm_constant_id_t = unsafe { *(self.pointer.as_ref().ids.add(self.index)) }; - self.index += 1; - Some(ConstantId::new(self.parser, constant_id)) - } - } -} - -/// A list of constants. -pub struct ConstantList<'pr> { - /// The raw pointer to the parser where this list came from. - parser: NonNull, - - /// The raw pointer to the list allocated by prism. - pointer: NonNull, - - /// The marker to indicate the lifetime of the pointer. - marker: PhantomData<&'pr mut pm_constant_id_list_t>, -} - -impl<'pr> ConstantList<'pr> { - const unsafe fn at(&self, index: usize) -> ConstantId<'pr> { - let constant_id: pm_constant_id_t = *(self.pointer.as_ref().ids.add(index)); - ConstantId::new(self.parser, constant_id) - } - - /// Returns an iterator over the constants in the list. - #[must_use] - pub const fn iter(&self) -> ConstantListIter<'pr> { - ConstantListIter { - parser: self.parser, - pointer: self.pointer, - index: 0, - marker: PhantomData, - } - } - - /// Returns the length of the list. - #[must_use] - pub const fn len(&self) -> usize { - unsafe { self.pointer.as_ref().size } - } - - /// Returns whether the list is empty. - #[must_use] - pub const fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Returns the first element of the list, or `None` if it is empty. - #[must_use] - pub const fn first(&self) -> Option> { - if self.is_empty() { - None - } else { - Some(unsafe { self.at(0) }) - } - } - - /// Returns the last element of the list, or `None` if it is empty. 
- #[must_use] - pub const fn last(&self) -> Option> { - if self.is_empty() { - None - } else { - Some(unsafe { self.at(self.len() - 1) }) - } - } -} - -impl<'pr> IntoIterator for &ConstantList<'pr> { - type Item = ConstantId<'pr>; - type IntoIter = ConstantListIter<'pr>; - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} - -impl std::fmt::Debug for ConstantList<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self.iter().collect::>()) - } -} - -/// A handle for an arbitarily-sized integer. -pub struct Integer<'pr> { - /// The raw pointer to the integer allocated by prism. - pointer: *const pm_integer_t, - - /// The marker to indicate the lifetime of the pointer. - marker: PhantomData<&'pr mut pm_constant_id_t>, -} - -impl Integer<'_> { - const fn new(pointer: *const pm_integer_t) -> Self { - Integer { pointer, marker: PhantomData } - } - - /// Returns the sign and the u32 digits representation of the integer, - /// ordered least significant digit first. 
- #[must_use] - pub const fn to_u32_digits(&self) -> (bool, &[u32]) { - let negative = unsafe { (*self.pointer).negative }; - let length = unsafe { (*self.pointer).length }; - let values = unsafe { (*self.pointer).values }; - - if values.is_null() { - let value_ptr = unsafe { std::ptr::addr_of!((*self.pointer).value) }; - let slice = unsafe { std::slice::from_raw_parts(value_ptr, 1) }; - (negative, slice) - } else { - let slice = unsafe { std::slice::from_raw_parts(values, length) }; - (negative, slice) - } - } -} - -impl std::fmt::Debug for Integer<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self.pointer) - } -} - -impl TryInto for Integer<'_> { - type Error = (); - - fn try_into(self) -> Result { - let negative = unsafe { (*self.pointer).negative }; - let length = unsafe { (*self.pointer).length }; - - if length == 0 { - i32::try_from(unsafe { (*self.pointer).value }).map_or(Err(()), |value| if negative { Ok(-value) } else { Ok(value) }) - } else { - Err(()) - } - } -} - -/// A diagnostic message that came back from the parser. -#[derive(Debug)] -pub struct Diagnostic<'pr> { - diag: NonNull, - parser: NonNull, - marker: PhantomData<&'pr pm_diagnostic_t>, -} - -impl<'pr> Diagnostic<'pr> { - /// Returns the message associated with the diagnostic. - /// - /// # Panics - /// - /// Panics if the message is not able to be converted into a `CStr`. - /// - #[must_use] - pub fn message(&self) -> &str { - unsafe { - let message: *mut c_char = self.diag.as_ref().message.cast_mut(); - CStr::from_ptr(message).to_str().expect("prism allows only UTF-8 for diagnostics.") - } - } - - /// The location of the diagnostic in the source. - #[must_use] - pub const fn location(&self) -> Location<'pr> { - Location::new(self.parser, unsafe { &self.diag.as_ref().location }) - } -} - -/// A comment that was found during parsing. 
-#[derive(Debug)] -pub struct Comment<'pr> { - content: NonNull, - parser: NonNull, - marker: PhantomData<&'pr pm_comment_t>, -} - -/// The type of the comment -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum CommentType { - /// `InlineComment` corresponds to comments that start with #. - InlineComment, - /// `EmbDocComment` corresponds to comments that are surrounded by =begin and =end. - EmbDocComment, -} - -impl<'pr> Comment<'pr> { - /// Returns the text of the comment. - /// - /// # Panics - /// Panics if the end offset is not greater than the start offset. - #[must_use] - pub fn text(&self) -> &[u8] { - self.location().as_slice() - } - - /// Returns the type of the comment. - #[must_use] - pub fn type_(&self) -> CommentType { - let type_ = unsafe { self.content.as_ref().type_ }; - if type_ == pm_comment_type_t::PM_COMMENT_EMBDOC { - CommentType::EmbDocComment - } else { - CommentType::InlineComment - } - } - - /// The location of the comment in the source. - #[must_use] - pub const fn location(&self) -> Location<'pr> { - Location::new(self.parser, unsafe { &self.content.as_ref().location }) - } -} - -/// A magic comment that was found during parsing. -#[derive(Debug)] -pub struct MagicComment<'pr> { - parser: NonNull, - comment: NonNull, - marker: PhantomData<&'pr pm_magic_comment_t>, -} - -impl MagicComment<'_> { - /// Returns the text of the comment's key. - #[must_use] - pub const fn key(&self) -> &[u8] { - unsafe { - let start = self.parser.as_ref().start.add(self.comment.as_ref().key.start as usize); - let len = self.comment.as_ref().key.length as usize; - std::slice::from_raw_parts(start, len) - } - } - - /// Returns the text of the comment's value. 
- #[must_use] - pub const fn value(&self) -> &[u8] { - unsafe { - let start = self.parser.as_ref().start.add(self.comment.as_ref().value.start as usize); - let len = self.comment.as_ref().value.length as usize; - std::slice::from_raw_parts(start, len) - } - } -} - -/// A struct created by the `errors` or `warnings` methods on `ParseResult`. It -/// can be used to iterate over the diagnostics in the parse result. -pub struct Diagnostics<'pr> { - diagnostic: *mut pm_diagnostic_t, - parser: NonNull, - marker: PhantomData<&'pr pm_diagnostic_t>, -} - -impl<'pr> Iterator for Diagnostics<'pr> { - type Item = Diagnostic<'pr>; - - fn next(&mut self) -> Option { - if let Some(diagnostic) = NonNull::new(self.diagnostic) { - let current = Diagnostic { - diag: diagnostic, - parser: self.parser, - marker: PhantomData, - }; - self.diagnostic = unsafe { diagnostic.as_ref().node.next.cast::() }; - Some(current) - } else { - None - } - } -} - -/// A struct created by the `comments` method on `ParseResult`. It can be used -/// to iterate over the comments in the parse result. -pub struct Comments<'pr> { - comment: *mut pm_comment_t, - parser: NonNull, - marker: PhantomData<&'pr pm_comment_t>, -} - -impl<'pr> Iterator for Comments<'pr> { - type Item = Comment<'pr>; - - fn next(&mut self) -> Option { - if let Some(comment) = NonNull::new(self.comment) { - let current = Comment { - content: comment, - parser: self.parser, - marker: PhantomData, - }; - self.comment = unsafe { comment.as_ref().node.next.cast::() }; - Some(current) - } else { - None - } - } -} - -/// A struct created by the `magic_comments` method on `ParseResult`. It can be used -/// to iterate over the magic comments in the parse result. 
-pub struct MagicComments<'pr> { - parser: NonNull, - comment: *mut pm_magic_comment_t, - marker: PhantomData<&'pr pm_magic_comment_t>, -} - -impl<'pr> Iterator for MagicComments<'pr> { - type Item = MagicComment<'pr>; - - fn next(&mut self) -> Option { - if let Some(comment) = NonNull::new(self.comment) { - let current = MagicComment { parser: self.parser, comment, marker: PhantomData }; - self.comment = unsafe { comment.as_ref().node.next.cast::() }; - Some(current) - } else { - None - } - } -} - -/// The result of parsing a source string. -#[derive(Debug)] -pub struct ParseResult<'pr> { - source: &'pr [u8], - parser: NonNull, - node: NonNull, -} - -impl<'pr> ParseResult<'pr> { - /// Returns the source string that was parsed. - #[must_use] - pub const fn source(&self) -> &'pr [u8] { - self.source - } - - /// Returns whether we found a `frozen_string_literal` magic comment with a true value. - #[must_use] - pub fn frozen_string_literals(&self) -> bool { - unsafe { (*self.parser.as_ptr()).frozen_string_literal == 1 } - } - - /// Returns a slice of the source string that was parsed using the given - /// slice range. - #[must_use] - pub fn as_slice(&self, location: &Location<'pr>) -> &'pr [u8] { - let start = location.start as usize; - let end = start + location.length as usize; - &self.source[start..end] - } - - /// Returns an iterator that can be used to iterate over the errors in the - /// parse result. - #[must_use] - pub fn errors(&self) -> Diagnostics<'_> { - unsafe { - let list = &mut (*self.parser.as_ptr()).error_list; - Diagnostics { - diagnostic: list.head.cast::(), - parser: self.parser, - marker: PhantomData, - } - } - } - - /// Returns an iterator that can be used to iterate over the warnings in the - /// parse result. 
- #[must_use] - pub fn warnings(&self) -> Diagnostics<'_> { - unsafe { - let list = &mut (*self.parser.as_ptr()).warning_list; - Diagnostics { - diagnostic: list.head.cast::(), - parser: self.parser, - marker: PhantomData, - } - } - } - - /// Returns an iterator that can be used to iterate over the comments in the - /// parse result. - #[must_use] - pub fn comments(&self) -> Comments<'_> { - unsafe { - let list = &mut (*self.parser.as_ptr()).comment_list; - Comments { - comment: list.head.cast::(), - parser: self.parser, - marker: PhantomData, - } - } - } - - /// Returns an iterator that can be used to iterate over the magic comments in the - /// parse result. - #[must_use] - pub fn magic_comments(&self) -> MagicComments<'_> { - unsafe { - let list = &mut (*self.parser.as_ptr()).magic_comment_list; - MagicComments { - parser: self.parser, - comment: list.head.cast::(), - marker: PhantomData, - } - } - } - - /// Returns an optional location of the __END__ marker and the rest of the content of the file. - #[must_use] - pub fn data_loc(&self) -> Option> { - let location = unsafe { &(*self.parser.as_ptr()).data_loc }; - if location.length == 0 { - None - } else { - Some(Location::new(self.parser, location)) - } - } - - /// Returns the root node of the parse result. - #[must_use] - pub fn node(&self) -> Node<'_> { - Node::new(self.parser, self.node.as_ptr()) - } -} - -impl Drop for ParseResult<'_> { - fn drop(&mut self) { - unsafe { - pm_node_destroy(self.parser.as_ptr(), self.node.as_ptr()); - pm_parser_free(self.parser.as_ptr()); - drop(Box::from_raw(self.parser.as_ptr())); - } - } -} +use ruby_prism_sys::{pm_parse, pm_parser_init, pm_parser_t}; /// Parses the given source string and returns a parse result. 
/// @@ -682,7 +45,7 @@ pub fn parse(source: &[u8]) -> ParseResult<'_> { let node = pm_parse(parser.as_ptr()); let node = NonNull::new_unchecked(node); - ParseResult { source, parser, node } + ParseResult::new(source, parser, node) } } diff --git a/rust/ruby-prism/src/node.rs b/rust/ruby-prism/src/node.rs new file mode 100644 index 0000000000..cf44a119dc --- /dev/null +++ b/rust/ruby-prism/src/node.rs @@ -0,0 +1,303 @@ +//! Node-related types for the prism parser. +//! +//! This module contains types for working with AST nodes, including node lists, +//! constant IDs, and integer values. + +use std::marker::PhantomData; +use std::ptr::NonNull; + +use ruby_prism_sys::{pm_constant_id_list_t, pm_constant_id_t, pm_integer_t, pm_node_list, pm_node_t, pm_parser_t}; + +// Note: The `Node` enum is defined in the generated `bindings.rs` file. +// We import it here via `crate::Node` to avoid circular dependencies. +use crate::Node; + +// ============================================================================ +// NodeList +// ============================================================================ + +/// An iterator over the nodes in a list. +pub struct NodeListIter<'pr> { + pub(crate) parser: NonNull, + pub(crate) pointer: NonNull, + pub(crate) index: usize, + pub(crate) marker: PhantomData<&'pr mut pm_node_list>, +} + +impl<'pr> Iterator for NodeListIter<'pr> { + type Item = Node<'pr>; + + fn next(&mut self) -> Option { + if self.index >= unsafe { self.pointer.as_ref().size } { + None + } else { + let node: *mut pm_node_t = unsafe { *(self.pointer.as_ref().nodes.add(self.index)) }; + self.index += 1; + Some(Node::new(self.parser, node)) + } + } +} + +/// A list of nodes. 
+pub struct NodeList<'pr> { + pub(crate) parser: NonNull, + pub(crate) pointer: NonNull, + pub(crate) marker: PhantomData<&'pr mut pm_node_list>, +} + +impl<'pr> NodeList<'pr> { + unsafe fn at(&self, index: usize) -> Node<'pr> { + let node: *mut pm_node_t = *(self.pointer.as_ref().nodes.add(index)); + Node::new(self.parser, node) + } + + /// Returns an iterator over the nodes. + #[must_use] + pub const fn iter(&self) -> NodeListIter<'pr> { + NodeListIter { + parser: self.parser, + pointer: self.pointer, + index: 0, + marker: PhantomData, + } + } + + /// Returns the length of the list. + #[must_use] + pub const fn len(&self) -> usize { + unsafe { self.pointer.as_ref().size } + } + + /// Returns whether the list is empty. + #[must_use] + pub const fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns the first element of the list, or `None` if it is empty. + #[must_use] + pub fn first(&self) -> Option> { + if self.is_empty() { + None + } else { + Some(unsafe { self.at(0) }) + } + } + + /// Returns the last element of the list, or `None` if it is empty. + #[must_use] + pub fn last(&self) -> Option> { + if self.is_empty() { + None + } else { + Some(unsafe { self.at(self.len() - 1) }) + } + } +} + +impl<'pr> IntoIterator for &NodeList<'pr> { + type Item = Node<'pr>; + type IntoIter = NodeListIter<'pr>; + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl std::fmt::Debug for NodeList<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.iter().collect::>()) + } +} + +// ============================================================================ +// ConstantId / ConstantList +// ============================================================================ + +/// A handle for a constant ID. 
+pub struct ConstantId<'pr> { + pub(crate) parser: NonNull, + pub(crate) id: pm_constant_id_t, + pub(crate) marker: PhantomData<&'pr mut pm_constant_id_t>, +} + +impl<'pr> ConstantId<'pr> { + pub(crate) const fn new(parser: NonNull, id: pm_constant_id_t) -> Self { + ConstantId { parser, id, marker: PhantomData } + } + + /// Returns a byte slice for the constant ID. + /// + /// # Panics + /// + /// Panics if the constant ID is not found in the constant pool. + #[must_use] + pub fn as_slice(&self) -> &'pr [u8] { + unsafe { + let pool = &(*self.parser.as_ptr()).constant_pool; + let constant = &(*pool.constants.add((self.id - 1).try_into().unwrap())); + std::slice::from_raw_parts(constant.start, constant.length) + } + } +} + +impl std::fmt::Debug for ConstantId<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.id) + } +} + +/// An iterator over the constants in a list. +pub struct ConstantListIter<'pr> { + pub(crate) parser: NonNull, + pub(crate) pointer: NonNull, + pub(crate) index: usize, + pub(crate) marker: PhantomData<&'pr mut pm_constant_id_list_t>, +} + +impl<'pr> Iterator for ConstantListIter<'pr> { + type Item = ConstantId<'pr>; + + fn next(&mut self) -> Option { + if self.index >= unsafe { self.pointer.as_ref().size } { + None + } else { + let constant_id: pm_constant_id_t = unsafe { *(self.pointer.as_ref().ids.add(self.index)) }; + self.index += 1; + Some(ConstantId::new(self.parser, constant_id)) + } + } +} + +/// A list of constants. +pub struct ConstantList<'pr> { + /// The raw pointer to the parser where this list came from. + pub(crate) parser: NonNull, + + /// The raw pointer to the list allocated by prism. + pub(crate) pointer: NonNull, + + /// The marker to indicate the lifetime of the pointer. 
+ pub(crate) marker: PhantomData<&'pr mut pm_constant_id_list_t>, +} + +impl<'pr> ConstantList<'pr> { + const unsafe fn at(&self, index: usize) -> ConstantId<'pr> { + let constant_id: pm_constant_id_t = *(self.pointer.as_ref().ids.add(index)); + ConstantId::new(self.parser, constant_id) + } + + /// Returns an iterator over the constants in the list. + #[must_use] + pub const fn iter(&self) -> ConstantListIter<'pr> { + ConstantListIter { + parser: self.parser, + pointer: self.pointer, + index: 0, + marker: PhantomData, + } + } + + /// Returns the length of the list. + #[must_use] + pub const fn len(&self) -> usize { + unsafe { self.pointer.as_ref().size } + } + + /// Returns whether the list is empty. + #[must_use] + pub const fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns the first element of the list, or `None` if it is empty. + #[must_use] + pub const fn first(&self) -> Option> { + if self.is_empty() { + None + } else { + Some(unsafe { self.at(0) }) + } + } + + /// Returns the last element of the list, or `None` if it is empty. + #[must_use] + pub const fn last(&self) -> Option> { + if self.is_empty() { + None + } else { + Some(unsafe { self.at(self.len() - 1) }) + } + } +} + +impl<'pr> IntoIterator for &ConstantList<'pr> { + type Item = ConstantId<'pr>; + type IntoIter = ConstantListIter<'pr>; + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl std::fmt::Debug for ConstantList<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.iter().collect::>()) + } +} + +// ============================================================================ +// Integer +// ============================================================================ + +/// A handle for an arbitrarily-sized integer. +pub struct Integer<'pr> { + /// The raw pointer to the integer allocated by prism. + pub(crate) pointer: *const pm_integer_t, + + /// The marker to indicate the lifetime of the pointer. 
+ pub(crate) marker: PhantomData<&'pr mut pm_constant_id_t>, +} + +impl Integer<'_> { + pub(crate) const fn new(pointer: *const pm_integer_t) -> Self { + Integer { pointer, marker: PhantomData } + } + + /// Returns the sign and the u32 digits representation of the integer, + /// ordered least significant digit first. + #[must_use] + pub const fn to_u32_digits(&self) -> (bool, &[u32]) { + let negative = unsafe { (*self.pointer).negative }; + let length = unsafe { (*self.pointer).length }; + let values = unsafe { (*self.pointer).values }; + + if values.is_null() { + let value_ptr = unsafe { std::ptr::addr_of!((*self.pointer).value) }; + let slice = unsafe { std::slice::from_raw_parts(value_ptr, 1) }; + (negative, slice) + } else { + let slice = unsafe { std::slice::from_raw_parts(values, length) }; + (negative, slice) + } + } +} + +impl std::fmt::Debug for Integer<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.pointer) + } +} + +impl TryInto for Integer<'_> { + type Error = (); + + fn try_into(self) -> Result { + let negative = unsafe { (*self.pointer).negative }; + let length = unsafe { (*self.pointer).length }; + + if length == 0 { + i32::try_from(unsafe { (*self.pointer).value }).map_or(Err(()), |value| if negative { Ok(-value) } else { Ok(value) }) + } else { + Err(()) + } + } +} diff --git a/rust/ruby-prism/src/parse_result/comments.rs b/rust/ruby-prism/src/parse_result/comments.rs new file mode 100644 index 0000000000..767de6330a --- /dev/null +++ b/rust/ruby-prism/src/parse_result/comments.rs @@ -0,0 +1,143 @@ +//! Comment handling for the prism parser. + +use std::marker::PhantomData; +use std::ptr::NonNull; + +use ruby_prism_sys::{pm_comment_t, pm_comment_type_t, pm_magic_comment_t, pm_parser_t}; + +use super::Location; + +/// The type of the comment +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CommentType { + /// `InlineComment` corresponds to comments that start with #. 
+ InlineComment, + /// `EmbDocComment` corresponds to comments that are surrounded by =begin and =end. + EmbDocComment, +} + +/// A comment that was found during parsing. +#[derive(Debug)] +pub struct Comment<'pr> { + content: NonNull, + parser: NonNull, + marker: PhantomData<&'pr pm_comment_t>, +} + +impl<'pr> Comment<'pr> { + /// Returns the text of the comment. + /// + /// # Panics + /// Panics if the end offset is not greater than the start offset. + #[must_use] + pub fn text(&self) -> &[u8] { + self.location().as_slice() + } + + /// Returns the type of the comment. + #[must_use] + pub fn type_(&self) -> CommentType { + let type_ = unsafe { self.content.as_ref().type_ }; + if type_ == pm_comment_type_t::PM_COMMENT_EMBDOC { + CommentType::EmbDocComment + } else { + CommentType::InlineComment + } + } + + /// The location of the comment in the source. + #[must_use] + pub const fn location(&self) -> Location<'pr> { + Location::new(self.parser, unsafe { &self.content.as_ref().location }) + } +} + +/// A struct created by the `comments` method on `ParseResult`. It can be used +/// to iterate over the comments in the parse result. +pub struct Comments<'pr> { + comment: *mut pm_comment_t, + parser: NonNull, + marker: PhantomData<&'pr pm_comment_t>, +} + +impl Comments<'_> { + pub(crate) const fn new(comment: *mut pm_comment_t, parser: NonNull) -> Self { + Comments { comment, parser, marker: PhantomData } + } +} + +impl<'pr> Iterator for Comments<'pr> { + type Item = Comment<'pr>; + + fn next(&mut self) -> Option { + if let Some(comment) = NonNull::new(self.comment) { + let current = Comment { + content: comment, + parser: self.parser, + marker: PhantomData, + }; + self.comment = unsafe { comment.as_ref().node.next.cast::() }; + Some(current) + } else { + None + } + } +} + +/// A magic comment that was found during parsing. 
+#[derive(Debug)] +pub struct MagicComment<'pr> { + parser: NonNull, + comment: NonNull, + marker: PhantomData<&'pr pm_magic_comment_t>, +} + +impl MagicComment<'_> { + /// Returns the text of the comment's key. + #[must_use] + pub const fn key(&self) -> &[u8] { + unsafe { + let start = self.parser.as_ref().start.add(self.comment.as_ref().key.start as usize); + let len = self.comment.as_ref().key.length as usize; + std::slice::from_raw_parts(start, len) + } + } + + /// Returns the text of the comment's value. + #[must_use] + pub const fn value(&self) -> &[u8] { + unsafe { + let start = self.parser.as_ref().start.add(self.comment.as_ref().value.start as usize); + let len = self.comment.as_ref().value.length as usize; + std::slice::from_raw_parts(start, len) + } + } +} + +/// A struct created by the `magic_comments` method on `ParseResult`. It can be used +/// to iterate over the magic comments in the parse result. +pub struct MagicComments<'pr> { + parser: NonNull, + comment: *mut pm_magic_comment_t, + marker: PhantomData<&'pr pm_magic_comment_t>, +} + +impl MagicComments<'_> { + pub(crate) const fn new(parser: NonNull, comment: *mut pm_magic_comment_t) -> Self { + MagicComments { parser, comment, marker: PhantomData } + } +} + +impl<'pr> Iterator for MagicComments<'pr> { + type Item = MagicComment<'pr>; + + fn next(&mut self) -> Option { + if let Some(comment) = NonNull::new(self.comment) { + let current = MagicComment { parser: self.parser, comment, marker: PhantomData }; + self.comment = unsafe { comment.as_ref().node.next.cast::() }; + Some(current) + } else { + None + } + } +} diff --git a/rust/ruby-prism/src/parse_result/diagnostics.rs b/rust/ruby-prism/src/parse_result/diagnostics.rs new file mode 100644 index 0000000000..00fc9ffe33 --- /dev/null +++ b/rust/ruby-prism/src/parse_result/diagnostics.rs @@ -0,0 +1,71 @@ +//! Diagnostic handling for parse errors and warnings. 
+ +use std::ffi::{c_char, CStr}; +use std::marker::PhantomData; +use std::ptr::NonNull; + +use ruby_prism_sys::{pm_diagnostic_t, pm_parser_t}; + +use super::Location; + +/// A diagnostic message that came back from the parser. +#[derive(Debug)] +pub struct Diagnostic<'pr> { + diag: NonNull, + parser: NonNull, + marker: PhantomData<&'pr pm_diagnostic_t>, +} + +impl<'pr> Diagnostic<'pr> { + /// Returns the message associated with the diagnostic. + /// + /// # Panics + /// + /// Panics if the message is not able to be converted into a `CStr`. + /// + #[must_use] + pub fn message(&self) -> &str { + unsafe { + let message: *mut c_char = self.diag.as_ref().message.cast_mut(); + CStr::from_ptr(message).to_str().expect("prism allows only UTF-8 for diagnostics.") + } + } + + /// The location of the diagnostic in the source. + #[must_use] + pub const fn location(&self) -> Location<'pr> { + Location::new(self.parser, unsafe { &self.diag.as_ref().location }) + } +} + +/// A struct created by the `errors` or `warnings` methods on `ParseResult`. It +/// can be used to iterate over the diagnostics in the parse result. 
+pub struct Diagnostics<'pr> {
+    diagnostic: *mut pm_diagnostic_t,
+    parser: NonNull<pm_parser_t>,
+    marker: PhantomData<&'pr pm_diagnostic_t>,
+}
+
+impl Diagnostics<'_> {
+    pub(crate) const fn new(diagnostic: *mut pm_diagnostic_t, parser: NonNull<pm_parser_t>) -> Self {
+        Diagnostics { diagnostic, parser, marker: PhantomData }
+    }
+}
+
+impl<'pr> Iterator for Diagnostics<'pr> {
+    type Item = Diagnostic<'pr>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // A null cursor means the list is exhausted.
+        let diagnostic = NonNull::new(self.diagnostic)?;
+        let current = Diagnostic {
+            diag: diagnostic,
+            parser: self.parser,
+            marker: PhantomData,
+        };
+        // SAFETY: `diagnostic` is non-null; assumes it points at a live list node whose
+        // `node.next` is either null or the next diagnostic — TODO confirm against prism.
+        self.diagnostic = unsafe { diagnostic.as_ref().node.next.cast::<pm_diagnostic_t>() };
+        Some(current)
+    }
+}
diff --git a/rust/ruby-prism/src/parse_result/mod.rs b/rust/ruby-prism/src/parse_result/mod.rs
new file mode 100644
index 0000000000..33eb1ac9a0
--- /dev/null
+++ b/rust/ruby-prism/src/parse_result/mod.rs
@@ -0,0 +1,188 @@
+//! Parse result types for the prism parser.
+//!
+//! This module contains types related to the result of parsing, including
+//! the main `ParseResult` struct, location tracking, comments, and diagnostics.
+
+mod comments;
+mod diagnostics;
+
+use std::ptr::NonNull;
+
+use ruby_prism_sys::{pm_comment_t, pm_diagnostic_t, pm_location_t, pm_magic_comment_t, pm_node_destroy, pm_node_t, pm_parser_free, pm_parser_t};
+
+pub use self::comments::{Comment, CommentType, Comments, MagicComment, MagicComments};
+pub use self::diagnostics::{Diagnostic, Diagnostics};
+
+use crate::Node;
+
+/// A range in the source file, represented as a start offset and length.
+pub struct Location<'pr> {
+    pub(crate) parser: NonNull<pm_parser_t>,
+    pub(crate) start: u32,
+    pub(crate) length: u32,
+    marker: std::marker::PhantomData<&'pr [u8]>,
+}
+
+impl<'pr> Location<'pr> {
+    /// Returns the bytes covered by the range, read from the buffer at the parser's `start` pointer.
+    #[must_use]
+    pub fn as_slice(&self) -> &'pr [u8] {
+        // SAFETY: assumes `start`/`length` describe an in-bounds range of that buffer — TODO confirm.
+        unsafe {
+            std::slice::from_raw_parts((*self.parser.as_ptr()).start.add(self.start as usize), self.length as usize)
+        }
+    }
+
+    /// Builds a Location by copying the offset and length out of the given `pm_location_t`.
+    #[must_use]
+    pub(crate) const fn new(parser: NonNull<pm_parser_t>, location: &'pr pm_location_t) -> Self {
+        Location {
+            parser,
+            start: location.start,
+            length: location.length,
+            marker: std::marker::PhantomData,
+        }
+    }
+
+    /// Returns the end offset (`start + length`) from the beginning of the parsed source.
+    #[must_use]
+    pub const fn end(&self) -> u32 {
+        self.start + self.length
+    }
+
+    /// Return a Location starting at self and ending at the end of other.
+    /// Returns None if both locations did not originate from the same parser,
+    /// or if self starts after other.
+    #[must_use]
+    pub fn join(&self, other: &Self) -> Option<Self> {
+        if self.parser != other.parser || self.start > other.start {
+            None
+        } else {
+            Some(Location {
+                parser: self.parser,
+                start: self.start,
+                length: other.end() - self.start,
+                marker: std::marker::PhantomData,
+            })
+        }
+    }
+}
+
+impl std::fmt::Debug for Location<'_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let slice: &[u8] = self.as_slice();
+
+        let mut visible = String::new();
+        visible.push('"');
+
+        for &byte in slice {
+            // `escape_default` yields only ASCII bytes, so each one maps directly to a `char`.
+            visible.extend(std::ascii::escape_default(byte).map(char::from));
+        }
+
+        visible.push('"');
+        write!(f, "{visible}")
+    }
+}
+
+/// The result of parsing a source string.
+#[derive(Debug)]
+pub struct ParseResult<'pr> {
+    source: &'pr [u8],
+    parser: NonNull<pm_parser_t>,
+    node: NonNull<pm_node_t>,
+}
+
+impl<'pr> ParseResult<'pr> {
+    pub(crate) const unsafe fn new(source: &'pr [u8], parser: NonNull<pm_parser_t>, node: NonNull<pm_node_t>) -> Self {
+        ParseResult { source, parser, node }
+    }
+
+    /// Returns the source bytes that were parsed.
+    #[must_use]
+    pub const fn source(&self) -> &'pr [u8] {
+        self.source
+    }
+
+    /// Returns whether we found a `frozen_string_literal` magic comment with a true value.
+    #[must_use]
+    pub fn frozen_string_literals(&self) -> bool {
+        unsafe { (*self.parser.as_ptr()).frozen_string_literal == 1 }
+    }
+
+    /// Returns the bytes of the parsed source covered by `location`. Panics if
+    /// that range does not lie within the source.
+    #[must_use]
+    pub fn as_slice(&self, location: &Location<'pr>) -> &'pr [u8] {
+        let start = location.start as usize;
+        let end = start + location.length as usize;
+        &self.source[start..end]
+    }
+
+    /// Returns an iterator that can be used to iterate over the errors in the
+    /// parse result.
+    #[must_use]
+    pub fn errors(&self) -> Diagnostics<'_> {
+        unsafe {
+            let list = &(*self.parser.as_ptr()).error_list;
+            Diagnostics::new(list.head.cast::<pm_diagnostic_t>(), self.parser)
+        }
+    }
+
+    /// Returns an iterator that can be used to iterate over the warnings in the
+    /// parse result.
+    #[must_use]
+    pub fn warnings(&self) -> Diagnostics<'_> {
+        unsafe {
+            let list = &(*self.parser.as_ptr()).warning_list;
+            Diagnostics::new(list.head.cast::<pm_diagnostic_t>(), self.parser)
+        }
+    }
+
+    /// Returns an iterator that can be used to iterate over the comments in the
+    /// parse result.
+    #[must_use]
+    pub fn comments(&self) -> Comments<'_> {
+        unsafe {
+            let list = &(*self.parser.as_ptr()).comment_list;
+            Comments::new(list.head.cast::<pm_comment_t>(), self.parser)
+        }
+    }
+
+    /// Returns an iterator that can be used to iterate over the magic comments in the
+    /// parse result.
+    #[must_use]
+    pub fn magic_comments(&self) -> MagicComments<'_> {
+        unsafe {
+            let list = &(*self.parser.as_ptr()).magic_comment_list;
+            MagicComments::new(self.parser, list.head.cast::<pm_magic_comment_t>())
+        }
+    }
+
+    /// Returns the location of the `__END__` marker and the rest of the file, or `None` if its length is zero.
+    #[must_use]
+    pub fn data_loc(&self) -> Option<Location<'pr>> {
+        let location = unsafe { &(*self.parser.as_ptr()).data_loc };
+        if location.length == 0 {
+            None
+        } else {
+            Some(Location::new(self.parser, location))
+        }
+    }
+
+    /// Returns the root node of the parse result.
+    #[must_use]
+    pub fn node(&self) -> Node<'_> {
+        Node::new(self.parser, self.node.as_ptr())
+    }
+}
+
+impl Drop for ParseResult<'_> {
+    fn drop(&mut self) {
+        unsafe {
+            pm_node_destroy(self.parser.as_ptr(), self.node.as_ptr());
+            pm_parser_free(self.parser.as_ptr());
+            drop(Box::from_raw(self.parser.as_ptr()));
+        }
+    }
+}