diff --git a/rust/ruby-prism-sys/build/main.rs b/rust/ruby-prism-sys/build/main.rs
index 798d06d8ff..971b5e951e 100644
--- a/rust/ruby-prism-sys/build/main.rs
+++ b/rust/ruby-prism-sys/build/main.rs
@@ -116,6 +116,7 @@ fn generate_bindings(ruby_include_path: &Path) -> bindgen::Bindings {
         // Structs
         .allowlist_type("pm_comment_t")
         .allowlist_type("pm_diagnostic_t")
+        .allowlist_type("pm_line_column_t")
         .allowlist_type("pm_list_t")
         .allowlist_type("pm_magic_comment_t")
         .allowlist_type("pm_node_t")
@@ -140,6 +141,8 @@ fn generate_bindings(ruby_include_path: &Path) -> bindgen::Bindings {
         // Functions
         .allowlist_function("pm_list_empty_p")
         .allowlist_function("pm_list_free")
+        .allowlist_function("pm_newline_list_line")
+        .allowlist_function("pm_newline_list_line_column")
         .allowlist_function("pm_node_destroy")
         .allowlist_function("pm_pack_parse")
         .allowlist_function("pm_parse")
diff --git a/rust/ruby-prism/src/lib.rs b/rust/ruby-prism/src/lib.rs
index 6824768193..55f00d0b20 100644
--- a/rust/ruby-prism/src/lib.rs
+++ b/rust/ruby-prism/src/lib.rs
@@ -14,6 +14,7 @@ mod bindings {
 }
 
 mod node;
+mod node_ext;
 mod parse_result;
 
 use std::mem::MaybeUninit;
@@ -21,6 +22,7 @@ use std::ptr::NonNull;
 
 pub use self::bindings::*;
 pub use self::node::{ConstantId, ConstantList, ConstantListIter, Integer, NodeList, NodeListIter};
+pub use self::node_ext::ConstantPathError;
 pub use self::parse_result::{Comment, CommentType, Comments, Diagnostic, Diagnostics, Location, MagicComment, MagicComments, ParseResult};
 
 use ruby_prism_sys::{pm_parse, pm_parser_init, pm_parser_t};
@@ -160,6 +162,48 @@ mod tests {
         assert_eq!(slice, "222");
     }
 
+    #[test]
+    #[allow(clippy::similar_names)]
+    fn location_line_column_test() {
+        let source = "foo\nbar\nbaz";
+        let result = parse(source.as_ref());
+
+        let node = result.node();
+        let program = node.as_program_node().unwrap();
+        let statements = program.statements().body();
+        let mut iter = statements.iter();
+        let _foo = iter.next().unwrap();
+        let bar = iter.next().unwrap();
+        let baz = iter.next().unwrap();
+
+        let bar_loc = bar.location();
+        assert_eq!(bar_loc.start_line(), 2);
+        assert_eq!(bar_loc.end_line(), 2);
+        assert_eq!(bar_loc.start_column(), 0);
+        assert_eq!(bar_loc.end_column(), 3);
+
+        let baz_loc = baz.location();
+        assert_eq!(baz_loc.start_line(), 3);
+        assert_eq!(baz_loc.end_line(), 3);
+        assert_eq!(baz_loc.start_column(), 0);
+        assert_eq!(baz_loc.end_column(), 3);
+    }
+
+    #[test]
+    fn test_chop() {
+        let result = parse(b"foo");
+        let mut location = result.node().as_program_node().unwrap().location();
+
+        assert_eq!(location.chop().as_slice(), b"fo");
+        assert_eq!(location.chop().chop().chop().as_slice(), b"");
+
+        // Chopping an already-empty location must leave it empty rather than underflow.
+        for _ in 0..10 {
+            location = location.chop();
+        }
+        assert_eq!(location.as_slice(), b"");
+    }
+
     #[test]
     fn visitor_test() {
         use super::{visit_interpolated_regular_expression_node, visit_regular_expression_node, InterpolatedRegularExpressionNode, RegularExpressionNode, Visit};
diff --git a/rust/ruby-prism/src/node_ext.rs b/rust/ruby-prism/src/node_ext.rs
new file mode 100644
index 0000000000..66f3074043
--- /dev/null
+++ b/rust/ruby-prism/src/node_ext.rs
@@ -0,0 +1,429 @@
+//! Node extension methods for the prism parser.
+//!
+//! This module provides convenience methods on AST nodes that aren't generated
+//! from the config, mirroring Ruby's `node_ext.rb`.
+
+use std::borrow::Cow;
+use std::fmt;
+
+use crate::{ConstantPathNode, ConstantPathTargetNode, ConstantReadNode, ConstantTargetNode, ConstantWriteNode, Node};
+
+/// Errors for constant path name computation.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum ConstantPathError {
+    /// An error returned when dynamic parts are found while computing a
+    /// constant path's full name. For example:
+    /// `Foo::Bar::Baz` -> succeeds because all parts of the constant
+    /// path are simple constants.
+    /// `var::Bar::Baz` -> fails because the first part of the constant path
+    /// is a local variable.
+    DynamicParts,
+    /// An error returned when missing nodes are found while computing a
+    /// constant path's full name. For example:
+    /// `Foo::` -> fails because the constant path is missing the last part.
+    MissingNodes,
+}
+
+impl fmt::Display for ConstantPathError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::DynamicParts => {
+                write!(f, "Constant path contains dynamic parts. Cannot compute full name")
+            },
+            Self::MissingNodes => {
+                write!(f, "Constant path contains missing nodes. Cannot compute full name")
+            },
+        }
+    }
+}
+
+impl std::error::Error for ConstantPathError {}
+
+impl<'pr> ConstantReadNode<'pr> {
+    /// Returns the list of parts for the full name of this constant.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use ruby_prism::parse;
+    /// let result = parse(b"Foo");
+    /// let stmt = result.node().as_program_node().unwrap()
+    ///     .statements().body().iter().next().unwrap();
+    /// let constant = stmt.as_constant_read_node().unwrap();
+    /// assert_eq!(constant.full_name_parts(), vec!["Foo"]);
+    /// ```
+    #[must_use]
+    pub fn full_name_parts(&self) -> Vec<Cow<'pr, str>> {
+        vec![self.full_name()]
+    }
+
+    /// Returns the full name of this constant.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use ruby_prism::parse;
+    /// let result = parse(b"Foo");
+    /// let stmt = result.node().as_program_node().unwrap()
+    ///     .statements().body().iter().next().unwrap();
+    /// let constant = stmt.as_constant_read_node().unwrap();
+    /// assert_eq!(constant.full_name(), "Foo");
+    /// ```
+    #[must_use]
+    pub fn full_name(&self) -> Cow<'pr, str> {
+        String::from_utf8_lossy(self.name().as_slice())
+    }
+}
+
+impl<'pr> ConstantWriteNode<'pr> {
+    /// Returns the list of parts for the full name of this constant.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use ruby_prism::parse;
+    /// let result = parse(b"Foo = 1");
+    /// let stmt = result.node().as_program_node().unwrap()
+    ///     .statements().body().iter().next().unwrap();
+    /// let constant = stmt.as_constant_write_node().unwrap();
+    /// assert_eq!(constant.full_name_parts(), vec!["Foo"]);
+    /// ```
+    #[must_use]
+    pub fn full_name_parts(&self) -> Vec<Cow<'pr, str>> {
+        vec![self.full_name()]
+    }
+
+    /// Returns the full name of this constant.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use ruby_prism::parse;
+    /// let result = parse(b"Foo = 1");
+    /// let stmt = result.node().as_program_node().unwrap()
+    ///     .statements().body().iter().next().unwrap();
+    /// let constant = stmt.as_constant_write_node().unwrap();
+    /// assert_eq!(constant.full_name(), "Foo");
+    /// ```
+    #[must_use]
+    pub fn full_name(&self) -> Cow<'pr, str> {
+        String::from_utf8_lossy(self.name().as_slice())
+    }
+}
+
+impl<'pr> ConstantTargetNode<'pr> {
+    /// Returns the list of parts for the full name of this constant.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use ruby_prism::parse;
+    /// let result = parse(b"Foo, Bar = [1, 2]");
+    /// let stmt = result.node().as_program_node().unwrap()
+    ///     .statements().body().iter().next().unwrap();
+    /// let target = stmt.as_multi_write_node().unwrap()
+    ///     .lefts().iter().next().unwrap();
+    /// let constant = target.as_constant_target_node().unwrap();
+    /// assert_eq!(constant.full_name_parts(), vec!["Foo"]);
+    /// ```
+    #[must_use]
+    pub fn full_name_parts(&self) -> Vec<Cow<'pr, str>> {
+        vec![self.full_name()]
+    }
+
+    /// Returns the full name of this constant.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use ruby_prism::parse;
+    /// let result = parse(b"Foo, Bar = [1, 2]");
+    /// let stmt = result.node().as_program_node().unwrap()
+    ///     .statements().body().iter().next().unwrap();
+    /// let target = stmt.as_multi_write_node().unwrap()
+    ///     .lefts().iter().next().unwrap();
+    /// let constant = target.as_constant_target_node().unwrap();
+    /// assert_eq!(constant.full_name(), "Foo");
+    /// ```
+    #[must_use]
+    pub fn full_name(&self) -> Cow<'pr, str> {
+        String::from_utf8_lossy(self.name().as_slice())
+    }
+}
+
+impl<'pr> ConstantPathNode<'pr> {
+    /// Returns the list of parts for the full name of this constant path.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use ruby_prism::parse;
+    /// let result = parse(b"Foo::Bar");
+    /// let stmt = result.node().as_program_node().unwrap()
+    ///     .statements().body().iter().next().unwrap();
+    /// let constant_path = stmt.as_constant_path_node().unwrap();
+    /// assert_eq!(constant_path.full_name_parts().unwrap(), vec!["Foo", "Bar"]);
+    /// ```
+    ///
+    /// # Errors
+    ///
+    /// Returns [`ConstantPathError::DynamicParts`] if the root of the path is
+    /// not a plain constant (e.g. `self::Foo`), or
+    /// [`ConstantPathError::MissingNodes`] if a name is absent (e.g. `Foo::`).
+    pub fn full_name_parts(&self) -> Result<Vec<Cow<'pr, str>>, ConstantPathError> {
+        // Walk from the rightmost segment towards the root, collecting the
+        // names back to front; the vector is reversed once at the end.
+        let mut parts = Vec::new();
+        let mut current: Option<Node<'pr>> = Some(self.as_node());
+
+        while let Some(ref node) = current {
+            if let Some(path_node) = node.as_constant_path_node() {
+                let name = path_node.name().ok_or(ConstantPathError::MissingNodes)?;
+                parts.push(String::from_utf8_lossy(name.as_slice()));
+                current = path_node.parent();
+
+                // A segment without a parent is preceded by a bare `::` (as in
+                // `::Foo`), which is reported as an empty first part.
+                if current.is_none() {
+                    parts.push(Cow::Borrowed(""));
+                }
+            } else if let Some(read_node) = node.as_constant_read_node() {
+                // A plain constant is the root of a relative path; stop here.
+                parts.push(String::from_utf8_lossy(read_node.name().as_slice()));
+                current = None;
+            } else {
+                // The root is neither a constant nor absent (e.g. `self::Foo`).
+                return Err(ConstantPathError::DynamicParts);
+            }
+        }
+
+        parts.reverse();
+        Ok(parts)
+    }
+
+    /// Returns the full name of this constant path.
+    ///
+    /// The parts from [`Self::full_name_parts`] are joined with `::`, so a
+    /// leading `::` in the source is kept (see the second example).
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use ruby_prism::parse;
+    /// let result = parse(b"Foo::Bar");
+    /// let stmt = result.node().as_program_node().unwrap()
+    ///     .statements().body().iter().next().unwrap();
+    /// let constant_path = stmt.as_constant_path_node().unwrap();
+    /// assert_eq!(constant_path.full_name().unwrap(), "Foo::Bar");
+    ///
+    /// let result = parse(b"::Foo::Bar");
+    /// let stmt = result.node().as_program_node().unwrap()
+    ///     .statements().body().iter().next().unwrap();
+    /// let constant_path = stmt.as_constant_path_node().unwrap();
+    /// assert_eq!(constant_path.full_name().unwrap(), "::Foo::Bar");
+    /// ```
+    ///
+    /// # Errors
+    ///
+    /// Returns [`ConstantPathError::DynamicParts`] if the path contains
+    /// dynamic parts, or [`ConstantPathError::MissingNodes`] if the path
+    /// contains missing nodes.
+    pub fn full_name(&self) -> Result<String, ConstantPathError> {
+        Ok(self.full_name_parts()?.join("::"))
+    }
+}
+
+impl<'pr> ConstantPathTargetNode<'pr> {
+    /// Returns the list of parts for the full name of this constant path.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use ruby_prism::parse;
+    /// let result = parse(b"Foo::Bar, Baz = [1, 2]");
+    /// let stmt = result.node().as_program_node().unwrap()
+    ///     .statements().body().iter().next().unwrap();
+    /// let target = stmt.as_multi_write_node().unwrap()
+    ///     .lefts().iter().next().unwrap();
+    /// let constant_path = target.as_constant_path_target_node().unwrap();
+    /// assert_eq!(constant_path.full_name_parts().unwrap(), vec!["Foo", "Bar"]);
+    /// ```
+    ///
+    /// # Errors
+    ///
+    /// Returns [`ConstantPathError::DynamicParts`] if the path contains
+    /// dynamic parts, or [`ConstantPathError::MissingNodes`] if the path
+    /// contains missing nodes.
+    pub fn full_name_parts(&self) -> Result<Vec<Cow<'pr, str>>, ConstantPathError> {
+        let name = self.name().ok_or(ConstantPathError::MissingNodes)?;
+
+        let mut parts = if let Some(parent) = self.parent() {
+            if let Some(path_node) = parent.as_constant_path_node() {
+                path_node.full_name_parts()?
+            } else if let Some(read_node) = parent.as_constant_read_node() {
+                read_node.full_name_parts()
+            } else {
+                return Err(ConstantPathError::DynamicParts);
+            }
+        } else {
+            vec![Cow::Borrowed("")]
+        };
+
+        parts.push(String::from_utf8_lossy(name.as_slice()));
+        Ok(parts)
+    }
+
+    /// Returns the full name of this constant path.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use ruby_prism::parse;
+    /// let result = parse(b"Foo::Bar, Baz = [1, 2]");
+    /// let stmt = result.node().as_program_node().unwrap()
+    ///     .statements().body().iter().next().unwrap();
+    /// let target = stmt.as_multi_write_node().unwrap()
+    ///     .lefts().iter().next().unwrap();
+    /// let constant_path = target.as_constant_path_target_node().unwrap();
+    /// assert_eq!(constant_path.full_name().unwrap(), "Foo::Bar");
+    /// ```
+    ///
+    /// # Errors
+    ///
+    /// Returns [`ConstantPathError::DynamicParts`] if the path contains
+    /// dynamic parts, or [`ConstantPathError::MissingNodes`] if the path
+    /// contains missing nodes.
+    pub fn full_name(&self) -> Result<String, ConstantPathError> {
+        Ok(self.full_name_parts()?.join("::"))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::ConstantPathError;
+    use crate::parse;
+
+    #[test]
+    fn test_full_name_for_constant_read_node() {
+        let result = parse(b"Foo");
+        let node = result.node().as_program_node().unwrap().statements().body().iter().next().unwrap();
+        let constant = node.as_constant_read_node().unwrap();
+
+        assert_eq!(constant.full_name_parts(), vec!["Foo"]);
+        assert_eq!(constant.full_name(), "Foo");
+    }
+
+    #[test]
+    fn test_full_name_for_constant_write_node() {
+        let result = parse(b"Foo = 1");
+        let node = result.node().as_program_node().unwrap().statements().body().iter().next().unwrap();
+        let constant = node.as_constant_write_node().unwrap();
+
+        assert_eq!(constant.full_name_parts(), vec!["Foo"]);
+        assert_eq!(constant.full_name(), "Foo");
+    }
+
+    #[test]
+    fn test_full_name_for_constant_target_node() {
+        let result = parse(b"Foo, Bar = [1, 2]");
+        let node = result.node().as_program_node().unwrap().statements().body().iter().next().unwrap();
+        let multi_write = node.as_multi_write_node().unwrap();
+        let target = multi_write.lefts().iter().next().unwrap();
+        let constant = target.as_constant_target_node().unwrap();
+
+        assert_eq!(constant.full_name_parts(), vec!["Foo"]);
+        assert_eq!(constant.full_name(), "Foo");
+    }
+
+    #[test]
+    fn test_full_name_for_constant_path() {
+        let result = parse(b"Foo::Bar");
+        let node = result.node().as_program_node().unwrap().statements().body().iter().next().unwrap();
+        let constant_path = node.as_constant_path_node().unwrap();
+
+        assert_eq!(constant_path.full_name_parts().unwrap(), vec!["Foo", "Bar"]);
+        assert_eq!(constant_path.full_name().unwrap(), "Foo::Bar");
+    }
+
+    #[test]
+    fn test_full_name_for_constant_path_with_stovetop() {
+        let result = parse(b"::Foo::Bar");
+        let node = result.node().as_program_node().unwrap().statements().body().iter().next().unwrap();
+        let constant_path = node.as_constant_path_node().unwrap();
+
+        assert_eq!(constant_path.full_name_parts().unwrap(), vec!["", "Foo", "Bar"]);
+        assert_eq!(constant_path.full_name().unwrap(), "::Foo::Bar");
+    }
+
+    #[test]
+    fn test_full_name_for_constant_path_with_self() {
+        let source = r"
+self::
+  Bar::Baz::
+    Qux
+";
+        let result = parse(source.as_bytes());
+        let node = result.node().as_program_node().unwrap().statements().body().iter().next().unwrap();
+        let constant_path = node.as_constant_path_node().unwrap();
+
+        assert_eq!(constant_path.full_name().unwrap_err(), ConstantPathError::DynamicParts);
+    }
+
+    #[test]
+    fn test_full_name_for_constant_path_with_variable() {
+        let source = r"
+foo::
+  Bar::Baz::
+    Qux
+";
+        let result = parse(source.as_bytes());
+        let node = result.node().as_program_node().unwrap().statements().body().iter().next().unwrap();
+        let constant_path = node.as_constant_path_node().unwrap();
+
+        assert_eq!(constant_path.full_name().unwrap_err(), ConstantPathError::DynamicParts);
+    }
+
+    #[test]
+    fn test_full_name_for_constant_path_with_missing_name() {
+        let result = parse(b"Foo::");
+        let node = result.node().as_program_node().unwrap().statements().body().iter().next().unwrap();
+        let constant_path = node.as_constant_path_node().unwrap();
+
+        assert_eq!(constant_path.full_name().unwrap_err(), ConstantPathError::MissingNodes);
+    }
+
+    #[test]
+    fn test_full_name_for_constant_path_target() {
+        let result = parse(b"Foo::Bar, Baz = [1, 2]");
+        let node = result.node().as_program_node().unwrap().statements().body().iter().next().unwrap();
+        let multi_write = node.as_multi_write_node().unwrap();
+        let target = multi_write.lefts().iter().next().unwrap();
+        let constant_path = target.as_constant_path_target_node().unwrap();
+
+        assert_eq!(constant_path.full_name_parts().unwrap(), vec!["Foo", "Bar"]);
+        assert_eq!(constant_path.full_name().unwrap(), "Foo::Bar");
+    }
+
+    #[test]
+    fn test_full_name_for_constant_path_target_with_stovetop() {
+        let result = parse(b"::Foo, Bar = [1, 2]");
+        let node = result.node().as_program_node().unwrap().statements().body().iter().next().unwrap();
+        let multi_write = node.as_multi_write_node().unwrap();
+        let target = multi_write.lefts().iter().next().unwrap();
+        let constant_path = target.as_constant_path_target_node().unwrap();
+
+        assert_eq!(constant_path.full_name_parts().unwrap(), vec!["", "Foo"]);
+        assert_eq!(constant_path.full_name().unwrap(), "::Foo");
+    }
+
+    #[test]
+    fn test_full_name_for_constant_path_target_with_self() {
+        let result = parse(b"self::Foo, Bar = [1, 2]");
+        let node = result.node().as_program_node().unwrap().statements().body().iter().next().unwrap();
+        let multi_write = node.as_multi_write_node().unwrap();
+        let target = multi_write.lefts().iter().next().unwrap();
+        let constant_path = target.as_constant_path_target_node().unwrap();
+
+        assert_eq!(constant_path.full_name().unwrap_err(), ConstantPathError::DynamicParts);
+    }
+}
diff --git a/rust/ruby-prism/src/parse_result/mod.rs b/rust/ruby-prism/src/parse_result/mod.rs
index 33eb1ac9a0..447b47e460 100644
--- a/rust/ruby-prism/src/parse_result/mod.rs
+++ b/rust/ruby-prism/src/parse_result/mod.rs
@@ -8,7 +8,7 @@ mod diagnostics;
 
 use std::ptr::NonNull;
 
-use ruby_prism_sys::{pm_comment_t, pm_diagnostic_t, pm_location_t, pm_magic_comment_t, pm_node_destroy, pm_node_t, pm_parser_free, pm_parser_t};
+use ruby_prism_sys::{pm_comment_t, pm_diagnostic_t, pm_location_t, pm_magic_comment_t, pm_newline_list_line_column, pm_node_destroy, pm_node_t, pm_parser_free, pm_parser_t};
 
 pub use self::comments::{Comment, CommentType, Comments, MagicComment, MagicComments};
 pub use self::diagnostics::{Diagnostic, Diagnostics};
@@ -66,6 +66,66 @@ impl<'pr> Location<'pr> {
             })
         }
     }
+
+    /// Returns the line number where this location starts.
+    #[must_use]
+    pub fn start_line(&self) -> i32 {
+        self.line_column_at(self.start).line
+    }
+
+    /// Returns the line number where this location ends.
+    #[must_use]
+    pub fn end_line(&self) -> i32 {
+        self.line_column_at(self.end()).line
+    }
+
+    /// Returns the column number in bytes where this location starts from the
+    /// start of the line.
+    #[must_use]
+    pub fn start_column(&self) -> u32 {
+        self.line_column_at(self.start).column
+    }
+
+    /// Returns the column number in bytes where this location ends from the
+    /// start of the line.
+    #[must_use]
+    pub fn end_column(&self) -> u32 {
+        self.line_column_at(self.end()).column
+    }
+
+    /// Returns a new location that is the result of chopping off the last byte.
+    ///
+    /// Chopping an empty location yields an empty location.
+    #[must_use]
+    pub const fn chop(&self) -> Self {
+        Location {
+            parser: self.parser,
+            start: self.start,
+            length: self.length.saturating_sub(1),
+            marker: std::marker::PhantomData,
+        }
+    }
+
+    /// Resolves a byte offset into a line and column by querying the parser's
+    /// newline list together with the parser's configured starting line.
+    fn line_column_at(&self, offset: u32) -> LineColumn {
+        // SAFETY: NOTE(review) this relies on `self.parser` pointing at a
+        // parser that is still alive and initialized for `'pr`; confirm that
+        // every site constructing a `Location` upholds this.
+        unsafe {
+            let parser = self.parser.as_ptr();
+            let newline_list = &(*parser).newline_list;
+            let start_line = (*parser).start_line;
+            let result = pm_newline_list_line_column(newline_list, offset, start_line);
+            LineColumn { line: result.line, column: result.column }
+        }
+    }
+}
+
+/// A line number and byte column pair, as returned by `pm_newline_list_line_column`.
+struct LineColumn {
+    line: i32,
+    column: u32,
 }
 
 impl std::fmt::Debug for Location<'_> {