diff --git a/CHANGELOG.md b/CHANGELOG.md index 763cdf5..87cfef3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.1.8] - 2026-03-31 + +### Fixed + +- Google parser: a non-indented line following a blank line inside a section + was incorrectly absorbed into that section as a bogus entry (e.g. `stray + line 1` became an `Args` entry) or appended to the preceding + `Returns` description. The parser now flushes the current section when a + blank line is followed by a line whose indentation is at or below the + section header's indentation level. +- NumPy parser: same fix applied. `FreeText` sections (Notes, Examples, etc.) + are exempt because their body lines legitimately share the same indentation + level as the section header. + ## [0.1.7] - 2026-03-30 ### Fixed diff --git a/Cargo.lock b/Cargo.lock index cb6c147..319280e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,4 +4,4 @@ version = 4 [[package]] name = "pydocstring" -version = "0.1.7" +version = "0.1.8" diff --git a/Cargo.toml b/Cargo.toml index 4812858..44e0519 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pydocstring" -version = "0.1.7" +version = "0.1.8" edition = "2024" authors = ["Ryuma Asai"] description = "A zero-dependency Rust parser for Python docstrings (Google and NumPy styles) with a unified syntax tree and byte-precise source locations" diff --git a/README.md b/README.md index 2c9b691..fbf920b 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ Python bindings are also available as [`pydocstring-rs`](https://pypi.org/projec ```toml [dependencies] -pydocstring = "0.1.6" +pydocstring = "0.1.8" ``` ## Usage diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index e2c3556..d99352f 100644 --- 
a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -67,11 +67,11 @@ dependencies = [ [[package]] name = "pydocstring" -version = "0.1.7" +version = "0.1.8" [[package]] name = "pydocstring-python" -version = "0.1.7" +version = "0.1.8" dependencies = [ "pydocstring", "pyo3", diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 871f2b6..57dc157 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pydocstring-python" -version = "0.1.7" +version = "0.1.8" edition = "2024" authors = ["Ryuma Asai"] description = "Python bindings for pydocstring — a fast docstring parser for Google and NumPy styles" @@ -12,5 +12,5 @@ name = "pydocstring" crate-type = ["cdylib"] [dependencies] -pydocstring_core = { package = "pydocstring", version = "0.1.7", path = "../.." } +pydocstring_core = { package = "pydocstring", version = "0.1.8", path = "../.." } pyo3 = { version = "0.24", features = ["extension-module"] } diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml index 99ebdbc..e691437 100644 --- a/bindings/python/pyproject.toml +++ b/bindings/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "pydocstring-rs" -version = "0.1.7" +version = "0.1.8" description = "Python bindings for pydocstring — a zero-dependency Rust parser for Python docstrings (Google and NumPy styles) with a unified syntax tree and byte-precise source locations" license = {text = "MIT"} authors = [{name = "Ryuma Asai"}] diff --git a/src/parse/google/parser.rs b/src/parse/google/parser.rs index 7f4b388..82200dd 100644 --- a/src/parse/google/parser.rs +++ b/src/parse/google/parser.rs @@ -724,6 +724,10 @@ pub fn parse_google(input: &str) -> Parsed { let mut current_header: Option = None; let mut current_body: Option = None; let mut entry_indent: Option = None; + // Set when a blank line is encountered while inside a section. 
+ // Used to terminate the section when the next non-blank line is at or + // below the section header's indentation level. + let mut had_blank_in_section: bool = false; while !line_cursor.is_eof() { // --- Blank lines --- @@ -735,6 +739,9 @@ pub fn parse_google(input: &str) -> Parsed { ))); summary_done = true; } + if current_body.is_some() { + had_blank_in_section = true; + } line_cursor.advance(); continue; } @@ -781,10 +788,31 @@ pub fn parse_google(input: &str) -> Parsed { current_body = Some(SectionBody::new(header_info.kind)); current_header = Some(header_info); entry_indent = None; + had_blank_in_section = false; line_cursor.advance(); continue; } + // --- Flush section if a blank line preceded a line not indented past the header --- + // A blank line followed by a line at or below the section header's + // indentation level ends the current section. Lines that are more + // indented than the header (e.g. a second entry inside an Args block + // separated from the first by a blank line) continue the section. 
+ if had_blank_in_section { + if let Some(ref h) = current_header { + if line_cursor.current_indent_columns() <= h.indent_columns { + let prev_header = current_header.take().unwrap(); + flush_section( + &line_cursor, + &mut root_children, + prev_header, + current_body.take().unwrap(), + ); + } + } + had_blank_in_section = false; + } + // --- Process line based on current state --- if let Some(ref mut body) = current_body { body.process_line(&line_cursor, &mut entry_indent); diff --git a/src/parse/numpy/parser.rs b/src/parse/numpy/parser.rs index c15f04d..ec645ce 100644 --- a/src/parse/numpy/parser.rs +++ b/src/parse/numpy/parser.rs @@ -51,6 +51,7 @@ fn try_detect_header(cursor: &LineCursor) -> Option { kind, name: cursor.make_line_range(cursor.line, header_col, header_trimmed.len()), underline: cursor.make_line_range(cursor.line + 1, underline_col, underline_trimmed.len()), + indent_columns: cursor.current_indent_columns(), }) } @@ -59,6 +60,7 @@ struct SectionHeaderInfo { kind: NumPySectionKind, name: TextRange, underline: TextRange, + indent_columns: usize, } // ============================================================================= @@ -1172,9 +1174,13 @@ pub fn parse_numpy(input: &str) -> Parsed { let mut current_header: Option = None; let mut current_body: Option = None; let mut entry_indent: Option = None; + let mut had_blank_in_section: bool = false; while !cursor.is_eof() { if cursor.current_trimmed().is_empty() { + if current_body.is_some() { + had_blank_in_section = true; + } cursor.advance(); continue; } @@ -1189,10 +1195,28 @@ pub fn parse_numpy(input: &str) -> Parsed { current_body = Some(SectionBody::new(header_info.kind)); current_header = Some(header_info); entry_indent = None; + had_blank_in_section = false; cursor.line += 2; // skip header + underline continue; } + // Flush section if a blank line preceded a line not indented past the header. + // FreeText sections (Notes, Examples, etc.) 
may have same-indent paragraphs + // separated by blank lines — do not flush those. + if had_blank_in_section { + let is_freetext = matches!(current_body, Some(SectionBody::FreeText(_))); + if !is_freetext { + if let Some(ref h) = current_header { + if cursor.current_indent_columns() <= h.indent_columns { + let prev_header = current_header.take().unwrap(); + let section_node = flush_section(&cursor, prev_header, current_body.take().unwrap()); + root_children.push(SyntaxElement::Node(section_node)); + } + } + } + had_blank_in_section = false; + } + if let Some(ref mut body) = current_body { body.process_line(&cursor, &mut entry_indent); } else { diff --git a/tests/google/edge_cases.rs b/tests/google/edge_cases.rs index a73556f..fad4659 100644 --- a/tests/google/edge_cases.rs +++ b/tests/google/edge_cases.rs @@ -316,6 +316,91 @@ fn test_arg_no_description_space_before_colon_not_header() { // RST-style :param lines inside Args section // ============================================================================= +// ============================================================================= +// Stray lines between sections +// ============================================================================= + +/// A non-section, non-indented line that appears after a blank line following +/// a section's entries must NOT be absorbed into the previous section. +/// It should become a STRAY_LINE, and the next real section must be parsed +/// correctly. +#[test] +fn test_stray_line_between_args_and_returns() { + let input = "Summary.\n\nArgs:\n a: desc.\n\nstray line 1\n\nReturns:\n desc\n\nstray line 2"; + let result = parse_google(input); + + // Args section should contain exactly one entry. + let a = args(&result); + assert_eq!(a.len(), 1, "stray line must not become an arg entry"); + assert_eq!(a[0].name().text(result.source()), "a"); + + // Returns section should be present and its description should not include + // the stray line. 
+ let r = returns(&result).unwrap(); + let desc = r.description().unwrap().text(result.source()); + assert!( + !desc.contains("stray"), + "stray line must not be part of Returns description" + ); +} + +/// A blank-line-separated entry at greater indent than the header must still +/// be absorbed into the same section (existing behaviour). +#[test] +fn test_blank_between_entries_within_section() { + let input = "Summary.\n\nArgs:\n x (int): Value.\n\n y (str): Name.\n\nReturns:\n bool: Success."; + let result = parse_google(input); + assert_eq!(args(&result).len(), 2, "both entries should belong to Args"); + assert!(returns(&result).is_some()); +} + +/// An arg description that has a blank line followed by a more-deeply-indented +/// continuation must keep both parts in the description. +#[test] +fn test_arg_description_blank_line_with_continuation() { + // " Second paragraph." is at 8 spaces — deeper than the entry (4). + let input = "Summary.\n\nArgs:\n a: First paragraph.\n\n Second paragraph.\n\nReturns:\n bool: ok.\n"; + let result = parse_google(input); + let a = args(&result); + assert_eq!(a.len(), 1, "should be exactly one arg"); + let desc = a[0].description().unwrap().text(result.source()); + assert!(desc.contains("First paragraph."), "desc = {:?}", desc); + assert!(desc.contains("Second paragraph."), "desc = {:?}", desc); + // Returns must still be parsed correctly. + assert!(returns(&result).is_some()); +} + +/// A FreeText section (Notes) with a blank line between two paragraphs at the +/// same depth must keep both paragraphs in its body. 
+#[test] +fn test_freetext_description_blank_line_continuation() { + let input = "Summary.\n\nNotes:\n Paragraph one.\n\n Paragraph two.\n\nArgs:\n x: val.\n"; + let result = parse_google(input); + let sections = all_sections(&result); + // Notes section present + let notes_sec = sections + .iter() + .find(|s| s.header().name().text(result.source()) == "Notes"); + assert!(notes_sec.is_some(), "Notes section should be present"); + let body = notes_sec.unwrap().syntax().find_token(SyntaxKind::BODY_TEXT).unwrap(); + let body_text = body.text(result.source()); + assert!(body_text.contains("Paragraph one."), "body = {:?}", body_text); + assert!(body_text.contains("Paragraph two."), "body = {:?}", body_text); + // Args must still be parsed + assert_eq!(args(&result).len(), 1); +} + +/// Returns description with blank line + continuation at deeper indent. +#[test] +fn test_returns_description_blank_line_continuation() { + let input = "Summary.\n\nReturns:\n bool: Short desc.\n\n Longer explanation.\n"; + let result = parse_google(input); + let r = returns(&result).unwrap(); + let desc = r.description().unwrap().text(result.source()); + assert!(desc.contains("Short desc."), "desc = {:?}", desc); + assert!(desc.contains("Longer explanation."), "desc = {:?}", desc); +} + /// RST-style `:param foo:` lines inside a Google `Args:` section must not /// produce a GOOGLE_ARG with an empty NAME, which would panic when /// `required_token(NAME)` is called. They should be treated as bare-name diff --git a/tests/numpy/sections.rs b/tests/numpy/sections.rs index db4a55f..8026a02 100644 --- a/tests/numpy/sections.rs +++ b/tests/numpy/sections.rs @@ -169,6 +169,25 @@ fn test_stray_lines() { assert_eq!(parameters(&result).len(), 1); } +#[test] +fn test_stray_line_between_sections() { + // stray line 1 is at indent 0 inside Parameters section (after a blank line). + // stray line 2 is at indent 0 inside Returns section (after a blank line). 
+ let input = "Summary.\n\nParameters\n----------\na : int\n desc.\n\nstray line 1\n\nReturns\n-------\nbool\n desc\n\nstray line 2\n"; + let result = parse_numpy(input); + let p = parameters(&result); + assert_eq!(p.len(), 1, "stray line must not become a parameter"); + assert_eq!(p[0].names().next().unwrap().text(result.source()), "a"); + let r = returns(&result); + assert!(!r.is_empty(), "Returns section must be parsed"); + let desc = r[0].description().unwrap().text(result.source()); + assert!( + !desc.contains("stray"), + "stray line must not be in Returns desc, got {:?}", + desc + ); +} + // ============================================================================= // Display impl // =============================================================================