diff --git a/CHANGELOG.md b/CHANGELOG.md index 87cfef3..21ed659 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,22 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.1.9] - 2026-04-01 + +### Fixed + +- Google parser: stray lines without a preceding blank line were incorrectly + absorbed into the current section as bogus entries. The `had_blank_in_section` + flag is removed; instead, any non-blank line at or below the section header's + indentation level unconditionally flushes the current section, regardless of + whether a blank line preceded it. +- NumPy parser: the `had_blank_in_section` flush introduced in v0.1.8 incorrectly + terminated a section when two entries were separated by a blank line (e.g. + `x : int\n\ny : float` inside a `Parameters` block). The flag is removed; + NumPy sections now end only when the next `name\n---` header is detected, + matching the NumPy docstring specification (stray lines inside NumPy sections + are a known limitation documented in the source). + ## [0.1.8] - 2026-03-31 ### Fixed diff --git a/Cargo.lock b/Cargo.lock index 319280e..42bb9bb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,4 +4,4 @@ version = 4 [[package]] name = "pydocstring" -version = "0.1.8" +version = "0.1.9" diff --git a/Cargo.toml b/Cargo.toml index 44e0519..a221c84 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pydocstring" -version = "0.1.8" +version = "0.1.9" edition = "2024" authors = ["Ryuma Asai"] description = "A zero-dependency Rust parser for Python docstrings (Google and NumPy styles) with a unified syntax tree and byte-precise source locations" diff --git a/README.md b/README.md index fbf920b..a1804c4 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ Python bindings are also available as [`pydocstring-rs`](https://pypi.org/projec ```toml [dependencies] -pydocstring = "0.1.8" +pydocstring = "0.1.9" ``` ## Usage diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 57dc157..b443a2c 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pydocstring-python" -version = "0.1.8" +version = "0.1.9" edition = "2024" authors = ["Ryuma Asai"] description = "Python bindings for pydocstring — a fast docstring parser for Google and NumPy styles" @@ -12,5 +12,5 @@ name = "pydocstring" crate-type = ["cdylib"] [dependencies] -pydocstring_core = { package = "pydocstring", version = "0.1.8", path = "../.." } +pydocstring_core = { package = "pydocstring", version = "0.1.9", path = "../.." } pyo3 = { version = "0.24", features = ["extension-module"] } diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml index e691437..e14afb1 100644 --- a/bindings/python/pyproject.toml +++ b/bindings/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "pydocstring-rs" -version = "0.1.8" +version = "0.1.9" description = "Python bindings for pydocstring — a zero-dependency Rust parser for Python docstrings (Google and NumPy styles) with a unified syntax tree and byte-precise source locations" license = {text = "MIT"} authors = [{name = "Ryuma Asai"}] diff --git a/src/parse/google/parser.rs b/src/parse/google/parser.rs index 82200dd..afde971 100644 --- a/src/parse/google/parser.rs +++ b/src/parse/google/parser.rs @@ -724,10 +724,6 @@ pub fn parse_google(input: &str) -> Parsed { let mut current_header: Option = None; let mut current_body: Option = None; let mut entry_indent: Option = None; - // Set when a blank line is encountered while inside a section. - // Used to terminate the section when the next non-blank line is at or - // below the section header's indentation level. - let mut had_blank_in_section: bool = false; while !line_cursor.is_eof() { // --- Blank lines --- @@ -739,9 +735,6 @@ pub fn parse_google(input: &str) -> Parsed { ))); summary_done = true; } - if current_body.is_some() { - had_blank_in_section = true; - } line_cursor.advance(); continue; } @@ -788,20 +781,20 @@ pub fn parse_google(input: &str) -> Parsed { current_body = Some(SectionBody::new(header_info.kind)); current_header = Some(header_info); entry_indent = None; - had_blank_in_section = false; line_cursor.advance(); continue; } - // --- Flush section if a blank line preceded a non-indented line --- - // A blank line followed by a line at or below the section header's - // indentation level ends the current section. Lines that are more - // indented than the header (e.g. a second entry inside an Args block - // separated from the first by a blank line) continue the section. - if had_blank_in_section { - if let Some(ref h) = current_header { - if line_cursor.current_indent_columns() <= h.indent_columns { - let prev_header = current_header.take().unwrap(); + // --- Flush section when a stray line is detected --- + // In Google style every section body line must be more indented than the + // section header. A line at or below the header's indent that is not itself + // a section header ends the current section unconditionally, regardless of + // whether a blank line preceded it. + { + let l = line_cursor.current_indent_columns(); + let below_or_at_header = current_header.as_ref().is_some_and(|h| l <= h.indent_columns); + if below_or_at_header { + if let Some(prev_header) = current_header.take() { flush_section( &line_cursor, &mut root_children, @@ -810,7 +803,6 @@ pub fn parse_google(input: &str) -> Parsed { ); } } - had_blank_in_section = false; } // --- Process line based on current state --- diff --git a/src/parse/numpy/parser.rs b/src/parse/numpy/parser.rs index ec645ce..485e6d3 100644 --- a/src/parse/numpy/parser.rs +++ b/src/parse/numpy/parser.rs @@ -51,7 +51,6 @@ fn try_detect_header(cursor: &LineCursor) -> Option { kind, name: cursor.make_line_range(cursor.line, header_col, header_trimmed.len()), underline: cursor.make_line_range(cursor.line + 1, underline_col, underline_trimmed.len()), - indent_columns: cursor.current_indent_columns(), }) } @@ -60,7 +59,6 @@ struct SectionHeaderInfo { kind: NumPySectionKind, name: TextRange, underline: TextRange, - indent_columns: usize, } // ============================================================================= @@ -1174,13 +1172,9 @@ pub fn parse_numpy(input: &str) -> Parsed { let mut current_header: Option = None; let mut current_body: Option = None; let mut entry_indent: Option = None; - let mut had_blank_in_section: bool = false; while !cursor.is_eof() { if cursor.current_trimmed().is_empty() { - if current_body.is_some() { - had_blank_in_section = true; - } cursor.advance(); continue; } @@ -1195,28 +1189,13 @@ pub fn parse_numpy(input: &str) -> Parsed { current_body = Some(SectionBody::new(header_info.kind)); current_header = Some(header_info); entry_indent = None; - had_blank_in_section = false; cursor.line += 2; // skip header + underline continue; } - // Flush section if a blank line preceded a non-indented line. - // FreeText sections (Notes, Examples, etc.) may have same-indent paragraphs - // separated by blank lines — do not flush those. - if had_blank_in_section { - let is_freetext = matches!(current_body, Some(SectionBody::FreeText(_))); - if !is_freetext { - if let Some(ref h) = current_header { - if cursor.current_indent_columns() <= h.indent_columns { - let prev_header = current_header.take().unwrap(); - let section_node = flush_section(&cursor, prev_header, current_body.take().unwrap()); - root_children.push(SyntaxElement::Node(section_node)); - } - } - } - had_blank_in_section = false; - } - + // NumPy entries sit at the same indentation level as the section header + // (L = H = 0), so stray lines cannot be detected by indent or blank-line + // heuristics alone. Sections end only when the next header is detected. if let Some(ref mut body) = current_body { body.process_line(&cursor, &mut entry_indent); } else { diff --git a/tests/google/edge_cases.rs b/tests/google/edge_cases.rs index fad4659..688e635 100644 --- a/tests/google/edge_cases.rs +++ b/tests/google/edge_cases.rs @@ -401,6 +401,19 @@ fn test_returns_description_blank_line_continuation() { assert!(desc.contains("Longer explanation."), "desc = {:?}", desc); } +/// Same as above but WITHOUT blank lines before the stray lines. +#[test] +fn test_stray_line_between_args_and_returns_no_blank() { + let input = "Summary.\n\nArgs:\n a: desc.\nstray line 1\n\nReturns:\n desc\nstray line 2\n"; + let result = parse_google(input); + let a = args(&result); + assert_eq!(a.len(), 1, "stray line must not become an arg entry (no-blank case)"); + assert_eq!(a[0].name().text(result.source()), "a"); + let r = returns(&result).unwrap(); + let desc = r.description().unwrap().text(result.source()); + assert!(!desc.contains("stray"), "stray line must not be in Returns description"); +} + /// RST-style `:param foo:` lines inside a Google `Args:` section must not /// produce a GOOGLE_ARG with an empty NAME, which would panic when /// `required_token(NAME)` is called. They should be treated as bare-name diff --git a/tests/numpy/parameters.rs b/tests/numpy/parameters.rs index 295393a..5f502c1 100644 --- a/tests/numpy/parameters.rs +++ b/tests/numpy/parameters.rs @@ -150,6 +150,17 @@ x1, x2 : array_like assert_eq!(names[1].text(result.source()), "x2"); } +#[test] +fn test_multiple_parameters_with_blank_line_between() { + // NumPy style allows a blank line between parameter entries. + let docstring = "Summary.\n\nParameters\n----------\nx : int\n First.\n\ny : str\n Second.\n"; + let result = parse_numpy(docstring); + let p = parameters(&result); + assert_eq!(p.len(), 2, "both parameters should be in the same section"); + assert_eq!(p[0].names().next().unwrap().text(result.source()), "x"); + assert_eq!(p[1].names().next().unwrap().text(result.source()), "y"); +} + #[test] fn test_description_with_colon_not_treated_as_param() { let docstring = r#"Brief summary. diff --git a/tests/numpy/sections.rs b/tests/numpy/sections.rs index 8026a02..cce80a9 100644 --- a/tests/numpy/sections.rs +++ b/tests/numpy/sections.rs @@ -171,21 +171,26 @@ fn test_stray_lines() { #[test] fn test_stray_line_between_sections() { - // stray line 1 is at indent 0 inside Parameters section (after a blank line). - // stray line 2 is at indent 0 inside Returns section (after a blank line). + // In NumPy style, entries and stray lines sit at the same indentation level + // (L = H = 0). A stray line between sections is absorbed into the preceding + // section as a spurious entry because indent alone cannot distinguish them. + // Sections end only when the next section header (name + underline) is found. let input = "Summary.\n\nParameters\n----------\na : int\n desc.\n\nstray line 1\n\nReturns\n-------\nbool\n desc\n\nstray line 2\n"; let result = parse_numpy(input); - let p = parameters(&result); - assert_eq!(p.len(), 1, "stray line must not become a parameter"); - assert_eq!(p[0].names().next().unwrap().text(result.source()), "a"); + // Returns is still parsed (it has a proper header+underline). + let r = returns(&result); + assert!(!r.is_empty(), "Returns section must be parsed"); +} + +#[test] +fn test_stray_line_between_sections_no_blank() { + // Same limitation as test_stray_line_between_sections: stray lines in NumPy + // style cannot be detected and are absorbed into the preceding section. + let input = + "Summary.\n\nParameters\n----------\na : int\n desc.\nstray line 1\n\nReturns\n-------\nbool\n desc\n"; + let result = parse_numpy(input); let r = returns(&result); assert!(!r.is_empty(), "Returns section must be parsed"); - let desc = r[0].description().unwrap().text(result.source()); - assert!( - !desc.contains("stray"), - "stray line must not be in Returns desc, got {:?}", - desc - ); } // =============================================================================