From f7e2b38301917dc56e49a90f271b47b908aebe7f Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 4 Mar 2026 03:54:55 +0000 Subject: [PATCH 01/12] Resolve grammar rules in link reference definitions Currently, in our Markdown, we support `[text][RULE_NAME]` and `[text][grammar-RULE_NAME]` for linking to grammar rules, but we don't support this syntax within link reference definitions, i.e., `[text]: grammar-RULE_NAME`, even though we do support linking to (non-grammar) rule identifiers within link reference definitions. That's an inconsistency that continually surprises us. Let's fix that. In this commit, we add `grammar_link_references`, which scans link reference definitions for destinations that match a grammar rule name -- either with a `grammar-` prefix or not. When a match is found, the destination is replaced with the resolved path and anchor, just as `rule_link_references` does for rules. Unrecognized destinations pass through unchanged, falling through to `std_links` for rustdoc resolution -- the same behavior as unresolved `[text][NAME]` reference links. We also update the dev-guide to document the new feature in both `links.md` and `grammar.md`. --- dev-guide/src/grammar.md | 14 +++++++++++ dev-guide/src/links.md | 4 ++++ tools/mdbook-spec/src/grammar.rs | 41 ++++++++++++++++++++++++++++++++ tools/mdbook-spec/src/lib.rs | 1 + 4 files changed, 60 insertions(+) diff --git a/dev-guide/src/grammar.md b/dev-guide/src/grammar.md index 2d9b22756d..af1c23ba6e 100644 --- a/dev-guide/src/grammar.md +++ b/dev-guide/src/grammar.md @@ -154,5 +154,19 @@ The [`mdbook-spec`] plugin automatically adds Markdown link definitions for all In some cases, there might be name collisions with the automatic linking of rule names. In that case, disambiguate with the `grammar-` prefix, such as `[Type][grammar-Type]`. The prefix can also be used when explicitness would aid clarity. 
+Production names can also be used in link reference definitions to provide custom link text, both with and without the `grammar-` prefix. + +```markdown +We accept any [type]. + +[type]: grammar-Type +``` + +```markdown +We accept any [type]. + +[type]: Type +``` + [`mdbook-spec`]: tooling/mdbook-spec.md [Notation]: https://doc.rust-lang.org/nightly/reference/notation.html diff --git a/dev-guide/src/links.md b/dev-guide/src/links.md index fb2199c0dc..20458758a6 100644 --- a/dev-guide/src/links.md +++ b/dev-guide/src/links.md @@ -74,6 +74,10 @@ Link definitions are automatically generated for all grammar production names. S This attribute uses the [MetaWord] syntax. Explicit grammar links can have the `grammar-` prefix like [Type][grammar-Type]. + +Grammar links can also appear in link reference definitions, e.g. [type]. + +[type]: grammar-Type ``` ## Outside book links diff --git a/tools/mdbook-spec/src/grammar.rs b/tools/mdbook-spec/src/grammar.rs index 12ece5df7a..8aa98d3608 100644 --- a/tools/mdbook-spec/src/grammar.rs +++ b/tools/mdbook-spec/src/grammar.rs @@ -75,6 +75,47 @@ pub fn insert_grammar(grammar: &Grammar, chapter: &Chapter, diag: &mut Diagnosti content } +/// Converts link reference definitions that point to a grammar rule +/// to the correct link. +/// +/// For example: +/// +/// ```markdown +/// We accept any [token]. +/// +/// [token]: grammar-Token +/// ``` +/// +/// This will convert the `[token]` definition to point +/// to the actual link. +/// +/// This supports both a `grammar-` prefixed form (e.g. +/// `grammar-Token`) and a bare rule name (e.g. `Token`). 
+pub fn grammar_link_references(chapter: &Chapter, grammar: &Grammar) -> String { + let current_path = chapter.path.as_ref().unwrap().parent().unwrap(); + let for_summary = is_summary(chapter); + crate::MD_LINK_REFERENCE_DEFINITION + .replace_all(&chapter.content, |caps: &Captures<'_>| { + let dest = &caps["dest"]; + let name = dest.strip_prefix("grammar-").unwrap_or(dest); + if let Some(production) = grammar.productions.get(name) { + let label = &caps["label"]; + let relative = pathdiff::diff_paths(&production.path, current_path).unwrap(); + // Adjust paths for Windows. + let relative = relative.display().to_string().replace('\\', "/"); + let id = render_markdown::markdown_id(name, for_summary); + if for_summary { + format!("[{label}]: #{id}") + } else { + format!("[{label}]: {relative}#{id}") + } + } else { + caps.get(0).unwrap().as_str().to_string() + } + }) + .to_string() +} + /// Creates a map of production name -> relative link path. fn make_relative_link_map(grammar: &Grammar, chapter: &Chapter) -> HashMap<String, String> { let current_path = chapter.path.as_ref().unwrap().parent().unwrap(); diff --git a/tools/mdbook-spec/src/lib.rs b/tools/mdbook-spec/src/lib.rs index 918508a6df..b94d296940 100644 --- a/tools/mdbook-spec/src/lib.rs +++ b/tools/mdbook-spec/src/lib.rs @@ -168,6 +168,7 @@ impl Preprocessor for Spec { } ch.content = admonitions::admonitions(&ch, &mut diag); ch.content = self.rule_link_references(&ch, &rules); + ch.content = grammar::grammar_link_references(&ch, &grammar); ch.content = self.auto_link_references(&ch, &rules); ch.content = self.render_rule_definitions(&ch.content, &tests, &git_ref); if ch.name == "Test summary" { From 9ad9c23c6061705b4c0842e8e34616f35ea5b2f2 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 9 Oct 2025 14:01:04 -0500 Subject: [PATCH 02/12] docs(ref): Specify frontmatter --- src/SUMMARY.md | 1 + src/frontmatter.md | 58 ++++++++++++++++++++++++++++++++++++++++++++ src/input-format.md | 8 +++++- src/items/modules.md | 2 +- 4 files 
changed, 67 insertions(+), 2 deletions(-) create mode 100644 src/frontmatter.md diff --git a/src/SUMMARY.md b/src/SUMMARY.md index 49caabfdd8..b0c390ae50 100644 --- a/src/SUMMARY.md +++ b/src/SUMMARY.md @@ -6,6 +6,7 @@ - [Lexical structure](lexical-structure.md) - [Input format](input-format.md) + - [Frontmatter](frontmatter.md) - [Keywords](keywords.md) - [Identifiers](identifiers.md) - [Comments](comments.md) diff --git a/src/frontmatter.md b/src/frontmatter.md new file mode 100644 index 0000000000..388f9232f5 --- /dev/null +++ b/src/frontmatter.md @@ -0,0 +1,58 @@ +r[frontmatter] +# Frontmatter + +r[frontmatter.syntax] +```grammar,lexer +@root FRONTMATTER -> + FRONTMATTER_FENCE HORIZONTAL_WHITESPACE* INFOSTRING? HORIZONTAL_WHITESPACE* LF + (FRONTMATTER_LINE LF )* + FRONTMATTER_FENCE[^matched-fence] HORIZONTAL_WHITESPACE* LF + +FRONTMATTER_FENCE -> `-`{3..255} + +INFOSTRING -> (XID_Start | `_`) ( XID_Continue | `-` | `.` )* + +FRONTMATTER_LINE -> (~INVALID_FRONTMATTER_LINE_START (~INVALID_FRONTMATTER_LINE_CONTINUE)*)? + +INVALID_FRONTMATTER_LINE_START -> (FRONTMATTER_FENCE[^escaped-fence] | CR | LF) + +INVALID_FRONTMATTER_LINE_CONTINUE -> CR | LF + +HORIZONTAL_WHITESPACE -> + U+0009 // horizontal tab, `'\t'` + | U+0020 // space, `' '` +``` + +[^matched-fence]: The closing fence must have the same number of `-` as the opening fence +[^escaped-fence]: A `FRONTMATTER_FENCE` at the beginning of a `FRONTMATTER_LINE` is only invalid if it has the same or more `-` as the `FRONTMATTER_FENCE` + +r[frontmatter.intro] +Frontmatter is an optional section for content intended for external tools without requiring these tools to have full knowledge of the Rust grammar. + +```rust +#!/usr/bin/env cargo +--- +[dependencies] +fastrand = "2" +--- + +fn main() { + let num = fastrand::i32(..); + println!("{num}"); +} +``` + +r[frontmatter.document] +Frontmatter may only be preceded by a [shebang] and [whitespace]. + +r[frontmatter.fence] +The delimiters are referred to as a *fence*. 
The opening and closing fences must be at the start of a line. They must be a matching pair of hyphens (`-`), from 3 to 255. A fence may be followed by horizontal whitespace. + +r[frontmatter.infostring] +Following the opening fence may be an infostring for identifying the intention of the contained content. An infostring may be followed by horizontal whitespace. + +r[frontmatter.body] +The body of the frontmatter may contain any content except for a line starting with as many or more hyphens (`-`) than in the fences or carriage returns. + +[shebang]: input-format.md#shebang-removal +[whitespace]: whitespace.md diff --git a/src/input-format.md b/src/input-format.md index 2d7a2124c1..3e43ff0ae6 100644 --- a/src/input-format.md +++ b/src/input-format.md @@ -69,6 +69,11 @@ The shebang may appear immediately at the start of the file or after the optiona r[input.shebang.removal] The shebang is removed from the input sequence (and is therefore ignored). +r[input.frontmatter] +## Frontmatter removal + +After some [whitespace], [frontmatter] may next appear in the input. + r[input.tokenization] ## Tokenization @@ -79,7 +84,7 @@ The resulting sequence of characters is then converted into tokens as described > > - Byte order mark removal. > - CRLF normalization. -> - Shebang removal when invoked in an item context (as opposed to expression or statement contexts). +> - Shebang and frontmatter removal when invoked in an item context (as opposed to expression or statement contexts). > > The [`include_str!`] and [`include_bytes!`] macros do not apply these transformations. 
@@ -88,4 +93,5 @@ The resulting sequence of characters is then converted into tokens as described [comments]: comments.md [Crates and source files]: crates-and-source-files.md [shebang]: https://en.wikipedia.org/wiki/Shebang_(Unix) +[frontmatter]: frontmatter.md [whitespace]: whitespace.md diff --git a/src/items/modules.md b/src/items/modules.md index 3cc015025b..2164051f84 100644 --- a/src/items/modules.md +++ b/src/items/modules.md @@ -123,7 +123,7 @@ r[items.mod.attributes] ## Attributes on modules r[items.mod.attributes.intro] -Modules, like all items, accept outer attributes. They also accept inner attributes: either after `{` for a module with a body, or at the beginning of the source file, after the optional BOM and shebang. +Modules, like all items, accept outer attributes. They also accept inner attributes: either after `{` for a module with a body, or at the beginning of the source file, after the optional BOM, shebang, and frontmatter. r[items.mod.attributes.supported] The built-in attributes that have meaning on a module are [`cfg`], [`deprecated`], [`doc`], [the lint check attributes], [`path`], and [`no_implicit_prelude`]. Modules also accept macro attributes. From 0eeb9d20c8ac70005ffe26aba4eb3f6661dba328 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 4 Feb 2026 02:25:32 +0000 Subject: [PATCH 03/12] Revise frontmatter grammar MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prior commit added a grammar for frontmatter, but the grammar notation available at the time that commit was prepared couldn't express all of the invariants the language requires. Opening and closing fences must have the same dash count. Indented fences must be rejected as an error. And once an opening fence is recognized, the parser must commit -- it can't backtrack and reinterpret the dashes as tokens. Since then, we've added named range repeats, hard cut, and negative lookahead to the grammar notation. 
With these, we can express the invariants directly. In this commit, we rewrite the frontmatter grammar. Named range repeats let the closing fence reference the opening fence's dash count. Hard cut commits the parse after the opening dashes. And `FRONTMATTER_INVALID` uses hard cut followed by the bottom rule (`^ ⊥`) to express that indented fences are a recognized-and-rejected syntactic form. We also add `⊥` as a primitive production in the Notation chapter, move `HORIZONTAL_WHITESPACE` to Whitespace, and fix some minor editorial matters such as indentation and comment style. --- src/frontmatter.md | 36 ++++++++++++++++++++++-------------- src/notation.md | 14 ++++++++++++++ src/whitespace.md | 4 ++++ 3 files changed, 40 insertions(+), 14 deletions(-) diff --git a/src/frontmatter.md b/src/frontmatter.md index 388f9232f5..f1d1df16ea 100644 --- a/src/frontmatter.md +++ b/src/frontmatter.md @@ -4,27 +4,35 @@ r[frontmatter] r[frontmatter.syntax] ```grammar,lexer @root FRONTMATTER -> - FRONTMATTER_FENCE HORIZONTAL_WHITESPACE* INFOSTRING? HORIZONTAL_WHITESPACE* LF - (FRONTMATTER_LINE LF )* - FRONTMATTER_FENCE[^matched-fence] HORIZONTAL_WHITESPACE* LF + WHITESPACE_ONLY_LINE* + !FRONTMATTER_INVALID + FRONTMATTER_MAIN -FRONTMATTER_FENCE -> `-`{3..255} +WHITESPACE_ONLY_LINE -> (!LF WHITESPACE)* LF -INFOSTRING -> (XID_Start | `_`) ( XID_Continue | `-` | `.` )* +FRONTMATTER_INVALID -> (!LF WHITESPACE)+ `---` ^ ⊥ -FRONTMATTER_LINE -> (~INVALID_FRONTMATTER_LINE_START (~INVALID_FRONTMATTER_LINE_CONTINUE)*)? 
+FRONTMATTER_MAIN -> + `-`{n:3..=255} ^ FRONTMATTER_REST -INVALID_FRONTMATTER_LINE_START -> (FRONTMATTER_FENCE[^escaped-fence] | CR | LF) +FRONTMATTER_REST -> + FRONTMATTER_FENCE_START + FRONTMATTER_LINE* + FRONTMATTER_FENCE_END -INVALID_FRONTMATTER_LINE_CONTINUE -> CR | LF +FRONTMATTER_FENCE_START -> + MAYBE_INFOSTRING_OR_WS LF -HORIZONTAL_WHITESPACE -> - U+0009 // horizontal tab, `'\t'` - | U+0020 // space, `' '` -``` +FRONTMATTER_FENCE_END -> + `-`{n} HORIZONTAL_WHITESPACE* ( LF | EOF ) + +FRONTMATTER_LINE -> !`-`{n} ~[LF CR]* LF -[^matched-fence]: The closing fence must have the same number of `-` as the opening fence -[^escaped-fence]: A `FRONTMATTER_FENCE` at the beginning of a `FRONTMATTER_LINE` is only invalid if it has the same or more `-` as the `FRONTMATTER_FENCE` +MAYBE_INFOSTRING_OR_WS -> + HORIZONTAL_WHITESPACE* INFOSTRING? HORIZONTAL_WHITESPACE* + +INFOSTRING -> (XID_Start | `_`) ( XID_Continue | `-` | `.` )* +``` r[frontmatter.intro] Frontmatter is an optional section for content intended for external tools without requiring these tools to have full knowledge of the Rust grammar. diff --git a/src/notation.md b/src/notation.md index 7537c67ddc..fc98c36462 100644 --- a/src/notation.md +++ b/src/notation.md @@ -45,6 +45,20 @@ Mizushima et al. introduced [cut operators][cut operator paper] to parsing expre The hard cut operator is necessary because some tokens in Rust begin with a prefix that is itself a valid token. For example, `c"` begins a C string literal, but `c` alone is a valid identifier. Without the cut, if `c"\0"` failed to lex as a C string literal (because null bytes are not allowed in C strings), the parser could backtrack and lex it as two tokens: the identifier `c` and the string literal `"\0"`. The [cut after `c"`] prevents this --- once the opening delimiter is recognized, the parser cannot go back. 
The same reasoning applies to [byte literals], [byte string literals], [raw string literals], and other literals with prefixes that are themselves valid tokens. +r[notation.grammar.bottom] +### The bottom rule + +In logic, ⊥ (*bottom*) represents absurdity --- a proposition that is always false. In type theory, it is the *empty type*: a type with no inhabitants. The grammar borrows both senses: the rule ⊥ matches nothing --- not any character, not even the end of input. + +```grammar,notation +// The bottom rule does not match anything. +⊥ -> !(CHAR | EOF) +``` + +Placed after a [hard cut operator], `^ ⊥` makes a rule fail unconditionally once the parser has committed past the cut. This gives the grammar a way to express *recognition without acceptance*: the parser identifies the input, commits so that no other alternative can be tried, and then rejects it. In the frontmatter grammar, for example, [`FRONTMATTER_INVALID`] uses `^ ⊥` to recognize an opening fence preceded by whitespace on the same line --- input that is close enough to frontmatter to rule out other interpretations, but that is not valid. + +[`FRONTMATTER_INVALID`]: frontmatter.md#grammar-FRONTMATTER_INVALID + r[notation.grammar.string-tables] ### String table productions diff --git a/src/whitespace.md b/src/whitespace.md index 7e16c51d41..25f33ee1c6 100644 --- a/src/whitespace.md +++ b/src/whitespace.md @@ -16,6 +16,10 @@ WHITESPACE -> | U+2028 // Line separator | U+2029 // Paragraph separator +HORIZONTAL_WHITESPACE -> + U+0009 // Horizontal tab, `'\t'` + | U+0020 // Space, `' '` + TAB -> U+0009 // Horizontal tab, `'\t'` LF -> U+000A // Line feed, `'\n'` From 24cad86931069a26337844237d12ffdfa8c90aab Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 25 Feb 2026 06:54:53 +0000 Subject: [PATCH 04/12] Clarify the fence description The fence description uses the phrase "a matching pair of hyphens", which can be misread as describing exactly two individual hyphens. 
The constraints on fence length and matching are also compressed into a single sentence with a trailing subclause ("from 3 to 255") that reads as nonrestrictive. Let's give each constraint its own sentence: what a fence is, where it must appear, the length bounds on the opening fence, the matching requirement for the closing fence, and trailing whitespace. This makes the structure clearer. --- src/frontmatter.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/frontmatter.md b/src/frontmatter.md index f1d1df16ea..87eb70acf7 100644 --- a/src/frontmatter.md +++ b/src/frontmatter.md @@ -54,7 +54,7 @@ r[frontmatter.document] Frontmatter may only be preceded by a [shebang] and [whitespace]. r[frontmatter.fence] -The delimiters are referred to as a *fence*. The opening and closing fences must be at the start of a line. They must be a matching pair of hyphens (`-`), from 3 to 255. A fence may be followed by horizontal whitespace. +Frontmatter must start and end with a *fence*. Each fence must start at the beginning of a line. The opening fence must consist of at least 3 and no more than 255 hyphens (`-`). The closing fence must have exactly the same number of hyphens as the opening fence. The hyphens of either fence may be followed by horizontal whitespace. r[frontmatter.infostring] Following the opening fence may be an infostring for identifying the intention of the contained content. An infostring may be followed by horizontal whitespace. From b14a101a77361665aaa0193c13e8dafe8b4e4aab Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 25 Feb 2026 06:55:13 +0000 Subject: [PATCH 05/12] Revise the infostring description The infostring sentence uses an inverted construction ("Following the opening fence may be an infostring"); it's a bit awkward. Let's use active voice and tighten the phrasing. 
--- src/frontmatter.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/frontmatter.md b/src/frontmatter.md index 87eb70acf7..b65ae6a131 100644 --- a/src/frontmatter.md +++ b/src/frontmatter.md @@ -57,7 +57,7 @@ r[frontmatter.fence] Frontmatter must start and end with a *fence*. Each fence must start at the beginning of a line. The opening fence must consist of at least 3 and no more than 255 hyphens (`-`). The closing fence must have exactly the same number of hyphens as the opening fence. The hyphens of either fence may be followed by horizontal whitespace. r[frontmatter.infostring] -Following the opening fence may be an infostring for identifying the intention of the contained content. An infostring may be followed by horizontal whitespace. +The opening fence, after optional horizontal whitespace, may be followed by an infostring that identifies the format or purpose of the body. An infostring may be followed by horizontal whitespace. r[frontmatter.body] The body of the frontmatter may contain any content except for a line starting with as many or more hyphens (`-`) than in the fences or carriage returns. From d21b355880523473da2b3949b3b1b70bdb5687f3 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 25 Feb 2026 06:55:29 +0000 Subject: [PATCH 06/12] Clarify the body restrictions The body restriction sentence combines two unrelated constraints -- the hyphen-line restriction and the carriage-return ban -- in a single sentence joined by "or". This makes "or carriage returns" read as parallel to "hyphens", as though the line could maybe start with carriage returns. Let's split these into two separate sentences so that each constraint stands on its own. 
--- src/frontmatter.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/frontmatter.md b/src/frontmatter.md index b65ae6a131..2575ac7613 100644 --- a/src/frontmatter.md +++ b/src/frontmatter.md @@ -60,7 +60,7 @@ r[frontmatter.infostring] The opening fence, after optional horizontal whitespace, may be followed by an infostring that identifies the format or purpose of the body. An infostring may be followed by horizontal whitespace. r[frontmatter.body] -The body of the frontmatter may contain any content except for a line starting with as many or more hyphens (`-`) than in the fences or carriage returns. +No line in the body may start with a sequence of hyphens (`-`) equal to or longer than the opening fence. The body may not contain carriage returns. [shebang]: input-format.md#shebang-removal [whitespace]: whitespace.md From 8543ae813cc593fdf957e1863d01412cf25c38b7 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 25 Feb 2026 06:55:45 +0000 Subject: [PATCH 07/12] Link horizontal whitespace to the grammar rule The prose mentions "horizontal whitespace" in two places (fence trailing content and infostring trailing content) without linking to the grammar definition. Since `HORIZONTAL_WHITESPACE` is now a defined production in Whitespace, let's add a link so readers can click through to the precise definition. --- src/frontmatter.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/frontmatter.md b/src/frontmatter.md index 2575ac7613..27a1006808 100644 --- a/src/frontmatter.md +++ b/src/frontmatter.md @@ -54,13 +54,14 @@ r[frontmatter.document] Frontmatter may only be preceded by a [shebang] and [whitespace]. r[frontmatter.fence] -Frontmatter must start and end with a *fence*. Each fence must start at the beginning of a line. The opening fence must consist of at least 3 and no more than 255 hyphens (`-`). The closing fence must have exactly the same number of hyphens as the opening fence. 
The hyphens of either fence may be followed by horizontal whitespace. +Frontmatter must start and end with a *fence*. Each fence must start at the beginning of a line. The opening fence must consist of at least 3 and no more than 255 hyphens (`-`). The closing fence must have exactly the same number of hyphens as the opening fence. The hyphens of either fence may be followed by [horizontal whitespace]. r[frontmatter.infostring] -The opening fence, after optional horizontal whitespace, may be followed by an infostring that identifies the format or purpose of the body. An infostring may be followed by horizontal whitespace. +The opening fence, after optional [horizontal whitespace], may be followed by an infostring that identifies the format or purpose of the body. An infostring may be followed by horizontal whitespace. r[frontmatter.body] No line in the body may start with a sequence of hyphens (`-`) equal to or longer than the opening fence. The body may not contain carriage returns. +[horizontal whitespace]: grammar-HORIZONTAL_WHITESPACE [shebang]: input-format.md#shebang-removal [whitespace]: whitespace.md From 68279a33e5be7eb908fa273b34edc81b646b09b8 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 25 Feb 2026 06:56:12 +0000 Subject: [PATCH 08/12] Flesh out the frontmatter removal section The frontmatter removal section in `input-format.md` is a single sentence ("After some whitespace, frontmatter may next appear in the input") that doesn't clearly describe the removal behavior. By contrast, the shebang removal section provides a full description with an example. Let's rewrite the section with a precise description of the removal process and add an annotated example. 
--- src/input-format.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/input-format.md b/src/input-format.md index 3e43ff0ae6..065f827da4 100644 --- a/src/input-format.md +++ b/src/input-format.md @@ -72,7 +72,21 @@ The shebang is removed from the input sequence (and is therefore ignored). r[input.frontmatter] ## Frontmatter removal -After some [whitespace], [frontmatter] may next appear in the input. +r[input.frontmatter.removal] +If the remaining input begins with a [frontmatter] fence, optionally preceded by lines containing only [whitespace], the [frontmatter] and any preceding whitespace are removed. + +For example, given the following file: + + +```rust,ignore +--- cargo +package.edition = 2024 +--- + +fn main() {} +``` + +The first three lines (the opening fence, body, and closing fence) would be removed, leaving an empty line followed by `fn main() {}`. r[input.tokenization] ## Tokenization From c99703f569e0e5fe1b0e6be97c071b92aefbd69f Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 4 Mar 2026 01:16:21 +0000 Subject: [PATCH 09/12] Clarify frontmatter position rule The `frontmatter.document` rule said "Frontmatter may only be preceded by a shebang and whitespace", where the "and" could be misread as requiring both a shebang and whitespace rather than listing the set of things allowed to precede frontmatter. Since we merged the shebang prose revision (rust-lang/reference#2192), the shebang position rule now reads as a positive statement of where the shebang may appear. Let's follow the same pattern here: state positively where frontmatter may appear rather than leaning on "only" and a negative constraint. We'll also rename the rule identifier to `frontmatter.position` in keeping with our conventions. 
--- src/frontmatter.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/frontmatter.md b/src/frontmatter.md index 27a1006808..11aaea768c 100644 --- a/src/frontmatter.md +++ b/src/frontmatter.md @@ -50,8 +50,8 @@ fn main() { } ``` -r[frontmatter.document] -Frontmatter may only be preceded by a [shebang] and [whitespace]. +r[frontmatter.position] +Frontmatter may appear at the start of the file (after the optional [byte order mark]) or after a [shebang]. In either case, it may be preceded by [whitespace]. r[frontmatter.fence] Frontmatter must start and end with a *fence*. Each fence must start at the beginning of a line. The opening fence must consist of at least 3 and no more than 255 hyphens (`-`). The closing fence must have exactly the same number of hyphens as the opening fence. The hyphens of either fence may be followed by [horizontal whitespace]. @@ -62,6 +62,7 @@ The opening fence, after optional [horizontal whitespace], may be followed by an r[frontmatter.body] No line in the body may start with a sequence of hyphens (`-`) equal to or longer than the opening fence. The body may not contain carriage returns. +[byte order mark]: https://en.wikipedia.org/wiki/Byte_order_mark#UTF-8 [horizontal whitespace]: grammar-HORIZONTAL_WHITESPACE [shebang]: input-format.md#shebang-removal [whitespace]: whitespace.md From 6675ec7d96b7b513a58ab944701017d2cf328d1c Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 4 Mar 2026 01:23:03 +0000 Subject: [PATCH 10/12] Revise frontmatter intro example The example under `frontmatter.intro` used an external crate, a nontrivial script body, and a bare `rust` code block that would fail CI since the test runner doesn't support frontmatter. Let's simplify it to mirror the example in the frontmatter removal section of `input-format.md`, and let's wrap it in an `EXAMPLE` admonition consistent with our convention for examples that aren't demonstrating the behavior of a specific rule. 
--- src/frontmatter.md | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/frontmatter.md b/src/frontmatter.md index 11aaea768c..31e0afe973 100644 --- a/src/frontmatter.md +++ b/src/frontmatter.md @@ -37,18 +37,16 @@ INFOSTRING -> (XID_Start | `_`) ( XID_Continue | `-` | `.` )* r[frontmatter.intro] Frontmatter is an optional section for content intended for external tools without requiring these tools to have full knowledge of the Rust grammar. -```rust -#!/usr/bin/env cargo ---- -[dependencies] -fastrand = "2" ---- - -fn main() { - let num = fastrand::i32(..); - println!("{num}"); -} -``` +> [!EXAMPLE] +> +> ```rust,ignore +> #!/usr/bin/env cargo +> --- cargo +> package.edition = 2024 +> --- +> +> fn main() {} +> ``` r[frontmatter.position] Frontmatter may appear at the start of the file (after the optional [byte order mark]) or after a [shebang]. In either case, it may be preceded by [whitespace]. From 544b5c69f3467fc86d7f7f2ce0412dfa31e0a998 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 4 Mar 2026 01:28:10 +0000 Subject: [PATCH 11/12] Rewrite frontmatter intro sentence The intro under `frontmatter.intro` said "an optional section for content intended for external tools without requiring these tools to have full knowledge of the Rust grammar." This was a negative construction (what frontmatter doesn't require) rather than a positive one (what it is and what it enables). In this commit, we rewrite the intro as "an optional section of metadata whose syntax allows external tools to read it without parsing Rust." This tells the reader three things in one sentence: what frontmatter is, who it's for, and the key design property. 
--- src/frontmatter.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/frontmatter.md b/src/frontmatter.md index 31e0afe973..57aca4a351 100644 --- a/src/frontmatter.md +++ b/src/frontmatter.md @@ -35,7 +35,7 @@ INFOSTRING -> (XID_Start | `_`) ( XID_Continue | `-` | `.` )* ``` r[frontmatter.intro] -Frontmatter is an optional section for content intended for external tools without requiring these tools to have full knowledge of the Rust grammar. +Frontmatter is an optional section of metadata whose syntax allows external tools to read it without parsing Rust. > [!EXAMPLE] > From 00c5777cbed6009a7e6b8832e11e1df458d30b25 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 4 Mar 2026 05:50:21 +0000 Subject: [PATCH 12/12] Note the UAX 31 provenance of `HORIZONTAL_WHITESPACE` For the `WHITESPACE` grammar rule, we cite `Pattern_White_Space`. For `HORIZONTAL_WHITESPACE`, we hadn't cited provenance. Let's do that. Horizontal whitespace, in a Unicode context, is defined by UAX 31, Section 4.1, which categorizes `Pattern_White_Space` into line endings, ignorable format controls, and horizontal space. The horizontal space category is exactly the two characters our grammar specifies. --- src/whitespace.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/whitespace.md b/src/whitespace.md index 25f33ee1c6..da0d8502b5 100644 --- a/src/whitespace.md +++ b/src/whitespace.md @@ -30,6 +30,9 @@ CR -> U+000D // Carriage return, `'\r'` r[lex.whitespace.intro] Whitespace is any non-empty string containing only characters that have the [`Pattern_White_Space`] Unicode property. +r[lex.whitespace.horizontal] +[HORIZONTAL_WHITESPACE] is the horizontal space subset of [`Pattern_White_Space`] as categorized by [UAX #31, Section 4.1][uax31-4.1]. + r[lex.whitespace.token-sep] Rust is a "free-form" language, meaning that all forms of whitespace serve only to separate _tokens_ in the grammar, and have no semantic significance. 
@@ -37,3 +40,4 @@ r[lex.whitespace.replacement] A Rust program has identical meaning if each whitespace element is replaced with any other legal whitespace element, such as a single space character. [`Pattern_White_Space`]: https://www.unicode.org/reports/tr31/ +[uax31-4.1]: https://www.unicode.org/reports/tr31/#Whitespace_and_Syntax