From 01aebe9d6304a4a6710522f31d68a811572da7e0 Mon Sep 17 00:00:00 2001 From: Tim Fennis Date: Sat, 21 Mar 2026 22:20:14 +0100 Subject: [PATCH 1/2] =?UTF-8?q?=F0=9F=8E=A8=20Overhaul=20syntax=20highligh?= =?UTF-8?q?ting=20colors=20and=20add=20parser-based=20function=20detection?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- Cargo.lock | 1 + ndc_bin/Cargo.toml | 1 + ndc_bin/src/highlighter.rs | 185 +++++++++++++++++++++++++++++++++++-- ndc_bin/src/main.rs | 2 +- ndc_bin/src/repl.rs | 2 +- 5 files changed, 181 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9e46ea0d..9d53cad3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1132,6 +1132,7 @@ dependencies = [ "ndc_interpreter", "ndc_lexer", "ndc_lsp", + "ndc_parser", "ndc_stdlib", "rustyline", "strsim", diff --git a/ndc_bin/Cargo.toml b/ndc_bin/Cargo.toml index 1d589452..dfeab912 100644 --- a/ndc_bin/Cargo.toml +++ b/ndc_bin/Cargo.toml @@ -15,6 +15,7 @@ itertools.workspace = true strsim.workspace = true codespan-reporting = "0.11.1" ndc_lexer.workspace = true +ndc_parser.workspace = true ndc_interpreter.workspace = true ndc_stdlib.workspace = true ndc_core.workspace = true diff --git a/ndc_bin/src/highlighter.rs b/ndc_bin/src/highlighter.rs index b9098024..784cae12 100644 --- a/ndc_bin/src/highlighter.rs +++ b/ndc_bin/src/highlighter.rs @@ -1,11 +1,35 @@ use itertools::Itertools; -use ndc_lexer::{Lexer, Token}; +use ndc_lexer::{Lexer, Token, TokenLocation}; +use ndc_parser::{Expression, ExpressionLocation, ForBody, ForIteration}; +use std::collections::HashSet; use yansi::{Paint, Painted}; pub(crate) struct AndycppHighlighter; impl AndycppHighlighter { - pub fn highlight_line(line: &str) -> Vec> { + /// Parser-enhanced highlighting that correctly identifies function names + /// even in method-call syntax like `foo.len`. + pub fn highlight_parsed(line: &str) -> Vec> { + let mut function_spans = HashSet::new(); + + let expressions = Lexer::new(line) + .collect::, _>>() + .ok() + .and_then(|tokens| ndc_parser::Parser::from_tokens(tokens).parse().ok()); + + if let Some(expressions) = expressions { + for expr in &expressions { + collect_function_spans(expr, &mut function_spans); + } + } + + Self::highlight_tokens(line, &function_spans) + } + + fn highlight_tokens<'a>( + line: &'a str, + function_spans: &HashSet, + ) -> Vec> { let Ok(tokens) = Lexer::new(line).collect::, _>>() else { return vec![line.red()]; }; @@ -22,26 +46,67 @@ impl AndycppHighlighter { } let mut out = Vec::new(); - for (range, token) in ranges.into_iter().zip(tokens.into_iter()) { + let pairs: Vec<_> = ranges.into_iter().zip(tokens).collect(); + for (i, (range, token)) in pairs.iter().enumerate() { let substring = &line[range.start..(range.start + range.len())]; + let next_token = pairs.get(i + 1).map(|(_, t)| &t.token); let colored = match &token.token { - Token::String(_) => substring.rgb(70, 200, 128), + // Strings — green + Token::String(_) => substring.rgb(152, 195, 121), + // Numeric literals and booleans — orange Token::BigInt(_) | Token::Int64(_) | Token::Float64(_) | Token::Complex(_) + | Token::Infinity | Token::True - | Token::False => substring.rgb(253, 151, 31), + | Token::False => substring.rgb(209, 154, 102), + // Keywords — coral red + Token::Let + | Token::Fn + | Token::If + | Token::Else + | Token::Return + | Token::Break + | Token::Continue + | Token::For + | Token::In + | Token::While + | Token::Pure + | Token::LogicAnd + | Token::LogicOr + | Token::LogicNot => substring.rgb(224, 108, 117), + // Function identifiers — yellow/gold + // Detected by parser (dot-calls, etc.) or by token heuristics as fallback + Token::Identifier(_) if function_spans.contains(&token.span.offset()) => { + substring.rgb(229, 192, 123) + } + Token::Identifier(_) if matches!(next_token, Some(Token::LeftParentheses)) => { + substring.rgb(229, 192, 123) + } + Token::Identifier(_) if i > 0 && matches!(pairs[i - 1].1.token, Token::Fn) => { + substring.rgb(229, 192, 123) + } + // Variable identifiers — blue + Token::Identifier(_) => substring.rgb(97, 175, 239), + // Arrows, fat arrows, and assignment — cyan + Token::RightArrow | Token::FatArrow | Token::EqualsSign | Token::OpAssign(_) => { + substring.rgb(86, 182, 194) + } + // Brackets and delimiters — light gray (neutral) Token::LeftSquareBracket | Token::RightSquareBracket | Token::LeftCurlyBracket | Token::RightCurlyBracket | Token::LeftParentheses | Token::RightParentheses - | Token::MapOpen => substring.rgb(229, 181, 103), - Token::Identifier(_) => substring.rgb(51, 177, 255), - _ => substring.rgb(140, 182, 255).bold(), + | Token::MapOpen + | Token::Semicolon + | Token::Comma + | Token::Colon => substring.rgb(171, 178, 191), + // Operators — purple + _ => substring.rgb(198, 120, 221), }; out.push(colored); @@ -50,3 +115,107 @@ impl AndycppHighlighter { out } } + +/// Walk the parsed AST and collect the byte offsets of identifiers used as function names. +fn collect_function_spans(expr: &ExpressionLocation, spans: &mut HashSet) { + match &expr.expression { + Expression::Call { + function, + arguments, + } => { + if let Expression::Identifier { .. } = &function.expression { + spans.insert(function.span.offset()); + } + collect_function_spans(function, spans); + for arg in arguments { + collect_function_spans(arg, spans); + } + } + Expression::FunctionDeclaration { body, .. } => { + collect_function_spans(body, spans); + } + Expression::VariableDeclaration { value, .. } + | Expression::Assignment { r_value: value, .. } + | Expression::OpAssignment { r_value: value, .. } + | Expression::Return { value } => { + collect_function_spans(value, spans); + } + Expression::Statement(inner) | Expression::Grouping(inner) => { + collect_function_spans(inner, spans); + } + Expression::Block { statements } => { + for s in statements { + collect_function_spans(s, spans); + } + } + Expression::If { + condition, + on_true, + on_false, + } => { + collect_function_spans(condition, spans); + collect_function_spans(on_true, spans); + if let Some(f) = on_false { + collect_function_spans(f, spans); + } + } + Expression::While { + expression, + loop_body, + } => { + collect_function_spans(expression, spans); + collect_function_spans(loop_body, spans); + } + Expression::For { iterations, body } => { + for iteration in iterations { + match iteration { + ForIteration::Iteration { sequence, .. } => { + collect_function_spans(sequence, spans); + } + ForIteration::Guard(expr) => collect_function_spans(expr, spans), + } + } + match body.as_ref() { + ForBody::Block(e) | ForBody::List { expr: e, .. } => { + collect_function_spans(e, spans); + } + ForBody::Map { + key, + value, + default, + .. + } => { + collect_function_spans(key, spans); + if let Some(v) = value { + collect_function_spans(v, spans); + } + if let Some(d) = default { + collect_function_spans(d, spans); + } + } + } + } + Expression::Logical { left, right, .. } => { + collect_function_spans(left, spans); + collect_function_spans(right, spans); + } + Expression::Tuple { values } | Expression::List { values } => { + for v in values { + collect_function_spans(v, spans); + } + } + Expression::Map { values, default } => { + for (k, v) in values { + collect_function_spans(k, spans); + if let Some(v) = v { + collect_function_spans(v, spans); + } + } + if let Some(d) = default { + collect_function_spans(d, spans); + } + } + // Literals, identifiers (non-call), etc. — nothing to collect + _ => {} + } +} diff --git a/ndc_bin/src/main.rs b/ndc_bin/src/main.rs index d6068a76..0ecce76c 100644 --- a/ndc_bin/src/main.rs +++ b/ndc_bin/src/main.rs @@ -143,7 +143,7 @@ fn main() -> anyhow::Result<()> { Action::HighlightFile(path) => { let string = fs::read_to_string(path)?; - let out = AndycppHighlighter::highlight_line(&string); + let out = AndycppHighlighter::highlight_parsed(&string); for styled in out { print!("{}", styled); } diff --git a/ndc_bin/src/repl.rs b/ndc_bin/src/repl.rs index 817fc222..5fb83239 100644 --- a/ndc_bin/src/repl.rs +++ b/ndc_bin/src/repl.rs @@ -15,7 +15,7 @@ struct RustylineHelper {} impl rustyline::highlight::Highlighter for RustylineHelper { fn highlight<'l>(&self, line: &'l str, _pos: usize) -> Cow<'l, str> { - let out = AndycppHighlighter::highlight_line(line); + let out = AndycppHighlighter::highlight_parsed(line); Cow::Owned(out.into_iter().join("")) } From e03a1319c58e3832feb4d07faf27bf342e9b05a0 Mon Sep 17 00:00:00 2001 From: Tim Fennis Date: Sat, 21 Mar 2026 22:25:38 +0100 Subject: [PATCH 2/2] Use ahash and exhaust all Expression match arms Co-Authored-By: Claude Opus 4.6 --- Cargo.lock | 1 + ndc_bin/Cargo.toml | 1 + ndc_bin/src/highlighter.rs | 28 ++++++++++++++++++++++------ 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9d53cad3..39498f30 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1124,6 +1124,7 @@ dependencies = [ name = "ndc_bin" version = "0.3.0" dependencies = [ + "ahash", "anyhow", "clap", "codespan-reporting", diff --git a/ndc_bin/Cargo.toml b/ndc_bin/Cargo.toml index dfeab912..836dac29 100644 --- a/ndc_bin/Cargo.toml +++ b/ndc_bin/Cargo.toml @@ -14,6 +14,7 @@ clap.workspace = true itertools.workspace = true strsim.workspace = true codespan-reporting = "0.11.1" +ahash.workspace = true ndc_lexer.workspace = true ndc_parser.workspace = true ndc_interpreter.workspace = true diff --git a/ndc_bin/src/highlighter.rs b/ndc_bin/src/highlighter.rs index 784cae12..efdb33dc 100644 --- a/ndc_bin/src/highlighter.rs +++ b/ndc_bin/src/highlighter.rs @@ -1,7 +1,7 @@ use itertools::Itertools; use ndc_lexer::{Lexer, Token, TokenLocation}; use ndc_parser::{Expression, ExpressionLocation, ForBody, ForIteration}; -use std::collections::HashSet; +use ahash::AHashSet; use yansi::{Paint, Painted}; pub(crate) struct AndycppHighlighter; @@ -10,7 +10,7 @@ impl AndycppHighlighter { /// Parser-enhanced highlighting that correctly identifies function names /// even in method-call syntax like `foo.len`. pub fn highlight_parsed(line: &str) -> Vec> { - let mut function_spans = HashSet::new(); + let mut function_spans = AHashSet::new(); let expressions = Lexer::new(line) .collect::, _>>() @@ -28,7 +28,7 @@ impl AndycppHighlighter { fn highlight_tokens<'a>( line: &'a str, - function_spans: &HashSet, + function_spans: &AHashSet, ) -> Vec> { let Ok(tokens) = Lexer::new(line).collect::, _>>() else { return vec![line.red()]; @@ -117,7 +117,7 @@ impl AndycppHighlighter { } /// Walk the parsed AST and collect the byte offsets of identifiers used as function names. -fn collect_function_spans(expr: &ExpressionLocation, spans: &mut HashSet) { +fn collect_function_spans(expr: &ExpressionLocation, spans: &mut AHashSet) { match &expr.expression { Expression::Call { function, @@ -215,7 +215,23 @@ fn collect_function_spans(expr: &ExpressionLocation, spans: &mut HashSet) collect_function_spans(d, spans); } } - // Literals, identifiers (non-call), etc. — nothing to collect - _ => {} + Expression::RangeInclusive { start, end } | Expression::RangeExclusive { start, end } => { + if let Some(s) = start { + collect_function_spans(s, spans); + } + if let Some(e) = end { + collect_function_spans(e, spans); + } + } + // Leaves — no sub-expressions to recurse into + Expression::BoolLiteral(_) + | Expression::StringLiteral(_) + | Expression::Int64Literal(_) + | Expression::Float64Literal(_) + | Expression::BigIntLiteral(_) + | Expression::ComplexLiteral(_) + | Expression::Identifier { .. } + | Expression::Break + | Expression::Continue => {} } }