From cb3d5ef3d91ad6a3c96de5ff1ef964fac64b2e85 Mon Sep 17 00:00:00 2001 From: Tim Fennis Date: Mon, 2 Mar 2026 21:56:55 +0100 Subject: [PATCH 001/185] =?UTF-8?q?=E2=9C=A8=20Add=20ndc=5Fvm=20crate=20sk?= =?UTF-8?q?eleton=20for=20bytecode=20VM?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 7 +++++++ Cargo.toml | 3 ++- ndc_vm/Cargo.toml | 7 +++++++ ndc_vm/src/chunk.rs | 17 +++++++++++++++++ ndc_vm/src/compiler.rs | 9 +++++++++ ndc_vm/src/lib.rs | 3 +++ ndc_vm/src/vm.rs | 29 +++++++++++++++++++++++++++++ 7 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 ndc_vm/Cargo.toml create mode 100644 ndc_vm/src/chunk.rs create mode 100644 ndc_vm/src/compiler.rs create mode 100644 ndc_vm/src/lib.rs create mode 100644 ndc_vm/src/vm.rs diff --git a/Cargo.lock b/Cargo.lock index f24b433e..c65ef70c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1349,6 +1349,13 @@ dependencies = [ "tap", ] +[[package]] +name = "ndc_vm" +version = "0.2.1" +dependencies = [ + "thiserror", +] + [[package]] name = "nibble_vec" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index c0b4146c..4601d7bb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [workspace] resolver = "3" -members = ["ndc_macros", "ndc_bin", "ndc_core", "ndc_interpreter", "ndc_lsp", "ndc_lexer", "ndc_parser", "ndc_stdlib", "benches", "tests"] +members = ["ndc_macros", "ndc_bin", "ndc_core", "ndc_interpreter", "ndc_vm", "ndc_lsp", "ndc_lexer", "ndc_parser", "ndc_stdlib", "benches", "tests"] [workspace.package] edition = "2024" @@ -20,6 +20,7 @@ itertools = "0.14.0" ndc_core = { path = "ndc_core" } ndc_lexer = { path = "ndc_lexer" } ndc_interpreter = { path = "ndc_interpreter" } +ndc_vm = { path = "ndc_vm" } ndc_parser = { path = "ndc_parser" } ndc_lsp = { path = "ndc_lsp" } ndc_macros = { path = "ndc_macros" } diff --git a/ndc_vm/Cargo.toml b/ndc_vm/Cargo.toml new file mode 100644 index 00000000..726893a7 --- /dev/null +++ b/ndc_vm/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "ndc_vm" +edition.workspace = true +version.workspace = true + +[dependencies] +thiserror.workspace = true diff --git a/ndc_vm/src/chunk.rs b/ndc_vm/src/chunk.rs new file mode 100644 index 00000000..4ed201aa --- /dev/null +++ b/ndc_vm/src/chunk.rs @@ -0,0 +1,17 @@ +/// A single bytecode instruction. +#[derive(Debug, Clone, PartialEq)] +pub enum OpCode { + Return, +} + +/// A chunk of bytecode along with the constants it references. +#[derive(Debug, Default)] +pub struct Chunk { + pub code: Vec, +} + +impl Chunk { + pub fn write(&mut self, op: OpCode) { + self.code.push(op); + } +} diff --git a/ndc_vm/src/compiler.rs b/ndc_vm/src/compiler.rs new file mode 100644 index 00000000..687c7b2d --- /dev/null +++ b/ndc_vm/src/compiler.rs @@ -0,0 +1,9 @@ +use crate::chunk::Chunk; + +pub struct Compiler; + +impl Compiler { + pub fn compile(_source: &str) -> Chunk { + todo!("compiler not yet implemented") + } +} diff --git a/ndc_vm/src/lib.rs b/ndc_vm/src/lib.rs new file mode 100644 index 00000000..a8f481dc --- /dev/null +++ b/ndc_vm/src/lib.rs @@ -0,0 +1,3 @@ +pub mod chunk; +pub mod compiler; +pub mod vm; diff --git a/ndc_vm/src/vm.rs b/ndc_vm/src/vm.rs new file mode 100644 index 00000000..531ce09a --- /dev/null +++ b/ndc_vm/src/vm.rs @@ -0,0 +1,29 @@ +use crate::chunk::{Chunk, OpCode}; + +pub struct Vm { + chunk: Chunk, + ip: usize, +} + +#[derive(thiserror::Error, Debug)] +pub enum VmError { + #[error("runtime error")] + RuntimeError, +} + +impl Vm { + pub fn new(chunk: Chunk) -> Self { + Self { chunk, ip: 0 } + } + + pub fn run(&mut self) -> Result<(), VmError> { + loop { + let op = &self.chunk.code[self.ip]; + self.ip += 1; + + match op { + OpCode::Return => return Ok(()), + } + } + } +} From 0cb0105052c2aeacf7822a22559def6d217d7ae2 Mon Sep 17 00:00:00 2001 From: Tim Fennis Date: Mon, 2 Mar 2026 23:11:48 +0100 Subject: [PATCH 002/185] =?UTF-8?q?=F0=9F=93=9A=20Chapter=2014=20+=2015?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 2 ++ ndc_vm/Cargo.toml | 3 +++ ndc_vm/src/chunk.rs | 14 +++++++++++++- ndc_vm/src/vm.rs | 16 ++++++++++++++-- 4 files changed, 32 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c65ef70c..008cb15c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1353,6 +1353,8 @@ dependencies = [ name = "ndc_vm" version = "0.2.1" dependencies = [ + "ndc_interpreter", + "ndc_lexer", "thiserror", ] diff --git a/ndc_vm/Cargo.toml b/ndc_vm/Cargo.toml index 726893a7..8043d466 100644 --- a/ndc_vm/Cargo.toml +++ b/ndc_vm/Cargo.toml @@ -5,3 +5,6 @@ version.workspace = true [dependencies] thiserror.workspace = true +ndc_interpreter.workspace = true +# ndc_parser.workspace = true +ndc_lexer.workspace = true diff --git a/ndc_vm/src/chunk.rs b/ndc_vm/src/chunk.rs index 4ed201aa..a70c1866 100644 --- a/ndc_vm/src/chunk.rs +++ b/ndc_vm/src/chunk.rs @@ -1,17 +1,29 @@ +use ndc_interpreter::value::Value; +use ndc_lexer::Span; + /// A single bytecode instruction. #[derive(Debug, Clone, PartialEq)] pub enum OpCode { + Constant(usize), Return, } /// A chunk of bytecode along with the constants it references. #[derive(Debug, Default)] pub struct Chunk { + pub constants: Vec, pub code: Vec, + pub spans: Vec, } impl Chunk { - pub fn write(&mut self, op: OpCode) { + pub fn add_constant(&mut self, value: Value) -> usize { + self.constants.push(value); + self.constants.len() - 1 + } + + pub fn write(&mut self, op: OpCode, span: Span) { self.code.push(op); + (self.spans).push(span); } } diff --git a/ndc_vm/src/vm.rs b/ndc_vm/src/vm.rs index 531ce09a..9410e58a 100644 --- a/ndc_vm/src/vm.rs +++ b/ndc_vm/src/vm.rs @@ -1,8 +1,10 @@ +use ndc_interpreter::value::Value; use crate::chunk::{Chunk, OpCode}; pub struct Vm { chunk: Chunk, ip: usize, + stack: Vec, } #[derive(thiserror::Error, Debug)] @@ -13,7 +15,11 @@ pub enum VmError { impl Vm { pub fn new(chunk: Chunk) -> Self { - Self { chunk, ip: 0 } + Self { + chunk, + ip: 0, + stack: Vec::default(), + } } pub fn run(&mut self) -> Result<(), VmError> { @@ -22,7 +28,13 @@ impl Vm { self.ip += 1; match op { - OpCode::Return => return Ok(()), + OpCode::Return => { + println!("{}", self.stack.pop().expect("stack underflow")); + }, + OpCode::Constant(idx) => { + // TODO: assuming constants can be referenced multiple times we'll have to clone here + self.stack.push(self.chunk.constants[*idx].clone()); + } } } } From 7e9296167fd87ff4cc0c0d308174aee5da5eda6f Mon Sep 17 00:00:00 2001 From: Tim Fennis Date: Tue, 3 Mar 2026 09:17:19 +0100 Subject: [PATCH 003/185] =?UTF-8?q?=F0=9F=9A=A7=20WIP=20bytecode=20VM=20co?= =?UTF-8?q?mpiler=20and=20chunk?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 2 ++ ndc_bin/Cargo.toml | 1 + ndc_vm/Cargo.toml | 2 +- ndc_vm/src/chunk.rs | 6 ++--- ndc_vm/src/compiler.rs | 61 +++++++++++++++++++++++++++++++++++++++--- 5 files changed, 65 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 008cb15c..ea49d91e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1252,6 +1252,7 @@ dependencies = [ "ndc_lexer", "ndc_lsp", "ndc_stdlib", + "ndc_vm", "owo-colors", "rustyline", "strsim", @@ -1355,6 +1356,7 @@ version = "0.2.1" dependencies = [ "ndc_interpreter", "ndc_lexer", + "ndc_parser", "thiserror", ] diff --git a/ndc_bin/Cargo.toml b/ndc_bin/Cargo.toml index 2cd5c4f4..d23bc1e9 100644 --- a/ndc_bin/Cargo.toml +++ b/ndc_bin/Cargo.toml @@ -18,6 +18,7 @@ ndc_lexer.workspace = true ndc_interpreter.workspace = true ndc_stdlib.workspace = true ndc_lsp.workspace = true +ndc_vm.workspace = true owo-colors.workspace = true rustyline.workspace = true tap.workspace = true diff --git a/ndc_vm/Cargo.toml b/ndc_vm/Cargo.toml index 8043d466..b947c46a 100644 --- a/ndc_vm/Cargo.toml +++ b/ndc_vm/Cargo.toml @@ -6,5 +6,5 @@ version.workspace = true [dependencies] thiserror.workspace = true ndc_interpreter.workspace = true -# ndc_parser.workspace = true +ndc_parser.workspace = true ndc_lexer.workspace = true diff --git a/ndc_vm/src/chunk.rs b/ndc_vm/src/chunk.rs index a70c1866..3d6d376c 100644 --- a/ndc_vm/src/chunk.rs +++ b/ndc_vm/src/chunk.rs @@ -11,9 +11,9 @@ pub enum OpCode { /// A chunk of bytecode along with the constants it references. #[derive(Debug, Default)] pub struct Chunk { - pub constants: Vec, - pub code: Vec, - pub spans: Vec, + constants: Vec, + code: Vec, + spans: Vec, } impl Chunk { diff --git a/ndc_vm/src/compiler.rs b/ndc_vm/src/compiler.rs index 687c7b2d..f4db784e 100644 --- a/ndc_vm/src/compiler.rs +++ b/ndc_vm/src/compiler.rs @@ -1,9 +1,64 @@ -use crate::chunk::Chunk; +use crate::chunk::{Chunk, OpCode}; +use ndc_interpreter::value::Value; +use ndc_parser::{Expression, ExpressionLocation}; pub struct Compiler; impl Compiler { - pub fn compile(_source: &str) -> Chunk { - todo!("compiler not yet implemented") + pub fn compile(expressions: impl Iterator) -> Chunk { + let mut chunk = Chunk::default(); + + for ExpressionLocation { expression, span } in expressions { + match expression { + Expression::BoolLiteral(b) => { + let idx = chunk.add_constant(Value::Bool(b)); + chunk.write(OpCode::Constant(idx), span); + } + Expression::StringLiteral(s) => { + let idx = chunk.add_constant(Value::string(s)); + chunk.write(OpCode::Constant(idx), span); + } + Expression::Int64Literal(i) => { + let idx = chunk.add_constant(Value::from(i)); + chunk.write(OpCode::Constant(idx), span); + } + Expression::Float64Literal(f) => { + let idx = chunk.add_constant(Value::from(f)); + chunk.write(OpCode::Constant(idx), span); + } + Expression::BigIntLiteral(i) => { + let idx = chunk.add_constant(Value::from(i)); + chunk.write(OpCode::Constant(idx), span); + } + Expression::ComplexLiteral(c) => { + let idx = chunk.add_constant(Value::from(c)); + chunk.write(OpCode::Constant(idx), span); + } + Expression::Identifier { .. } => {} + Expression::Statement(_) => {} + Expression::Logical { .. } => {} + Expression::Grouping(_) => {} + Expression::VariableDeclaration { .. } => {} + Expression::Assignment { .. } => {} + Expression::OpAssignment { .. } => {} + Expression::FunctionDeclaration { .. } => {} + Expression::Block { .. } => {} + Expression::If { .. } => {} + Expression::While { .. } => {} + Expression::For { .. } => {} + Expression::Call { .. } => {} + Expression::Index { .. } => {} + Expression::Tuple { .. } => {} + Expression::List { .. } => {} + Expression::Map { .. } => {} + Expression::Return { .. } => {} + Expression::Break => {} + Expression::Continue => {} + Expression::RangeInclusive { .. } => {} + Expression::RangeExclusive { .. } => {} + } + } + + chunk } } From 507306267a9b8ae5cfcfc61564f7c7a1f758450d Mon Sep 17 00:00:00 2001 From: Tim Fennis Date: Tue, 3 Mar 2026 10:54:59 +0100 Subject: [PATCH 004/185] New Value type for the VM, and constant compilation --- Cargo.lock | 4 +- ndc_interpreter/Cargo.toml | 1 + ndc_interpreter/src/lib.rs | 21 ++++++-- ndc_vm/Cargo.toml | 3 +- ndc_vm/src/chunk.rs | 14 ++++- ndc_vm/src/compiler.rs | 108 ++++++++++++++++++++----------------- ndc_vm/src/lib.rs | 3 ++ ndc_vm/src/value.rs | 28 ++++++++++ ndc_vm/src/vm.rs | 18 +++++-- 9 files changed, 138 insertions(+), 62 deletions(-) create mode 100644 ndc_vm/src/value.rs diff --git a/Cargo.lock b/Cargo.lock index ea49d91e..3d54418f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1284,6 +1284,7 @@ dependencies = [ "ndc_core", "ndc_lexer", "ndc_parser", + "ndc_vm", "num", "self_cell", "thiserror", @@ -1354,9 +1355,10 @@ dependencies = [ name = "ndc_vm" version = "0.2.1" dependencies = [ - "ndc_interpreter", + "ndc_core", "ndc_lexer", "ndc_parser", + "num", "thiserror", ] diff --git a/ndc_interpreter/Cargo.toml b/ndc_interpreter/Cargo.toml index 1420330e..26b2427b 100644 --- a/ndc_interpreter/Cargo.toml +++ b/ndc_interpreter/Cargo.toml @@ -10,6 +10,7 @@ derive_more.workspace = true derive_builder.workspace = true itertools.workspace = true ndc_core.workspace = true +ndc_vm.workspace = true ndc_lexer.workspace = true ndc_parser.workspace = true num.workspace = true diff --git a/ndc_interpreter/src/lib.rs b/ndc_interpreter/src/lib.rs index c13018ad..a3ea98e9 100644 --- a/ndc_interpreter/src/lib.rs +++ b/ndc_interpreter/src/lib.rs @@ -9,16 +9,18 @@ pub mod semantic; pub mod sequence; pub mod value; -use std::cell::RefCell; -use std::rc::Rc; - use crate::environment::{Environment, InterpreterOutput}; use crate::evaluate::{EvaluationError, evaluate_expression}; use crate::function::FunctionCarrier; use crate::semantic::analyser::{Analyser, ScopeTree}; use crate::value::Value; +use ndc_core::int::Int; use ndc_lexer::{Lexer, TokenLocation}; use ndc_parser::ExpressionLocation; +use ndc_vm::compiler::Compiler; +use ndc_vm::vm::Vm; +use std::cell::RefCell; +use std::rc::Rc; pub struct Interpreter { environment: Rc>, @@ -63,7 +65,7 @@ impl Interpreter { pub fn run_str(&mut self, input: &str) -> Result { let expressions = self.parse_and_analyse(input)?; - let final_value = self.interpret(expressions.into_iter())?; + let final_value = self.interpret_vm(expressions.into_iter())?; Ok(format!("{final_value}")) } @@ -84,6 +86,17 @@ impl Interpreter { Ok(expressions) } + fn interpret_vm( + &mut self, + expressions: impl Iterator, + ) -> Result { + let code = Compiler::compile(expressions); + dbg!(&code); + let mut vm = Vm::new(code); + vm.run().expect("VM failed"); + + Ok(Value::unit()) + } fn interpret( &mut self, diff --git a/ndc_vm/Cargo.toml b/ndc_vm/Cargo.toml index b947c46a..6e734684 100644 --- a/ndc_vm/Cargo.toml +++ b/ndc_vm/Cargo.toml @@ -5,6 +5,7 @@ version.workspace = true [dependencies] thiserror.workspace = true -ndc_interpreter.workspace = true ndc_parser.workspace = true ndc_lexer.workspace = true +ndc_core.workspace = true +num.workspace = true diff --git a/ndc_vm/src/chunk.rs b/ndc_vm/src/chunk.rs index 3d6d376c..a7a0ddb5 100644 --- a/ndc_vm/src/chunk.rs +++ b/ndc_vm/src/chunk.rs @@ -1,4 +1,4 @@ -use ndc_interpreter::value::Value; +use crate::Value; use ndc_lexer::Span; /// A single bytecode instruction. @@ -26,4 +26,16 @@ impl Chunk { self.code.push(op); (self.spans).push(span); } + + pub fn is_empty(&self) -> bool { + self.code.is_empty() + } + #[inline(always)] + pub fn opcode(&self, idx: usize) -> &OpCode { + &self.code[idx] + } + + pub fn constant(&self, idx: usize) -> &Value { + &self.constants[idx] + } } diff --git a/ndc_vm/src/compiler.rs b/ndc_vm/src/compiler.rs index f4db784e..c12764ec 100644 --- a/ndc_vm/src/compiler.rs +++ b/ndc_vm/src/compiler.rs @@ -1,5 +1,7 @@ use crate::chunk::{Chunk, OpCode}; -use ndc_interpreter::value::Value; +use crate::{Object, Value}; +use ndc_core::int::Int; +use ndc_core::num::Number; use ndc_parser::{Expression, ExpressionLocation}; pub struct Compiler; @@ -8,57 +10,63 @@ impl Compiler { pub fn compile(expressions: impl Iterator) -> Chunk { let mut chunk = Chunk::default(); - for ExpressionLocation { expression, span } in expressions { - match expression { - Expression::BoolLiteral(b) => { - let idx = chunk.add_constant(Value::Bool(b)); - chunk.write(OpCode::Constant(idx), span); - } - Expression::StringLiteral(s) => { - let idx = chunk.add_constant(Value::string(s)); - chunk.write(OpCode::Constant(idx), span); - } - Expression::Int64Literal(i) => { - let idx = chunk.add_constant(Value::from(i)); - chunk.write(OpCode::Constant(idx), span); - } - Expression::Float64Literal(f) => { - let idx = chunk.add_constant(Value::from(f)); - chunk.write(OpCode::Constant(idx), span); - } - Expression::BigIntLiteral(i) => { - let idx = chunk.add_constant(Value::from(i)); - chunk.write(OpCode::Constant(idx), span); - } - Expression::ComplexLiteral(c) => { - let idx = chunk.add_constant(Value::from(c)); - chunk.write(OpCode::Constant(idx), span); - } - Expression::Identifier { .. } => {} - Expression::Statement(_) => {} - Expression::Logical { .. } => {} - Expression::Grouping(_) => {} - Expression::VariableDeclaration { .. } => {} - Expression::Assignment { .. } => {} - Expression::OpAssignment { .. } => {} - Expression::FunctionDeclaration { .. } => {} - Expression::Block { .. } => {} - Expression::If { .. } => {} - Expression::While { .. } => {} - Expression::For { .. } => {} - Expression::Call { .. } => {} - Expression::Index { .. } => {} - Expression::Tuple { .. } => {} - Expression::List { .. } => {} - Expression::Map { .. } => {} - Expression::Return { .. } => {} - Expression::Break => {} - Expression::Continue => {} - Expression::RangeInclusive { .. } => {} - Expression::RangeExclusive { .. } => {} - } + for expr_loc in expressions { + compile_expr(expr_loc, &mut chunk); } chunk } } +fn compile_expr(ExpressionLocation { expression, span }: ExpressionLocation, chunk: &mut Chunk) { + println!("COMPILING: {expression:?}"); + match expression { + Expression::BoolLiteral(b) => { + let idx = chunk.add_constant(Value::Bool(b)); + chunk.write(OpCode::Constant(idx), span); + } + Expression::StringLiteral(s) => { + let idx = chunk.add_constant(Object::String(s).into()); + chunk.write(OpCode::Constant(idx), span); + } + Expression::Int64Literal(i) => { + let idx = chunk.add_constant(Value::Int(i)); + chunk.write(OpCode::Constant(idx), span); + } + Expression::Float64Literal(f) => { + let idx = chunk.add_constant(Value::Float(f)); + chunk.write(OpCode::Constant(idx), span); + } + Expression::BigIntLiteral(i) => { + let idx = chunk.add_constant(Object::BigInt(i).into()); + chunk.write(OpCode::Constant(idx), span); + } + Expression::ComplexLiteral(c) => { + let idx = chunk.add_constant(Object::Complex(c).into()); + chunk.write(OpCode::Constant(idx), span); + } + Expression::Identifier { .. } => {} + Expression::Statement(stm) => { + compile_expr(*stm, chunk); + } + Expression::Logical { .. } => {} + Expression::Grouping(_) => {} + Expression::VariableDeclaration { .. } => {} + Expression::Assignment { .. } => {} + Expression::OpAssignment { .. } => {} + Expression::FunctionDeclaration { .. } => {} + Expression::Block { .. } => {} + Expression::If { .. } => {} + Expression::While { .. } => {} + Expression::For { .. } => {} + Expression::Call { .. } => {} + Expression::Index { .. } => {} + Expression::Tuple { .. } => {} + Expression::List { .. } => {} + Expression::Map { .. } => {} + Expression::Return { .. } => {} + Expression::Break => {} + Expression::Continue => {} + Expression::RangeInclusive { .. } => {} + Expression::RangeExclusive { .. } => {} + } +} diff --git a/ndc_vm/src/lib.rs b/ndc_vm/src/lib.rs index a8f481dc..6948eac0 100644 --- a/ndc_vm/src/lib.rs +++ b/ndc_vm/src/lib.rs @@ -1,3 +1,6 @@ pub mod chunk; pub mod compiler; +pub mod value; pub mod vm; + +pub use value::*; diff --git a/ndc_vm/src/value.rs b/ndc_vm/src/value.rs new file mode 100644 index 00000000..1cb398b5 --- /dev/null +++ b/ndc_vm/src/value.rs @@ -0,0 +1,28 @@ +/// Enumerates all the different types of values that exist in the language +/// All values should be pretty cheap to clone because the bigger ones are wrapped using Rc's +#[derive(Clone, Debug)] +pub enum Value { + Int(i64), + Float(f64), + Bool(bool), + None, + Object(Box), +} + +#[derive(Clone, Debug)] +pub enum Object { + Some(Value), + BigInt(num::BigInt), + Complex(num::Complex), + Rational(num::BigRational), + String(String), + List(Vec), + Tuple(Vec), + // tec.... +} + +impl From for Value { + fn from(value: Object) -> Self { + Self::Object(Box::new(value)) + } +} diff --git a/ndc_vm/src/vm.rs b/ndc_vm/src/vm.rs index 9410e58a..92af6cdc 100644 --- a/ndc_vm/src/vm.rs +++ b/ndc_vm/src/vm.rs @@ -1,5 +1,6 @@ -use ndc_interpreter::value::Value; +use crate::Value; use crate::chunk::{Chunk, OpCode}; +use ndc_core::num::Number; pub struct Vm { chunk: Chunk, @@ -23,17 +24,24 @@ impl Vm { } pub fn run(&mut self) -> Result<(), VmError> { + eprintln!("[DBG] Value bytes: {}", size_of::()); + eprintln!("[DBG] Number bytes: {}", size_of::()); + + if self.chunk.is_empty() { + return Ok(()); + } + loop { - let op = &self.chunk.code[self.ip]; + let op = self.chunk.opcode(self.ip); self.ip += 1; match op { OpCode::Return => { - println!("{}", self.stack.pop().expect("stack underflow")); - }, + println!("{:?}", self.stack.pop().expect("stack underflow")); + } OpCode::Constant(idx) => { // TODO: assuming constants can be referenced multiple times we'll have to clone here - self.stack.push(self.chunk.constants[*idx].clone()); + self.stack.push(self.chunk.constant(*idx).clone()); } } } From 90b6f9d35f9336e45a4c73059db336de3e7954f8 Mon Sep 17 00:00:00 2001 From: Tim Fennis Date: Tue, 3 Mar 2026 15:18:27 +0100 Subject: [PATCH 005/185] WIP --- ndc_bin/src/diagnostic.rs | 2 +- ndc_bin/src/main.rs | 30 +- ndc_core/src/num.rs | 4 +- ndc_interpreter/src/environment.rs | 116 +++-- ndc_interpreter/src/evaluate/mod.rs | 34 +- ndc_interpreter/src/function.rs | 25 +- ndc_interpreter/src/lib.rs | 20 +- ndc_interpreter/src/semantic/analyser.rs | 295 +----------- ndc_interpreter/src/semantic/mod.rs | 6 +- ndc_interpreter/src/semantic/scope.rs | 566 +++++++++++++++++++++++ ndc_interpreter/src/sequence.rs | 2 +- ndc_interpreter/src/value.rs | 2 +- ndc_lsp/src/backend.rs | 2 +- ndc_parser/src/expression.rs | 11 +- ndc_stdlib/src/math.rs | 20 +- ndc_vm/src/chunk.rs | 7 + ndc_vm/src/compiler.rs | 50 +- ndc_vm/src/vm.rs | 26 +- 18 files changed, 825 insertions(+), 393 deletions(-) create mode 100644 ndc_interpreter/src/semantic/scope.rs diff --git a/ndc_bin/src/diagnostic.rs b/ndc_bin/src/diagnostic.rs index add518b0..1d53c595 100644 --- a/ndc_bin/src/diagnostic.rs +++ b/ndc_bin/src/diagnostic.rs @@ -1,6 +1,6 @@ use miette::{Diagnostic, LabeledSpan, SourceSpan}; -use ndc_lexer::Span; use ndc_interpreter::InterpreterError; +use ndc_lexer::Span; use std::fmt; fn span_to_source_span(span: Span) -> SourceSpan { diff --git a/ndc_bin/src/main.rs b/ndc_bin/src/main.rs index e7da5005..f1942a85 100644 --- a/ndc_bin/src/main.rs +++ b/ndc_bin/src/main.rs @@ -33,7 +33,12 @@ struct Cli { #[derive(Subcommand)] enum Command { /// Execute an .ndc file or start the repl (this default action may be omitted) - Run { file: Option }, + Run { + file: Option, + /// Run using the bytecode VM instead of the tree-walk interpreter + #[arg(long)] + vm: bool, + }, /// Output an .ndc file using the built-in syntax highlighting engine Highlight { file: PathBuf }, @@ -53,13 +58,16 @@ enum Command { impl Default for Command { fn default() -> Self { - Self::Run { file: None } + Self::Run { + file: None, + vm: false, + } } } enum Action { RunLsp, - RunFile(PathBuf), + RunFile { path: PathBuf, vm: bool }, HighlightFile(PathBuf), StartRepl, Docs(Option), @@ -70,8 +78,11 @@ impl TryFrom for Action { fn try_from(value: Command) -> Result { let action = match value { - Command::Run { file: Some(file) } => Self::RunFile(file), - Command::Run { file: None } => Self::StartRepl, + Command::Run { + file: Some(file), + vm, + } => Self::RunFile { path: file, vm }, + Command::Run { file: None, .. } => Self::StartRepl, Command::Lsp { stdio: _ } => Self::RunLsp, Command::Highlight { file } => Self::HighlightFile(file), Command::Docs { query } => Self::Docs(query), @@ -81,7 +92,10 @@ impl TryFrom for Action { // This case should have defaulted to `Command::Run { file: None }` unreachable!("fallback case reached with 0 arguments (should never happen)") } - 1 => Self::RunFile(args[0].parse::().context("invalid path")?), + 1 => Self::RunFile { + path: args[0].parse::().context("invalid path")?, + vm: false, + }, n => return Err(anyhow!("invalid number of arguments: {n}")), } } @@ -110,7 +124,7 @@ fn main() -> anyhow::Result<()> { let action: Action = cli.command.unwrap_or_default().try_into()?; match action { - Action::RunFile(path) => { + Action::RunFile { path, vm } => { let filename = path .file_name() .and_then(|name| name.to_str()) @@ -120,7 +134,7 @@ fn main() -> anyhow::Result<()> { let stdout = std::io::stdout(); let mut interpreter = Interpreter::new(stdout).with_stdlib(); - match into_miette_result(interpreter.run_str(&string)) { + match into_miette_result(interpreter.run_str_with_options(&string, vm)) { // we can just ignore successful runs because we have print statements Ok(_final_value) => {} Err(report) => { diff --git a/ndc_core/src/num.rs b/ndc_core/src/num.rs index 355881f6..6f16cc43 100644 --- a/ndc_core/src/num.rs +++ b/ndc_core/src/num.rs @@ -479,9 +479,7 @@ impl Number { if let Some(bi) = BigInt::from_f64(*f) { Self::Int(Int::BigInt(bi).simplified()) } else { - return Err(NumberConversionError(format!( - "cannot convert {f} to int" - ))); + return Err(NumberConversionError(format!("cannot convert {f} to int"))); } } Self::Rational(r) => Self::Int(Int::BigInt(r.to_integer()).simplified()), diff --git a/ndc_interpreter/src/environment.rs b/ndc_interpreter/src/environment.rs index 73f1271c..aa4d6655 100644 --- a/ndc_interpreter/src/environment.rs +++ b/ndc_interpreter/src/environment.rs @@ -18,6 +18,7 @@ pub struct Environment { root: Rc>, parent: Option>>, values: Vec, + base_offset: usize, } impl fmt::Debug for Environment { @@ -60,6 +61,7 @@ impl Environment { root: Rc::new(RefCell::new(root)), parent: None, values: Default::default(), + base_offset: 0, } } @@ -79,23 +81,36 @@ impl Environment { pub fn set(&mut self, var: ResolvedVar, value: Value) { match var { - ResolvedVar::Captured { depth: 0, slot } => { - if self.values.len() > slot { - self.values[slot] = value + ResolvedVar::Local { slot } if slot >= self.base_offset => { + let local_idx = slot - self.base_offset; + if local_idx < self.values.len() { + self.values[local_idx] = value; } else { - debug_assert!(slot == self.values.len()); + debug_assert!(local_idx == self.values.len()); self.values.push(value); } } - - // Recursively insert - ResolvedVar::Captured { depth, slot } => { + ResolvedVar::Local { .. } => { + self.parent + .clone() + .expect("Local slot below base_offset but no parent") + .borrow_mut() + .set(var, value); + } + ResolvedVar::Upvalue { depth: 1, slot } => { self.parent .clone() - .expect("tried to get parent but failed") + .expect("Upvalue but no parent environment") + .borrow_mut() + .set(ResolvedVar::Local { slot }, value); + } + ResolvedVar::Upvalue { depth, slot } => { + self.parent + .clone() + .expect("Upvalue but no parent environment") .borrow_mut() .set( - ResolvedVar::Captured { + ResolvedVar::Upvalue { depth: depth - 1, slot, }, @@ -118,26 +133,33 @@ impl Environment { root.global_functions.push(new_function.clone()); } - fn get_copy_from_slot(&self, depth: usize, slot: usize) -> Value { - if depth == 0 { - assert!( - self.values.len() > slot, - "failed to take item out of slot {slot} because it was empty" - ); - self.values[slot].clone() - } else { - self.parent - .clone() - .expect("expected parent env did not exist") - .borrow() - .get_copy_from_slot(depth - 1, slot) - } - } - #[must_use] pub fn get(&self, var: ResolvedVar) -> Value { match var { - ResolvedVar::Captured { depth, slot } => self.get_copy_from_slot(depth, slot), + ResolvedVar::Local { slot } if slot >= self.base_offset => { + self.values[slot - self.base_offset].clone() + } + ResolvedVar::Local { .. } => self + .parent + .as_ref() + .expect("Local slot below base_offset but no parent") + .borrow() + .get(var), + ResolvedVar::Upvalue { depth: 1, slot } => self + .parent + .as_ref() + .expect("Upvalue but no parent environment") + .borrow() + .get(ResolvedVar::Local { slot }), + ResolvedVar::Upvalue { depth, slot } => self + .parent + .as_ref() + .expect("Upvalue but no parent environment") + .borrow() + .get(ResolvedVar::Upvalue { + depth: depth - 1, + slot, + }), ResolvedVar::Global { slot } => { Value::function(self.root.borrow().global_functions[slot].clone()) } @@ -148,16 +170,31 @@ impl Environment { #[must_use] pub fn take(&mut self, var: ResolvedVar) -> Option { match var { - ResolvedVar::Captured { depth: 0, slot } => Some(std::mem::replace( - self.values.get_mut(slot).expect("slot can't be empty"), - Value::unit(), - )), - ResolvedVar::Captured { depth, slot } => self + ResolvedVar::Local { slot } if slot >= self.base_offset => { + let local_idx = slot - self.base_offset; + Some(std::mem::replace( + self.values.get_mut(local_idx).expect("slot can't be empty"), + Value::unit(), + )) + } + ResolvedVar::Local { .. } => self .parent .clone() - .expect("expected parent env did not exist") + .expect("Local slot below base_offset but no parent") .borrow_mut() - .take(ResolvedVar::Captured { + .take(var), + ResolvedVar::Upvalue { depth: 1, slot } => self + .parent + .clone() + .expect("Upvalue but no parent environment") + .borrow_mut() + .take(ResolvedVar::Local { slot }), + ResolvedVar::Upvalue { depth, slot } => self + .parent + .clone() + .expect("Upvalue but no parent environment") + .borrow_mut() + .take(ResolvedVar::Upvalue { depth: depth - 1, slot, }), @@ -165,12 +202,23 @@ impl Environment { } } - pub fn new_scope(parent: &Rc>) -> Self { + pub fn new_function_scope(parent: &Rc>) -> Self { + let root_ref = Rc::clone(&parent.borrow().root); + Self { + parent: Some(parent.clone()), + root: root_ref, + values: Default::default(), + base_offset: 0, + } + } + + pub fn new_iteration_scope(parent: &Rc>, base_offset: usize) -> Self { let root_ref = Rc::clone(&parent.borrow().root); Self { parent: Some(parent.clone()), root: root_ref, values: Default::default(), + base_offset, } } } diff --git a/ndc_interpreter/src/evaluate/mod.rs b/ndc_interpreter/src/evaluate/mod.rs index 60436285..cf174a8a 100644 --- a/ndc_interpreter/src/evaluate/mod.rs +++ b/ndc_interpreter/src/evaluate/mod.rs @@ -1,6 +1,6 @@ -use crate::hash_map::HashMap; use crate::environment::Environment; use crate::function::{Function, FunctionBody, FunctionCarrier, StaticType}; +use crate::hash_map::HashMap; use crate::int::Int; use crate::iterator::mut_value_to_iterator; use crate::num::Number; @@ -11,6 +11,7 @@ use itertools::Itertools; use ndc_lexer::Span; use ndc_parser::{ Binding, Expression, ExpressionLocation, ForBody, ForIteration, LogicalOperator, Lvalue, + ResolvedVar, }; use std::cell::RefCell; use std::fmt; @@ -237,14 +238,10 @@ pub(crate) fn evaluate_expression( } } Expression::Block { statements } => { - let local_scope = Rc::new(RefCell::new(Environment::new_scope(environment))); - let mut value = Value::unit(); for stm in statements { - value = evaluate_expression(stm, &local_scope)?; + value = evaluate_expression(stm, environment)?; } - - drop(local_scope); value } Expression::If { @@ -903,15 +900,13 @@ fn execute_for_iterations( let mut sequence = evaluate_expression(sequence, environment)?; let iter = mut_value_to_iterator(&mut sequence).into_evaluation_result(span)?; + let base_offset = lvalue_base_offset(l_value); + for r_value in iter { - // In a previous version this scope was lifted outside the loop and reset for every iteration inside the loop - // in the following code sample this matters (a lot): - // ```ndc - // [fn(x) { x + i } for i in 0...10] - // ``` - // With the current implementation with a new scope declared for every iteration this produces 10 functions - // each with their own scope and their own version of `i`, this might potentially be a bit slower though - let scope = Rc::new(RefCell::new(Environment::new_scope(environment))); + let scope = Rc::new(RefCell::new(Environment::new_iteration_scope( + environment, + base_offset, + ))); declare_or_assign_variable(l_value, r_value, &scope, span)?; if tail.is_empty() { @@ -1061,6 +1056,17 @@ fn vectorized_element_types(left: &StaticType, right: &StaticType) -> [StaticTyp [left_elem, right_elem] } +fn lvalue_base_offset(lvalue: &Lvalue) -> usize { + match lvalue { + Lvalue::Identifier { + resolved: Some(ResolvedVar::Local { slot }), + .. + } => *slot, + Lvalue::Sequence(seq) => seq.iter().map(lvalue_base_offset).min().unwrap_or(0), + _ => 0, + } +} + fn resolve_dynamic_binding( binding: &Binding, arg_types: &[StaticType], diff --git a/ndc_interpreter/src/function.rs b/ndc_interpreter/src/function.rs index 3aa65743..a7c4ed96 100644 --- a/ndc_interpreter/src/function.rs +++ b/ndc_interpreter/src/function.rs @@ -1,8 +1,6 @@ -use crate::hash_map::{DefaultHasher, HashMap}; use crate::environment::Environment; -use crate::evaluate::{ - ErrorConverter, EvaluationError, EvaluationResult, evaluate_expression, -}; +use crate::evaluate::{ErrorConverter, EvaluationError, EvaluationResult, evaluate_expression}; +use crate::hash_map::{DefaultHasher, HashMap}; use crate::num::{BinaryOperatorError, Number}; use crate::sequence::Sequence; use crate::value::Value; @@ -241,21 +239,10 @@ impl FunctionBody { Self::Closure { body, environment, .. } => { - let mut local_scope = Environment::new_scope(environment); - - { - for (position, value) in args.iter().enumerate() { - // NOTE: stores a copy of the value in the environment (which is fine?) - // NOTE: we just assume here that the arguments are slotted in order starting at 0 - // because why not? Is this a call convention? - local_scope.set( - ResolvedVar::Captured { - depth: 0, - slot: position, - }, - value.clone(), - ) - } + let mut local_scope = Environment::new_function_scope(environment); + + for (position, value) in args.iter().enumerate() { + local_scope.set(ResolvedVar::Local { slot: position }, value.clone()) } let local_scope = Rc::new(RefCell::new(local_scope)); diff --git a/ndc_interpreter/src/lib.rs b/ndc_interpreter/src/lib.rs index a3ea98e9..5ba0dd47 100644 --- a/ndc_interpreter/src/lib.rs +++ b/ndc_interpreter/src/lib.rs @@ -12,9 +12,8 @@ pub mod value; use crate::environment::{Environment, InterpreterOutput}; use crate::evaluate::{EvaluationError, evaluate_expression}; use crate::function::FunctionCarrier; -use crate::semantic::analyser::{Analyser, ScopeTree}; +use crate::semantic::{Analyser, ScopeTree}; use crate::value::Value; -use ndc_core::int::Int; use ndc_lexer::{Lexer, TokenLocation}; use ndc_parser::ExpressionLocation; use ndc_vm::compiler::Compiler; @@ -64,8 +63,20 @@ impl Interpreter { } pub fn run_str(&mut self, input: &str) -> Result { + self.run_str_with_options(input, false) + } + + pub fn run_str_with_options( + &mut self, + input: &str, + use_vm: bool, + ) -> Result { let expressions = self.parse_and_analyse(input)?; - let final_value = self.interpret_vm(expressions.into_iter())?; + let final_value = if use_vm { + self.interpret_vm(expressions.into_iter())? + } else { + self.interpret(expressions.into_iter())? + }; Ok(format!("{final_value}")) } @@ -91,7 +102,6 @@ impl Interpreter { expressions: impl Iterator, ) -> Result { let code = Compiler::compile(expressions); - dbg!(&code); let mut vm = Vm::new(code); vm.run().expect("VM failed"); @@ -151,7 +161,7 @@ pub enum InterpreterError { #[error("Error during static analysis")] Resolver { #[from] - cause: semantic::analyser::AnalysisError, + cause: semantic::AnalysisError, }, #[error("Error while executing code")] Evaluation(#[from] EvaluationError), diff --git a/ndc_interpreter/src/semantic/analyser.rs b/ndc_interpreter/src/semantic/analyser.rs index 57b9b9b9..cb293248 100644 --- a/ndc_interpreter/src/semantic/analyser.rs +++ b/ndc_interpreter/src/semantic/analyser.rs @@ -1,4 +1,5 @@ use crate::function::StaticType; +use crate::semantic::ScopeTree; use itertools::Itertools; use ndc_lexer::Span; use ndc_parser::{ @@ -6,6 +7,7 @@ use ndc_parser::{ }; use std::fmt::{Debug, Formatter}; +#[derive(Debug)] pub struct Analyser { scope_tree: ScopeTree, } @@ -83,8 +85,10 @@ impl Analyser { *resolved_assign_operation = self .scope_tree - .resolve_function2(&format!("{operation}="), &arg_types); - *resolved_operation = self.scope_tree.resolve_function2(operation, &arg_types); + .resolve_function_binding(&format!("{operation}="), &arg_types); + *resolved_operation = self + .scope_tree + .resolve_function_binding(operation, &arg_types); if let Binding::None = resolved_operation { return Err(AnalysisError::function_not_found( @@ -123,7 +127,7 @@ impl Analyser { None }; - self.scope_tree.new_scope(); + self.scope_tree.new_function_scope(); let param_types = self.resolve_parameters_declarative(parameters)?; let return_type = self.analyse(body)?; @@ -150,7 +154,7 @@ impl Analyser { Ok(function_type) } Expression::Block { statements } => { - self.scope_tree.new_scope(); + self.scope_tree.new_block_scope(); let mut last = None; for s in statements { last = Some(self.analyse(s)?); @@ -306,7 +310,9 @@ impl Analyser { // println!("resolve fn {name} {}", argument_types.iter().join(", ")); - let binding = self.scope_tree.resolve_function2(name, argument_types); + let binding = self + .scope_tree + .resolve_function_binding(name, argument_types); let out_type = match &binding { Binding::None => { @@ -344,7 +350,7 @@ impl Analyser { ForIteration::Iteration { l_value, sequence } => { let sequence_type = self.analyse(sequence)?; - self.scope_tree.new_scope(); + self.scope_tree.new_iteration_scope(); // TODO: when we give type parameters to all instances of sequence we can correctly infer StaticType::Any in this position self.resolve_lvalue_declarative( @@ -354,7 +360,7 @@ impl Analyser { .unwrap_or(StaticType::Any), span, )?; - do_destroy = true; // TODO: why is this correct + do_destroy = true; } ForIteration::Guard(expr) => { self.analyse(expr)?; @@ -579,270 +585,6 @@ fn extract_argument_arity(arguments: &ExpressionLocation) -> usize { values.len() } -#[derive(Debug, Clone)] -pub struct ScopeTree { - current_scope_idx: usize, - global_scope: Scope, - scopes: Vec, -} - -impl ScopeTree { - pub fn from_global_scope(global_scope_map: Vec<(String, StaticType)>) -> Self { - Self { - current_scope_idx: 0, - global_scope: Scope { - parent_idx: None, - identifiers: global_scope_map, - }, - scopes: vec![Scope::new(None)], - } - } - - fn get_type(&self, res: ResolvedVar) -> &StaticType { - match res { - ResolvedVar::Captured { slot, depth } => { - let mut scope_idx = self.current_scope_idx; - let mut depth = depth; - while depth > 0 { - depth -= 1; - scope_idx = self.scopes[scope_idx] - .parent_idx - .expect("parent_idx was None while traversing the scope tree"); - } - &self.scopes[scope_idx].identifiers[slot].1 - } - // for now all globals are functions - ResolvedVar::Global { slot } => &self.global_scope.identifiers[slot].1, - } - } - - fn new_scope(&mut self) -> &Scope { - let old_scope_idx = self.current_scope_idx; - self.current_scope_idx = self.scopes.len(); - let new_scope = Scope::new(Some(old_scope_idx)); - self.scopes.push(new_scope); - &self.scopes[self.current_scope_idx] - } - - fn destroy_scope(&mut self) { - let next = self.scopes[self.current_scope_idx] - .parent_idx - .expect("tried to destroy scope while there were none"); - self.current_scope_idx = next; - } - - fn get_binding_any(&mut self, ident: &str) -> Option { - let mut depth = 0; - let mut scope_ptr = self.current_scope_idx; - - loop { - if let Some(slot) = self.scopes[scope_ptr].find_slot_by_name(ident) { - return Some(ResolvedVar::Captured { slot, depth }); - } else if let Some(parent_idx) = self.scopes[scope_ptr].parent_idx { - depth += 1; - scope_ptr = parent_idx; - } else { - return Some(ResolvedVar::Global { - slot: self.global_scope.find_slot_by_name(ident)?, - }); - } - } - } - - fn resolve_function_dynamic(&mut self, ident: &str, sig: &[StaticType]) -> Vec { - let mut depth = 0; - let mut scope_ptr = self.current_scope_idx; - - loop { - let candidates = self.scopes[scope_ptr].find_function_candidates(ident, sig); - if !candidates.is_empty() { - return candidates - .into_iter() - .map(|slot| ResolvedVar::Captured { slot, depth }) - .collect(); - } else if let Some(parent_idx) = self.scopes[scope_ptr].parent_idx { - depth += 1; - scope_ptr = parent_idx; - } else { - return self - .global_scope - .find_function_candidates(ident, sig) - .into_iter() - .map(|slot| ResolvedVar::Global { slot }) - .collect(); - } - } - } - - fn resolve_function2(&mut self, ident: &str, sig: &[StaticType]) -> Binding { - self.resolve_function(ident, sig) - .map(Binding::Resolved) - .or_else(|| { - let loose_bindings = self.resolve_function_dynamic(ident, sig); - - if loose_bindings.is_empty() { - return None; - } - - Some(Binding::Dynamic(loose_bindings)) - }) - // If we can't find any function in scope that could match, fall back to all same-named - // bindings so runtime dynamic dispatch (including vectorization) can pick the right one. - .or_else(|| { - let all_bindings = self.get_all_bindings_by_name(ident); - if all_bindings.is_empty() { - return None; - } - Some(Binding::Dynamic(all_bindings)) - }) - .unwrap_or(Binding::None) - } - - fn get_all_bindings_by_name(&self, ident: &str) -> Vec { - let mut results = Vec::new(); - let mut depth = 0; - let mut scope_ptr = self.current_scope_idx; - - loop { - let slots = self.scopes[scope_ptr].find_all_slots_by_name(ident); - results.extend( - slots - .into_iter() - .map(|slot| ResolvedVar::Captured { slot, depth }), - ); - - if let Some(parent_idx) = self.scopes[scope_ptr].parent_idx { - depth += 1; - scope_ptr = parent_idx; - } else { - let global_slots = self.global_scope.find_all_slots_by_name(ident); - results.extend( - global_slots - .into_iter() - .map(|slot| ResolvedVar::Global { slot }), - ); - break; - } - } - - results - } - - fn resolve_function(&mut self, ident: &str, arg_types: &[StaticType]) -> Option { - let mut depth = 0; - let mut scope_ptr = self.current_scope_idx; - - loop { - if let Some(slot) = self.scopes[scope_ptr].find_function(ident, arg_types) { - return Some(ResolvedVar::Captured { slot, depth }); - } else if let Some(parent_idx) = self.scopes[scope_ptr].parent_idx { - depth += 1; - scope_ptr = parent_idx; - } else { - return Some(ResolvedVar::Global { - slot: self.global_scope.find_function(ident, arg_types)?, - }); - } - } - } - - fn create_local_binding(&mut self, ident: String, typ: StaticType) -> ResolvedVar { - ResolvedVar::Captured { - slot: self.scopes[self.current_scope_idx].allocate(ident, typ), - depth: 0, - } - } - - fn update_binding_type(&mut self, var: ResolvedVar, new_type: StaticType) { - let ResolvedVar::Captured { slot, depth } = var else { - panic!("update_binding_type called with a global binding"); - }; - let mut scope_idx = self.current_scope_idx; - let mut remaining = depth; - while remaining > 0 { - remaining -= 1; - scope_idx = self.scopes[scope_idx] - .parent_idx - .expect("parent_idx was None while traversing the scope tree"); - } - self.scopes[scope_idx].identifiers[slot].1 = new_type; - } -} - -#[derive(Debug, Clone)] -struct Scope { - parent_idx: Option, - identifiers: Vec<(String, StaticType)>, -} - -impl Scope { - fn new(parent_idx: Option) -> Self { - Self { - parent_idx, - identifiers: Default::default(), - } - } - - pub fn find_slot_by_name(&self, find_ident: &str) -> Option { - self.identifiers - .iter() - .rposition(|(ident, _)| ident == find_ident) - } - - fn find_all_slots_by_name(&self, find_ident: &str) -> Vec { - self.identifiers - .iter() - .enumerate() - .filter_map(|(slot, (ident, _))| { - if ident == find_ident { - Some(slot) - } else { - None - } - }) - .collect() - } - - fn find_function_candidates(&self, find_ident: &str, find_types: &[StaticType]) -> Vec { - self.identifiers.iter() - .enumerate() - .rev() - .filter_map(|(slot, (ident, typ))| { - if ident != find_ident { - return None; - } - - // If the thing is not a function we're not interested - let StaticType::Function { parameters, .. } = typ else { - return None; - }; - - let Some(param_types) = parameters else { - // If this branch happens then the function we're matching against is variadic meaning it's always a match - debug_assert!(false, "we should never be calling find_function_candidates if there were variadic matches"); - // TODO: Change to unreachable? - return Some(slot); - }; - - let is_good = param_types.len() == find_types.len() - && param_types.iter().zip(find_types.iter()).all(|(typ_1, typ_2)| !typ_1.is_incompatible_with(typ_2)); - - is_good.then_some(slot) - }) - .collect() - } - fn find_function(&self, find_ident: &str, find_types: &[StaticType]) -> Option { - self.identifiers - .iter() - .rposition(|(ident, typ)| ident == find_ident && typ.is_fn_and_matches(find_types)) - } - - fn allocate(&mut self, name: String, typ: StaticType) -> usize { - self.identifiers.push((name, typ)); - // Slot is just the length of the list minus one - self.identifiers.len() - 1 - } -} #[derive(thiserror::Error, Debug)] #[error("{text}")] pub struct AnalysisError { @@ -896,14 +638,3 @@ impl AnalysisError { } } } - -impl Debug for Analyser { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - writeln!(f)?; - for (id, scope) in self.scope_tree.scopes.iter().enumerate() { - writeln!(f, "{id}: {scope:?}")?; - } - - Ok(()) - } -} diff --git a/ndc_interpreter/src/semantic/mod.rs b/ndc_interpreter/src/semantic/mod.rs index 32c1c0de..849a242b 100644 --- a/ndc_interpreter/src/semantic/mod.rs +++ b/ndc_interpreter/src/semantic/mod.rs @@ -1 +1,5 @@ -pub mod analyser; +mod analyser; +mod scope; + +pub(crate) use analyser::*; +pub(crate) use scope::*; diff --git a/ndc_interpreter/src/semantic/scope.rs b/ndc_interpreter/src/semantic/scope.rs new file mode 100644 index 00000000..67a4d126 --- /dev/null +++ b/ndc_interpreter/src/semantic/scope.rs @@ -0,0 +1,566 @@ +use crate::semantic::analyser::Analyser; +use ndc_parser::{Binding, ResolvedVar, StaticType}; +use std::fmt::{Debug, Formatter}; + +#[derive(Debug, Clone)] +pub(crate) struct Scope { + parent_idx: Option, + creates_environment: bool, // Only true for function scopes and for-loop iterations + base_offset: usize, + function_scope_idx: usize, + identifiers: Vec<(String, StaticType)>, +} + +impl Scope { + pub(crate) fn offset(&self) -> usize { + self.base_offset + self.identifiers.len() + } + + pub(crate) fn new_function_scope(parent_idx: Option, function_scope_idx: usize) -> Self { + Self { + parent_idx, + creates_environment: true, + base_offset: 0, + function_scope_idx, + identifiers: Vec::default(), + } + } + + pub(crate) fn new_block_scope( + parent_idx: Option, + base_offset: usize, + function_scope_idx: usize, + ) -> Self { + Self { + parent_idx, + creates_environment: false, + base_offset, + function_scope_idx, + identifiers: Vec::default(), + } + } + + pub(crate) fn new_iteration_scope( + parent_idx: Option, + base_offset: usize, + function_scope_idx: usize, + ) -> Self { + Self { + parent_idx, + creates_environment: true, + base_offset, + function_scope_idx, + identifiers: Vec::default(), + } + } + + pub(crate) fn find_slot_by_name(&self, find_ident: &str) -> Option { + self.identifiers + .iter() + .rposition(|(ident, _)| ident == find_ident) + .map(|idx| idx + self.base_offset) + } + + fn find_all_slots_by_name(&self, find_ident: &str) -> Vec { + self.identifiers + .iter() + .enumerate() + .filter_map(|(slot, (ident, _))| { + if ident == find_ident { + Some(slot + self.base_offset) + } else { + None + } + }) + .collect() + } + + fn find_function_candidates(&self, find_ident: &str, find_types: &[StaticType]) -> Vec { + self.identifiers.iter() + .enumerate() + .rev() + .filter_map(|(slot, (ident, typ))| { + if ident != find_ident { + return None; + } + + // If the thing is not a function we're not interested + let StaticType::Function { parameters, .. } = typ else { + return None; + }; + + let Some(param_types) = parameters else { + // If this branch happens then the function we're matching against is variadic meaning it's always a match + debug_assert!(false, "we should never be calling find_function_candidates if there were variadic matches"); + // TODO: Change to unreachable? + return Some(slot); + }; + + let is_good = param_types.len() == find_types.len() + && param_types.iter().zip(find_types.iter()).all(|(typ_1, typ_2)| !typ_1.is_incompatible_with(typ_2)); + + is_good.then_some(slot) + }) + .map(|idx| idx + self.base_offset) + .collect() + } + fn find_function(&self, find_ident: &str, find_types: &[StaticType]) -> Option { + self.identifiers + .iter() + .rposition(|(ident, typ)| ident == find_ident && typ.is_fn_and_matches(find_types)) + .map(|idx| idx + self.base_offset) + } + + fn allocate(&mut self, name: String, typ: StaticType) -> usize { + self.identifiers.push((name, typ)); + // Slot is just the length of the list minus one + self.base_offset + self.identifiers.len() - 1 + } +} + +#[derive(Clone)] +pub(crate) struct ScopeTree { + current_scope_idx: usize, + global_scope: Scope, + scopes: Vec, +} + +impl ScopeTree { + pub(crate) fn from_global_scope(global_scope_map: Vec<(String, StaticType)>) -> Self { + let mut global_scope = Scope::new_function_scope(None, 0); + global_scope.identifiers = global_scope_map; + + Self { + current_scope_idx: 0, + global_scope, + scopes: vec![Scope::new_function_scope(None, 0)], + } + } + + pub(crate) fn get_type(&self, res: ResolvedVar) -> &StaticType { + match res { + ResolvedVar::Local { slot } => self.find_type_by_slot(self.current_scope_idx, slot), + ResolvedVar::Upvalue { slot, depth } => { + let mut scope_idx = self.current_scope_idx; + let mut depth = depth; + while depth > 0 { + scope_idx = self.scopes[scope_idx] + .parent_idx + .expect("parent_idx was None while traversing the scope tree"); + if self.scopes[scope_idx].creates_environment { + depth -= 1; + } + } + self.find_type_by_slot(scope_idx, slot) + } + ResolvedVar::Global { slot } => &self.global_scope.identifiers[slot].1, + } + } + + pub(crate) fn find_type_by_slot(&self, start_scope: usize, slot: usize) -> &StaticType { + let mut scope_idx = start_scope; + loop { + let scope = &self.scopes[scope_idx]; + if slot >= scope.base_offset && slot < scope.base_offset + scope.identifiers.len() { + return &scope.identifiers[slot - scope.base_offset].1; + } + scope_idx = scope + .parent_idx + .expect("slot not found in any scope within function"); + } + } + + pub(crate) fn new_block_scope(&mut self) -> &Scope { + let old_scope_idx = self.current_scope_idx; + self.current_scope_idx = self.scopes.len(); + let new_scope = Scope::new_block_scope( + Some(old_scope_idx), + self.scopes[old_scope_idx].offset(), + self.scopes[old_scope_idx].function_scope_idx, + ); + self.scopes.push(new_scope); + &self.scopes[self.current_scope_idx] + } + + pub(crate) fn new_function_scope(&mut self) -> &Scope { + let old_scope_idx = self.current_scope_idx; + self.current_scope_idx = self.scopes.len(); + let new_scope = Scope::new_function_scope(Some(old_scope_idx), self.scopes.len()); + self.scopes.push(new_scope); + &self.scopes[self.current_scope_idx] + } + + pub(crate) fn new_iteration_scope(&mut self) -> &Scope { + let old_scope_idx = self.current_scope_idx; + self.current_scope_idx = self.scopes.len(); + let new_scope = Scope::new_iteration_scope( + Some(old_scope_idx), + self.scopes[old_scope_idx].offset(), // todo: @claude is this correct + self.scopes.len(), + ); + self.scopes.push(new_scope); + &self.scopes[self.current_scope_idx] + } + + pub(crate) fn destroy_scope(&mut self) { + let next = self.scopes[self.current_scope_idx] + .parent_idx + .expect("tried to destroy scope while there were none"); + self.current_scope_idx = next; + } + + pub(crate) fn get_binding_any(&mut self, ident: &str) -> Option { + let mut depth = 0; + let mut scope_ptr = self.current_scope_idx; + + loop { + if let Some(slot) = self.scopes[scope_ptr].find_slot_by_name(ident) { + return Some(if depth == 0 { + ResolvedVar::Local { slot } + } else { + ResolvedVar::Upvalue { slot, depth } + }); + } else if let Some(parent_idx) = self.scopes[scope_ptr].parent_idx { + if self.scopes[scope_ptr].creates_environment { + depth += 1; + } + scope_ptr = parent_idx; + } else { + return Some(ResolvedVar::Global { + slot: self.global_scope.find_slot_by_name(ident)?, + }); + } + } + } + + pub(crate) fn resolve_function_dynamic( + &mut self, + ident: &str, + sig: &[StaticType], + ) -> Vec { + let mut depth = 0; + let mut scope_ptr = self.current_scope_idx; + + loop { + let candidates = self.scopes[scope_ptr].find_function_candidates(ident, sig); + if !candidates.is_empty() { + return candidates + .into_iter() + .map(|slot| { + if depth == 0 { + ResolvedVar::Local { slot } + } else { + ResolvedVar::Upvalue { slot, depth } + } + }) + .collect(); + } else if let Some(parent_idx) = self.scopes[scope_ptr].parent_idx { + if self.scopes[scope_ptr].creates_environment { + depth += 1; + } + + scope_ptr = parent_idx; + } else { + return self + .global_scope + .find_function_candidates(ident, sig) + .into_iter() + .map(|slot| ResolvedVar::Global { slot }) + .collect(); + } + } + } + + pub(crate) fn resolve_function_binding(&mut self, ident: &str, sig: &[StaticType]) -> Binding { + self.resolve_function(ident, sig) + .map(Binding::Resolved) + .or_else(|| { + let loose_bindings = self.resolve_function_dynamic(ident, sig); + + if loose_bindings.is_empty() { + return None; + } + + Some(Binding::Dynamic(loose_bindings)) + }) + // If we can't find any function in scope that could match, fall back to all same-named + // bindings so runtime dynamic dispatch (including vectorization) can pick the right one. + .or_else(|| { + let all_bindings = self.get_all_bindings_by_name(ident); + if all_bindings.is_empty() { + return None; + } + Some(Binding::Dynamic(all_bindings)) + }) + .unwrap_or(Binding::None) + } + + pub(crate) fn get_all_bindings_by_name(&self, ident: &str) -> Vec { + let mut results = Vec::new(); + let mut depth = 0; + let mut scope_ptr = self.current_scope_idx; + + loop { + let slots = self.scopes[scope_ptr].find_all_slots_by_name(ident); + results.extend(slots.into_iter().map(|slot| { + if depth == 0 { + ResolvedVar::Local { slot } + } else { + ResolvedVar::Upvalue { slot, depth } + } + })); + + if let Some(parent_idx) = self.scopes[scope_ptr].parent_idx { + if self.scopes[scope_ptr].creates_environment { + depth += 1; + } + scope_ptr = parent_idx; + } else { + let global_slots = self.global_scope.find_all_slots_by_name(ident); + results.extend( + global_slots + .into_iter() + .map(|slot| ResolvedVar::Global { slot }), + ); + break; + } + } + + results + } + + pub(crate) fn resolve_function( + &mut self, + ident: &str, + arg_types: &[StaticType], + ) -> Option { + let mut depth = 0; + let mut scope_ptr = self.current_scope_idx; + + loop { + if let Some(slot) = self.scopes[scope_ptr].find_function(ident, arg_types) { + return Some(if depth == 0 { + ResolvedVar::Local { slot } + } else { + ResolvedVar::Upvalue { slot, depth } + }); + } else if let Some(parent_idx) = self.scopes[scope_ptr].parent_idx { + if self.scopes[scope_ptr].creates_environment { + depth += 1; + } + scope_ptr = parent_idx; + } else { + return Some(ResolvedVar::Global { + slot: self.global_scope.find_function(ident, arg_types)?, + }); + } + } + } + + pub(crate) fn create_local_binding(&mut self, ident: String, typ: StaticType) -> ResolvedVar { + ResolvedVar::Local { + slot: self.scopes[self.current_scope_idx].allocate(ident, typ), + } + } + + pub(crate) fn update_binding_type(&mut self, var: ResolvedVar, new_type: StaticType) { + let scope_idx = match var { + ResolvedVar::Local { slot } => { + self.find_scope_owning_slot(self.current_scope_idx, slot) + } + ResolvedVar::Upvalue { depth, .. } => { + let mut scope_idx = self.current_scope_idx; + let mut depth = depth; + while depth > 0 { + scope_idx = self.scopes[scope_idx] + .parent_idx + .expect("parent_idx was None while traversing the scope tree"); + if self.scopes[scope_idx].creates_environment { + depth -= 1; + } + } + self.find_scope_owning_slot(scope_idx, var.slot()) + } + ResolvedVar::Global { .. } => { + panic!("update_binding_type called with a global binding") + } + }; + let slot = var.slot(); + let base = self.scopes[scope_idx].base_offset; + self.scopes[scope_idx].identifiers[slot - base].1 = new_type; + } + + pub(crate) fn find_scope_owning_slot(&self, start_scope: usize, slot: usize) -> usize { + let mut scope_idx = start_scope; + loop { + let scope = &self.scopes[scope_idx]; + if slot >= scope.base_offset && slot < scope.base_offset + scope.identifiers.len() { + return scope_idx; + } + scope_idx = scope + .parent_idx + .expect("slot not found in any scope within function"); + } + } +} + +impl Debug for ScopeTree { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + writeln!(f)?; + for (id, scope) in self.scopes.iter().enumerate() { + writeln!(f, "{id}: {scope:?}")?; + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use ndc_parser::ResolvedVar; + + fn empty_scope_tree() -> ScopeTree { + ScopeTree::from_global_scope(vec![]) + } + + #[test] + fn single_local_in_function_scope() { + let mut tree = empty_scope_tree(); + let var = tree.create_local_binding("x".into(), StaticType::Int); + assert_eq!(var, ResolvedVar::Local { slot: 0 }); + assert_eq!( + tree.get_binding_any("x"), + Some(ResolvedVar::Local { slot: 0 }) + ); + } + + #[test] + fn multiple_locals_get_ascending_slots() { + let mut tree = empty_scope_tree(); + let x = tree.create_local_binding("x".into(), StaticType::Int); + let y = tree.create_local_binding("y".into(), StaticType::Int); + let z = tree.create_local_binding("z".into(), StaticType::Int); + assert_eq!(x, ResolvedVar::Local { slot: 0 }); + assert_eq!(y, ResolvedVar::Local { slot: 1 }); + assert_eq!(z, ResolvedVar::Local { slot: 2 }); + } + + #[test] + fn block_scope_continues_flat_numbering() { + let mut tree = empty_scope_tree(); + let x = tree.create_local_binding("x".into(), StaticType::Int); + assert_eq!(x, ResolvedVar::Local { slot: 0 }); + + tree.new_block_scope(); + let y = tree.create_local_binding("y".into(), StaticType::Int); + assert_eq!(y, ResolvedVar::Local { slot: 1 }); + + assert_eq!( + tree.get_binding_any("x"), + Some(ResolvedVar::Local { slot: 0 }) + ); + } + + #[test] + fn nested_block_scopes_continue_numbering() { + let mut tree = empty_scope_tree(); + tree.create_local_binding("a".into(), StaticType::Int); + + tree.new_block_scope(); + let b = tree.create_local_binding("b".into(), StaticType::Int); + assert_eq!(b, ResolvedVar::Local { slot: 1 }); + + tree.new_block_scope(); + let c = tree.create_local_binding("c".into(), StaticType::Int); + assert_eq!(c, ResolvedVar::Local { slot: 2 }); + } + + #[test] + fn block_scope_does_not_increment_depth() { + let mut tree = empty_scope_tree(); + tree.create_local_binding("x".into(), StaticType::Int); + + tree.new_block_scope(); + assert_eq!( + tree.get_binding_any("x"), + Some(ResolvedVar::Local { slot: 0 }) + ); + } + + #[test] + fn function_scope_resets_slots_and_increments_depth() { + let mut tree = empty_scope_tree(); + tree.create_local_binding("x".into(), StaticType::Int); + + tree.new_function_scope(); + let y = tree.create_local_binding("y".into(), StaticType::Int); + assert_eq!(y, ResolvedVar::Local { slot: 0 }); + + assert_eq!( + tree.get_binding_any("x"), + Some(ResolvedVar::Upvalue { depth: 1, slot: 0 }) + ); + } + + #[test] + fn iteration_scope_continues_numbering_but_increments_depth() { + let mut tree = empty_scope_tree(); + tree.create_local_binding("x".into(), StaticType::Int); + + tree.new_iteration_scope(); + let i = tree.create_local_binding("i".into(), StaticType::Int); + assert_eq!(i, ResolvedVar::Local { slot: 1 }); + + assert_eq!( + tree.get_binding_any("x"), + Some(ResolvedVar::Upvalue { depth: 1, slot: 0 }) + ); + } + + #[test] + fn global_lookup() { + let tree = ScopeTree::from_global_scope(vec![( + "print".into(), + StaticType::Function { + parameters: None, + return_type: Box::new(StaticType::Any), + }, + )]); + // get_binding_any requires &mut self + let mut tree = tree; + assert_eq!( + tree.get_binding_any("print"), + Some(ResolvedVar::Global { slot: 0 }) + ); + } + + #[test] + fn slot_reuse_after_scope_destroy() { + let mut tree = empty_scope_tree(); + tree.create_local_binding("a".into(), StaticType::Int); + + tree.new_block_scope(); + tree.create_local_binding("b".into(), StaticType::Int); + tree.destroy_scope(); + + let c = tree.create_local_binding("c".into(), StaticType::Int); + assert_eq!(c, ResolvedVar::Local { slot: 1 }); + } + + #[test] + fn get_type_returns_correct_type() { + let mut tree = empty_scope_tree(); + tree.create_local_binding("x".into(), StaticType::Int); + tree.create_local_binding("y".into(), StaticType::String); + + assert_eq!( + tree.get_type(ResolvedVar::Local { slot: 0 }), + &StaticType::Int + ); + assert_eq!( + tree.get_type(ResolvedVar::Local { slot: 1 }), + &StaticType::String + ); + } +} diff --git a/ndc_interpreter/src/sequence.rs b/ndc_interpreter/src/sequence.rs index db265840..1df32473 100644 --- a/ndc_interpreter/src/sequence.rs +++ b/ndc_interpreter/src/sequence.rs @@ -1,5 +1,5 @@ -use crate::hash_map::HashMap; use crate::function::StaticType; +use crate::hash_map::HashMap; use crate::heap::{MaxHeap, MinHeap}; use crate::iterator::ValueIterator; use crate::value::Value; diff --git a/ndc_interpreter/src/value.rs b/ndc_interpreter/src/value.rs index 52a588d9..221c337b 100644 --- a/ndc_interpreter/src/value.rs +++ b/ndc_interpreter/src/value.rs @@ -9,8 +9,8 @@ use itertools::Itertools; use num::BigInt; use crate::compare::FallibleOrd; -use crate::hash_map::DefaultHasher; use crate::function::{Function, StaticType}; +use crate::hash_map::DefaultHasher; use crate::int::Int; use crate::num::{Number, NumberToFloatError, NumberToUsizeError}; use crate::sequence::Sequence; diff --git a/ndc_lsp/src/backend.rs b/ndc_lsp/src/backend.rs index 05f654b7..d0788c31 100644 --- a/ndc_lsp/src/backend.rs +++ b/ndc_lsp/src/backend.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; -use ndc_lexer::{Lexer, Span, TokenLocation}; use ndc_interpreter::Interpreter; +use ndc_lexer::{Lexer, Span, TokenLocation}; use ndc_parser::{Expression, ExpressionLocation, ForBody, ForIteration, Lvalue}; use ndc_stdlib::WithStdlib; use tokio::sync::Mutex; diff --git a/ndc_parser/src/expression.rs b/ndc_parser/src/expression.rs index 261d646f..6ed5fcac 100644 --- a/ndc_parser/src/expression.rs +++ b/ndc_parser/src/expression.rs @@ -14,10 +14,19 @@ pub enum Binding { #[derive(Debug, Eq, PartialEq, Clone, Copy)] pub enum ResolvedVar { - Captured { depth: usize, slot: usize }, + Local { slot: usize }, + Upvalue { depth: usize, slot: usize }, Global { slot: usize }, } +impl ResolvedVar { + pub fn slot(self) -> usize { + match self { + Self::Local { slot } | Self::Upvalue { slot, .. } | Self::Global { slot } => slot, + } + } +} + #[derive(Eq, PartialEq, Clone)] pub struct ExpressionLocation { pub expression: Expression, diff --git a/ndc_stdlib/src/math.rs b/ndc_stdlib/src/math.rs index 97b97a4f..7bff0a5c 100644 --- a/ndc_stdlib/src/math.rs +++ b/ndc_stdlib/src/math.rs @@ -474,18 +474,16 @@ pub mod f64 { macro_rules! delegate_to_f64 { ($method:ident,$docs:literal) => { let function = FunctionBuilder::default() - .body( - ndc_interpreter::function::FunctionBody::NumericUnaryOp { - body: |num: Number| match num { - Number::Int(i) => Number::Float(f64::from(i).$method()), - Number::Float(f) => Number::Float(f.$method()), - Number::Rational(r) => { - Number::Float(r.to_f64().unwrap_or(f64::NAN).$method()) - } - Number::Complex(c) => Number::Complex(c.$method()), - }, + .body(ndc_interpreter::function::FunctionBody::NumericUnaryOp { + body: |num: Number| match num { + Number::Int(i) => Number::Float(f64::from(i).$method()), + Number::Float(f) => Number::Float(f.$method()), + Number::Rational(r) => { + Number::Float(r.to_f64().unwrap_or(f64::NAN).$method()) + } + Number::Complex(c) => Number::Complex(c.$method()), }, - ) + }) .name(stringify!($method).to_string()) .documentation(String::from($docs)) .build() diff --git a/ndc_vm/src/chunk.rs b/ndc_vm/src/chunk.rs index a7a0ddb5..92ffca0b 100644 --- a/ndc_vm/src/chunk.rs +++ b/ndc_vm/src/chunk.rs @@ -4,7 +4,14 @@ use ndc_lexer::Span; /// A single bytecode instruction. #[derive(Debug, Clone, PartialEq)] pub enum OpCode { + /// Pushes a constant value on the stack Constant(usize), + /// Reads local variable at the given slot and pushes it on the stack + GetLocal(usize), + /// Pops the top of the stack and stores it in the given local slot + SetLocal(usize), + /// Stop execution + Halt, Return, } diff --git a/ndc_vm/src/compiler.rs b/ndc_vm/src/compiler.rs index c12764ec..6428da43 100644 --- a/ndc_vm/src/compiler.rs +++ b/ndc_vm/src/compiler.rs @@ -1,8 +1,7 @@ use crate::chunk::{Chunk, OpCode}; use crate::{Object, Value}; -use ndc_core::int::Int; -use ndc_core::num::Number; -use ndc_parser::{Expression, ExpressionLocation}; +use ndc_lexer::Span; +use ndc_parser::{Expression, ExpressionLocation, Lvalue, ResolvedVar}; pub struct Compiler; @@ -14,11 +13,13 @@ impl Compiler { compile_expr(expr_loc, &mut chunk); } + // TODO: 0,0 span is kinda strange + chunk.write(OpCode::Halt, Span::new(0, 0)); chunk } } fn compile_expr(ExpressionLocation { expression, span }: ExpressionLocation, chunk: &mut Chunk) { - println!("COMPILING: {expression:?}"); + eprintln!("[COMPILING]: {expression:?}"); match expression { Expression::BoolLiteral(b) => { let idx = chunk.add_constant(Value::Bool(b)); @@ -44,17 +45,50 @@ fn compile_expr(ExpressionLocation { expression, span }: ExpressionLocation, chu let idx = chunk.add_constant(Object::Complex(c).into()); chunk.write(OpCode::Constant(idx), span); } - Expression::Identifier { .. } => {} + Expression::Identifier { resolved, .. } => match resolved { + ndc_parser::Binding::Resolved(ResolvedVar::Local { slot }) => { + chunk.write(OpCode::GetLocal(slot), span); + } + _ => {} + }, Expression::Statement(stm) => { compile_expr(*stm, chunk); } Expression::Logical { .. } => {} Expression::Grouping(_) => {} - Expression::VariableDeclaration { .. } => {} - Expression::Assignment { .. } => {} + // TODO: is this supposed to be different in the VM? + Expression::Assignment { + l_value, + r_value: value, + } + | Expression::VariableDeclaration { l_value, value } => { + compile_expr(*value, chunk); + match l_value { + Lvalue::Identifier { + resolved, + span: lv_span, + .. + } => match resolved.expect("identifiers must be resolved") { + ResolvedVar::Local { slot } => { + chunk.write(OpCode::SetLocal(slot), lv_span); + } + ResolvedVar::Upvalue { .. } => {} + ResolvedVar::Global { .. } => {} + }, + Lvalue::Index { .. } => {} + Lvalue::Sequence(_) => {} + } + } Expression::OpAssignment { .. } => {} Expression::FunctionDeclaration { .. } => {} - Expression::Block { .. } => {} + Expression::Grouping(statements) => { + compile_expr(*statements, chunk); + } + Expression::Block { statements } => { + for statement in statements { + compile_expr(statement, chunk); + } + } Expression::If { .. } => {} Expression::While { .. } => {} Expression::For { .. } => {} diff --git a/ndc_vm/src/vm.rs b/ndc_vm/src/vm.rs index 92af6cdc..640ae95c 100644 --- a/ndc_vm/src/vm.rs +++ b/ndc_vm/src/vm.rs @@ -6,6 +6,7 @@ pub struct Vm { chunk: Chunk, ip: usize, stack: Vec, + locals: Vec, } #[derive(thiserror::Error, Debug)] @@ -20,12 +21,13 @@ impl Vm { chunk, ip: 0, stack: Vec::default(), + locals: Vec::default(), } } pub fn run(&mut self) -> Result<(), VmError> { - eprintln!("[DBG] Value bytes: {}", size_of::()); - eprintln!("[DBG] Number bytes: {}", size_of::()); + eprintln!("[VM] Value bytes: {}", size_of::()); + eprintln!("[VM] OpCode bytes: {}", size_of::()); if self.chunk.is_empty() { return Ok(()); @@ -35,14 +37,32 @@ impl Vm { let op = self.chunk.opcode(self.ip); self.ip += 1; + eprintln!("[VM] Running: {:?}", op); + match op { + OpCode::Halt => { + eprintln!("[VM] stack-dump\n{:?}", self.stack); + eprintln!("[VM] locals-dump\n{:?}", self.locals); + return Ok(()); + } OpCode::Return => { println!("{:?}", self.stack.pop().expect("stack underflow")); } OpCode::Constant(idx) => { - // TODO: assuming constants can be referenced multiple times we'll have to clone here self.stack.push(self.chunk.constant(*idx).clone()); } + OpCode::GetLocal(slot) => { + self.stack.push(self.locals[*slot].clone()); + } + OpCode::SetLocal(slot) => { + let value = self.stack.pop().expect("stack underflow"); + if *slot < self.locals.len() { + self.locals[*slot] = value; + } else { + self.locals.resize(*slot, Value::None); + self.locals.push(value); + } + } } } } From a1ded535dc7fc457de55544369dfe6a1162bee54 Mon Sep 17 00:00:00 2001 From: Tim Fennis Date: Tue, 3 Mar 2026 15:21:28 +0100 Subject: [PATCH 006/185] Fix warnings --- Cargo.lock | 2 -- ndc_bin/Cargo.toml | 1 - ndc_interpreter/src/semantic/analyser.rs | 6 ++---- ndc_interpreter/src/semantic/scope.rs | 1 - ndc_vm/Cargo.toml | 1 - ndc_vm/src/compiler.rs | 1 - ndc_vm/src/vm.rs | 1 - 7 files changed, 2 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3d54418f..59aa4d83 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1252,7 +1252,6 @@ dependencies = [ "ndc_lexer", "ndc_lsp", "ndc_stdlib", - "ndc_vm", "owo-colors", "rustyline", "strsim", @@ -1355,7 +1354,6 @@ dependencies = [ name = "ndc_vm" version = "0.2.1" dependencies = [ - "ndc_core", "ndc_lexer", "ndc_parser", "num", diff --git a/ndc_bin/Cargo.toml b/ndc_bin/Cargo.toml index d23bc1e9..2cd5c4f4 100644 --- a/ndc_bin/Cargo.toml +++ b/ndc_bin/Cargo.toml @@ -18,7 +18,6 @@ ndc_lexer.workspace = true ndc_interpreter.workspace = true ndc_stdlib.workspace = true ndc_lsp.workspace = true -ndc_vm.workspace = true owo-colors.workspace = true rustyline.workspace = true tap.workspace = true diff --git a/ndc_interpreter/src/semantic/analyser.rs b/ndc_interpreter/src/semantic/analyser.rs index cb293248..9c33e251 100644 --- a/ndc_interpreter/src/semantic/analyser.rs +++ b/ndc_interpreter/src/semantic/analyser.rs @@ -2,10 +2,8 @@ use crate::function::StaticType; use crate::semantic::ScopeTree; use itertools::Itertools; use ndc_lexer::Span; -use ndc_parser::{ - Binding, Expression, ExpressionLocation, ForBody, ForIteration, Lvalue, ResolvedVar, -}; -use std::fmt::{Debug, Formatter}; +use ndc_parser::{Binding, Expression, ExpressionLocation, ForBody, ForIteration, Lvalue}; +use std::fmt::Debug; #[derive(Debug)] pub struct Analyser { diff --git a/ndc_interpreter/src/semantic/scope.rs b/ndc_interpreter/src/semantic/scope.rs index 67a4d126..baf3486d 100644 --- a/ndc_interpreter/src/semantic/scope.rs +++ b/ndc_interpreter/src/semantic/scope.rs @@ -1,4 +1,3 @@ -use crate::semantic::analyser::Analyser; use ndc_parser::{Binding, ResolvedVar, StaticType}; use std::fmt::{Debug, Formatter}; diff --git a/ndc_vm/Cargo.toml b/ndc_vm/Cargo.toml index 6e734684..8efc140d 100644 --- a/ndc_vm/Cargo.toml +++ b/ndc_vm/Cargo.toml @@ -7,5 +7,4 @@ version.workspace = true thiserror.workspace = true ndc_parser.workspace = true ndc_lexer.workspace = true -ndc_core.workspace = true num.workspace = true diff --git a/ndc_vm/src/compiler.rs b/ndc_vm/src/compiler.rs index 6428da43..585794e2 100644 --- a/ndc_vm/src/compiler.rs +++ b/ndc_vm/src/compiler.rs @@ -55,7 +55,6 @@ fn compile_expr(ExpressionLocation { expression, span }: ExpressionLocation, chu compile_expr(*stm, chunk); } Expression::Logical { .. } => {} - Expression::Grouping(_) => {} // TODO: is this supposed to be different in the VM? Expression::Assignment { l_value, diff --git a/ndc_vm/src/vm.rs b/ndc_vm/src/vm.rs index 640ae95c..990d3ce1 100644 --- a/ndc_vm/src/vm.rs +++ b/ndc_vm/src/vm.rs @@ -1,6 +1,5 @@ use crate::Value; use crate::chunk::{Chunk, OpCode}; -use ndc_core::num::Number; pub struct Vm { chunk: Chunk, From d22e4d84d24bd3a742aac839affa82293619eb87 Mon Sep 17 00:00:00 2001 From: Tim Fennis Date: Tue, 3 Mar 2026 16:57:38 +0100 Subject: [PATCH 007/185] Basic conditions implemented --- ndc_vm/src/chunk.rs | 30 +++++++++++- ndc_vm/src/compiler.rs | 106 ++++++++++++++++++++++++++++++++++++----- ndc_vm/src/vm.rs | 18 +++++++ 3 files changed, 141 insertions(+), 13 deletions(-) diff --git a/ndc_vm/src/chunk.rs b/ndc_vm/src/chunk.rs index 92ffca0b..931cd14c 100644 --- a/ndc_vm/src/chunk.rs +++ b/ndc_vm/src/chunk.rs @@ -4,6 +4,14 @@ use ndc_lexer::Span; /// A single bytecode instruction. #[derive(Debug, Clone, PartialEq)] pub enum OpCode { + /// Removes the top of the stack + Pop, + /// Always jumps + Jump(isize), + /// Conditionally jumps if the top of the stack is true + JumpIfTrue(usize), + /// Conditionally jumps if the top of the stack is false + JumpIfFalse(usize), /// Pushes a constant value on the stack Constant(usize), /// Reads local variable at the given slot and pushes it on the stack @@ -24,14 +32,34 @@ pub struct Chunk { } impl Chunk { + pub fn len(&self) -> usize { + self.code.len() + } + pub fn add_constant(&mut self, value: Value) -> usize { self.constants.push(value); self.constants.len() - 1 } - pub fn write(&mut self, op: OpCode, span: Span) { + pub fn write(&mut self, op: OpCode, span: Span) -> usize { self.code.push(op); (self.spans).push(span); + self.code.len() - 1 + } + + pub fn patch_jump(&mut self, op_idx: usize) { + let len = self.code.len(); + match self.code.get_mut(op_idx) { + Some(OpCode::JumpIfFalse(offset) | OpCode::JumpIfTrue(offset)) => { + *offset = len - op_idx - 1 + } + Some(OpCode::Jump(offset)) => { + *offset = isize::try_from(len - op_idx - 1).expect("usize underflow") + } + _ => { + panic!("expected to backpatch JumpIfFalse") + } + } } pub fn is_empty(&self) -> bool { diff --git a/ndc_vm/src/compiler.rs b/ndc_vm/src/compiler.rs index 585794e2..5b361620 100644 --- a/ndc_vm/src/compiler.rs +++ b/ndc_vm/src/compiler.rs @@ -1,7 +1,7 @@ use crate::chunk::{Chunk, OpCode}; use crate::{Object, Value}; use ndc_lexer::Span; -use ndc_parser::{Expression, ExpressionLocation, Lvalue, ResolvedVar}; +use ndc_parser::{Binding, Expression, ExpressionLocation, LogicalOperator, Lvalue, ResolvedVar}; pub struct Compiler; @@ -18,8 +18,12 @@ impl Compiler { chunk } } -fn compile_expr(ExpressionLocation { expression, span }: ExpressionLocation, chunk: &mut Chunk) { +fn compile_expr( + ExpressionLocation { expression, span }: ExpressionLocation, + chunk: &mut Chunk, +) -> usize { eprintln!("[COMPILING]: {expression:?}"); + let start_len = chunk.len(); match expression { Expression::BoolLiteral(b) => { let idx = chunk.add_constant(Value::Bool(b)); @@ -46,15 +50,43 @@ fn compile_expr(ExpressionLocation { expression, span }: ExpressionLocation, chu chunk.write(OpCode::Constant(idx), span); } Expression::Identifier { resolved, .. } => match resolved { - ndc_parser::Binding::Resolved(ResolvedVar::Local { slot }) => { + Binding::None => todo!("return a nice error"), + Binding::Resolved(ResolvedVar::Local { slot }) => { chunk.write(OpCode::GetLocal(slot), span); } - _ => {} + Binding::Resolved(ResolvedVar::Upvalue { slot, depth }) => { + todo!("?") + } + Binding::Resolved(ResolvedVar::Global { slot }) => { + todo!("?") + } + Binding::Dynamic(_) => {} }, Expression::Statement(stm) => { compile_expr(*stm, chunk); } - Expression::Logical { .. } => {} + Expression::Logical { + left, + right, + operator, + } => { + let left_span = left.span; + compile_expr(*left, chunk); + match operator { + LogicalOperator::And => { + let end_jump = chunk.write(OpCode::JumpIfFalse(0), left_span); + chunk.write(OpCode::Pop, span); + compile_expr(*right, chunk); + chunk.patch_jump(end_jump); + } + LogicalOperator::Or => { + let end_jump = chunk.write(OpCode::JumpIfTrue(0), left_span); + chunk.write(OpCode::Pop, span); + compile_expr(*right, chunk); + chunk.patch_jump(end_jump); + } + } + } // TODO: is this supposed to be different in the VM? Expression::Assignment { l_value, @@ -71,11 +103,11 @@ fn compile_expr(ExpressionLocation { expression, span }: ExpressionLocation, chu ResolvedVar::Local { slot } => { chunk.write(OpCode::SetLocal(slot), lv_span); } - ResolvedVar::Upvalue { .. } => {} - ResolvedVar::Global { .. } => {} + ResolvedVar::Upvalue { .. } => todo!("?"), + ResolvedVar::Global { .. } => todo!("?"), }, - Lvalue::Index { .. } => {} - Lvalue::Sequence(_) => {} + Lvalue::Index { .. } => todo!("?"), + Lvalue::Sequence(_) => todo!("?"), } } Expression::OpAssignment { .. } => {} @@ -88,10 +120,58 @@ fn compile_expr(ExpressionLocation { expression, span }: ExpressionLocation, chu compile_expr(statement, chunk); } } - Expression::If { .. } => {} - Expression::While { .. } => {} + Expression::If { + condition, + on_true, + on_false, + } => { + let condition_span = condition.span; + compile_expr(*condition, chunk); + let conditional_jump_idx = chunk.write(OpCode::JumpIfFalse(0), condition_span); + chunk.write(OpCode::Pop, span); + compile_expr(*on_true, chunk); + // If there is an else branch we need to compile it and backpatch the jump instruction to find it + if let Some(on_false) = on_false { + // In the true branch still we add a jump instruction at the end + let jump_to_end_op = chunk.write(OpCode::Jump(0), span); + // Change the earlier jump to jump over the jump (YO DAWG) + chunk.patch_jump(conditional_jump_idx); + chunk.write(OpCode::Pop, span); + compile_expr(*on_false, chunk); + chunk.patch_jump(jump_to_end_op); + } else { + chunk.patch_jump(conditional_jump_idx); + // If we're jumping over true we still need to pop the condition from the stack + chunk.write(OpCode::Pop, span); + } + } + Expression::While { + expression: condition, + loop_body, + } => { + let condition_span = condition.span; + compile_expr(*condition, chunk); + let conditional_jump_idx = chunk.write(OpCode::JumpIfFalse(0), condition_span); + chunk.write(OpCode::Pop, span); + let body_size = compile_expr(*loop_body, chunk); + chunk.write(OpCode::Jump(-((body_size + 1) as isize)), span); + chunk.patch_jump(conditional_jump_idx); + chunk.write(OpCode::Pop, span); + } Expression::For { .. } => {} - Expression::Call { .. } => {} + Expression::Call { + arguments, + function, + } => { + for argument in arguments { + compile_expr(argument, chunk); + } + + // compile_expr(*function, chunk); + + // chunk.write(OpCode::Call, span); + // chunk.write(OpCode::Return); + } Expression::Index { .. } => {} Expression::Tuple { .. } => {} Expression::List { .. } => {} @@ -102,4 +182,6 @@ fn compile_expr(ExpressionLocation { expression, span }: ExpressionLocation, chu Expression::RangeInclusive { .. } => {} Expression::RangeExclusive { .. } => {} } + + chunk.len() - start_len } diff --git a/ndc_vm/src/vm.rs b/ndc_vm/src/vm.rs index 990d3ce1..7269be3f 100644 --- a/ndc_vm/src/vm.rs +++ b/ndc_vm/src/vm.rs @@ -62,6 +62,24 @@ impl Vm { self.locals.push(value); } } + OpCode::JumpIfFalse(offset) => { + let top = self.stack.last().expect("stack underflow"); + if let Value::Bool(false) = top { + self.ip = self.ip.wrapping_add(*offset); // This will probably go wrong because of the +1 later + } + } + OpCode::JumpIfTrue(offset) => { + let top = self.stack.last().expect("stack underflow"); + if let Value::Bool(true) = top { + self.ip = self.ip.wrapping_add(*offset); // This will probably go wrong because of the +1 later + } + } + OpCode::Jump(offset) => { + self.ip = self.ip.wrapping_add_signed(*offset); + } + OpCode::Pop => { + self.stack.pop(); + } } } } From 2cd2f900775250f1256cb5acf3bc74b751df64e4 Mon Sep 17 00:00:00 2001 From: Tim Fennis Date: Wed, 4 Mar 2026 12:40:03 +0100 Subject: [PATCH 008/185] Make function definition expression contain a signature instead of expressions --- Cargo.lock | 1 + ndc_interpreter/src/evaluate/mod.rs | 8 +- ndc_interpreter/src/function.rs | 15 +- ndc_interpreter/src/lib.rs | 9 +- ndc_interpreter/src/semantic/analyser.rs | 63 +++------ ndc_lsp/src/backend.rs | 4 +- ndc_parser/src/expression.rs | 166 +---------------------- ndc_parser/src/parser.rs | 34 ++++- ndc_parser/src/static_type.rs | 40 ++++-- ndc_vm/Cargo.toml | 1 + ndc_vm/src/compiler.rs | 57 ++++++-- ndc_vm/src/lib.rs | 16 +++ ndc_vm/src/value.rs | 66 +++++++++ 13 files changed, 227 insertions(+), 253 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 59aa4d83..7f643ae2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1354,6 +1354,7 @@ dependencies = [ name = "ndc_vm" version = "0.2.1" dependencies = [ + "ndc_core", "ndc_lexer", "ndc_parser", "num", diff --git a/ndc_interpreter/src/evaluate/mod.rs b/ndc_interpreter/src/evaluate/mod.rs index cf174a8a..0a36ab44 100644 --- a/ndc_interpreter/src/evaluate/mod.rs +++ b/ndc_interpreter/src/evaluate/mod.rs @@ -336,7 +336,7 @@ pub(crate) fn evaluate_expression( resolve_and_call(function, evaluated_args, environment, span)? } Expression::FunctionDeclaration { - parameters, + type_signature, body, resolved_name, return_type, @@ -344,11 +344,7 @@ pub(crate) fn evaluate_expression( .. } => { let mut user_function = FunctionBody::Closure { - parameter_names: parameters - .as_parameters() - .into_iter() - .map(|x| x.to_string()) - .collect(), + type_signature: type_signature.clone(), body: *body.clone(), return_type: return_type.clone().unwrap_or_else(StaticType::unit), environment: environment.clone(), diff --git a/ndc_interpreter/src/function.rs b/ndc_interpreter/src/function.rs index a7c4ed96..3d155e76 100644 --- a/ndc_interpreter/src/function.rs +++ b/ndc_interpreter/src/function.rs @@ -156,7 +156,7 @@ impl Function { #[derive(Clone)] pub enum FunctionBody { Closure { - parameter_names: Vec, + type_signature: TypeSignature, body: ExpressionLocation, return_type: StaticType, environment: Rc>, @@ -181,9 +181,7 @@ pub enum FunctionBody { impl FunctionBody { pub fn arity(&self) -> Option { match self { - Self::Closure { - parameter_names, .. - } => Some(parameter_names.len()), + Self::Closure { type_signature, .. } => type_signature.arity(), Self::NumericUnaryOp { .. } => Some(1), Self::NumericBinaryOp { .. } => Some(2), Self::GenericFunction { type_signature, .. } => type_signature.arity(), @@ -205,14 +203,7 @@ impl FunctionBody { fn type_signature(&self) -> TypeSignature { match self { - Self::Closure { - parameter_names, .. - } => TypeSignature::Exact( - parameter_names - .iter() - .map(|name| Parameter::new(name, StaticType::Any)) - .collect(), - ), + Self::Closure { type_signature, .. } => type_signature.clone(), Self::Memoized { cache: _, function } => function.type_signature(), Self::NumericUnaryOp { .. } => { TypeSignature::Exact(vec![Parameter::new("num", StaticType::Number)]) diff --git a/ndc_interpreter/src/lib.rs b/ndc_interpreter/src/lib.rs index 5ba0dd47..9f071c3f 100644 --- a/ndc_interpreter/src/lib.rs +++ b/ndc_interpreter/src/lib.rs @@ -101,8 +101,10 @@ impl Interpreter { &mut self, expressions: impl Iterator, ) -> Result { - let code = Compiler::compile(expressions); + let code = Compiler::compile(expressions)?.into_chunk(); + let mut vm = Vm::new(code); + vm.run().expect("VM failed"); Ok(Value::unit()) @@ -163,6 +165,11 @@ pub enum InterpreterError { #[from] cause: semantic::AnalysisError, }, + #[error("Compilation error")] + Compiler { + #[from] + cause: ndc_vm::CompileError, + }, #[error("Error while executing code")] Evaluation(#[from] EvaluationError), } diff --git a/ndc_interpreter/src/semantic/analyser.rs b/ndc_interpreter/src/semantic/analyser.rs index 9c33e251..5fdcfa07 100644 --- a/ndc_interpreter/src/semantic/analyser.rs +++ b/ndc_interpreter/src/semantic/analyser.rs @@ -2,7 +2,9 @@ use crate::function::StaticType; use crate::semantic::ScopeTree; use itertools::Itertools; use ndc_lexer::Span; -use ndc_parser::{Binding, Expression, ExpressionLocation, ForBody, ForIteration, Lvalue}; +use ndc_parser::{ + Binding, Expression, ExpressionLocation, ForBody, ForIteration, Lvalue, TypeSignature, +}; use std::fmt::Debug; #[derive(Debug)] @@ -99,7 +101,7 @@ impl Analyser { Expression::FunctionDeclaration { name, resolved_name, - parameters, + type_signature, body, return_type: return_type_slot, .. @@ -109,12 +111,8 @@ impl Analyser { // Pre-register the function before analysing its body so recursive calls can // resolve the name. The return type is unknown at this point so we use Any. let pre_slot = if let Some(name) = name { - let param_types: Vec = - std::iter::repeat_n(StaticType::Any, extract_argument_arity(parameters)) - .collect(); - let placeholder = StaticType::Function { - parameters: Some(param_types), + parameters: type_signature.types(), return_type: Box::new(StaticType::Any), }; Some( @@ -126,7 +124,7 @@ impl Analyser { }; self.scope_tree.new_function_scope(); - let param_types = self.resolve_parameters_declarative(parameters)?; + let param_types = self.resolve_parameters_declarative(type_signature, *span)?; let return_type = self.analyse(body)?; self.scope_tree.destroy_scope(); @@ -472,41 +470,28 @@ impl Analyser { /// Resolve expressions as arguments to a function and return the function arity fn resolve_parameters_declarative( &mut self, - arguments: &mut ExpressionLocation, + type_signature: &TypeSignature, + span: Span, ) -> Result, AnalysisError> { - let mut types: Vec = Vec::new(); - let mut names: Vec<&str> = Vec::new(); - - let ExpressionLocation { - expression: Expression::Tuple { values }, - .. - } = arguments - else { - panic!("expected arguments to be tuple"); + let TypeSignature::Exact(parameters) = type_signature else { + return Ok(vec![]); }; - for arg in values { - let ExpressionLocation { - expression: Expression::Identifier { name, resolved }, - span, - } = arg - else { - panic!("expected tuple values to be ident"); - }; + let mut types: Vec = Vec::new(); + let mut seen_names: Vec<&str> = Vec::new(); + for param in parameters { // TODO: big challenge how do we figure out the function parameter types? // it seems like this is something we need an HM like system for!? let resolved_type = StaticType::Any; types.push(resolved_type.clone()); - if names.contains(&name.as_str()) { - return Err(AnalysisError::parameter_redefined(name, *span)); + if seen_names.contains(¶m.name.as_str()) { + return Err(AnalysisError::parameter_redefined(¶m.name, span)); } - names.push(name); + seen_names.push(¶m.name); - *resolved = Binding::Resolved( - self.scope_tree - .create_local_binding((*name).clone(), resolved_type), - ); + self.scope_tree + .create_local_binding(param.name.clone(), resolved_type); } Ok(types) @@ -571,18 +556,6 @@ impl Analyser { } } -fn extract_argument_arity(arguments: &ExpressionLocation) -> usize { - let ExpressionLocation { - expression: Expression::Tuple { values }, - .. - } = arguments - else { - panic!("expected arguments to be tuple"); - }; - - values.len() -} - #[derive(thiserror::Error, Debug)] #[error("{text}")] pub struct AnalysisError { diff --git a/ndc_lsp/src/backend.rs b/ndc_lsp/src/backend.rs index d0788c31..f9839789 100644 --- a/ndc_lsp/src/backend.rs +++ b/ndc_lsp/src/backend.rs @@ -211,13 +211,13 @@ fn collect_hints(expr: &ExpressionLocation, text: &str, hints: &mut Vec { if let Some(rt) = return_type { hints.push(InlayHint { - position: position_from_offset(text, parameters.span.end()), + position: position_from_offset(text, parameters_span.end()), label: InlayHintLabel::String(format!(" -> {rt}")), kind: Some(InlayHintKind::TYPE), text_edits: None, diff --git a/ndc_parser/src/expression.rs b/ndc_parser/src/expression.rs index 6ed5fcac..76a84186 100644 --- a/ndc_parser/src/expression.rs +++ b/ndc_parser/src/expression.rs @@ -1,3 +1,4 @@ +use crate::TypeSignature; use crate::operator::LogicalOperator; use crate::parser::Error as ParseError; use crate::static_type::StaticType; @@ -27,7 +28,7 @@ impl ResolvedVar { } } -#[derive(Eq, PartialEq, Clone)] +#[derive(Eq, PartialEq, Clone, Debug)] pub struct ExpressionLocation { pub expression: Expression, pub span: Span, @@ -71,8 +72,8 @@ pub enum Expression { FunctionDeclaration { name: Option, resolved_name: Option, - // TODO: Instead of an ExpressionLocation with a Tuple the parser should just give us something we can actually work with - parameters: Box, + type_signature: TypeSignature, + parameters_span: Span, body: Box, return_type: Option, pure: bool, @@ -267,162 +268,3 @@ impl TryFrom for Lvalue { } } } - -#[allow(clippy::missing_fields_in_debug, clippy::too_many_lines)] -impl std::fmt::Debug for ExpressionLocation { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - // write!(f, "{{{:?} at {:?}}}", self.expression, self.span) - match &self.expression { - Expression::BoolLiteral(b) => { - f.debug_struct("BooleanLiteral").field("value", &b).finish() - } - Expression::StringLiteral(s) => { - f.debug_struct("StringLiteral").field("value", &s).finish() - } - Expression::Int64Literal(i) => f.debug_struct("IntLiteral").field("value", &i).finish(), - Expression::Float64Literal(v) => { - f.debug_struct("FloatLiteral").field("value", &v).finish() - } - Expression::BigIntLiteral(big_int) => f - .debug_struct("BigIntLiteral") - .field("value", &big_int) - .finish(), - Expression::ComplexLiteral(complex) => f - .debug_struct("CoplexLiteral") - .field("value", &complex) - .finish(), - Expression::Identifier { - name: ident, - resolved, - } => f - .debug_struct("Ident") - .field("value", &ident) - .field("resolved", resolved) - .finish(), - Expression::Statement(expression_location) => f - .debug_struct("Statement") - .field("expression", &expression_location) - .finish(), - Expression::Logical { - left, - operator, - right, - } => f - .debug_struct("Logical") - .field("left", left) - .field("operator", operator) - .field("right", right) - .finish(), - Expression::Grouping(expression_location) => f - .debug_struct("Grouping") - .field("expression", expression_location) - .finish(), - Expression::VariableDeclaration { l_value, value } => f - .debug_struct("VariableDeclaration") - .field("l_value", l_value) - .field("value", value) - .finish(), - Expression::Assignment { l_value, r_value } => f - .debug_struct("Assignment") - .field("l_value", l_value) - .field("r_value", r_value) - .finish(), - Expression::OpAssignment { - l_value, - r_value: value, - operation, - resolved_operation, - resolved_assign_operation, - } => f - .debug_struct("OpAssignment") - .field("l_value", l_value) - .field("value", value) - .field("operation", operation) - .field("resolved_operation", resolved_operation) - .field("resolved_assign_operation", resolved_assign_operation) - .finish(), - Expression::FunctionDeclaration { - name, - parameters, - return_type, - body, - pure, - resolved_name, - } => f - .debug_struct("FunctionDeclaration") - .field("name", name) - .field("resolved_name", resolved_name) - .field("parameters", parameters) - .field("return_type", return_type) - .field("body", body) - .field("pure", pure) - .finish(), - Expression::Block { statements } => f - .debug_struct("Block") - .field("statements", statements) - .finish(), - Expression::If { - condition, - on_true, - on_false, - } => f - .debug_struct("If") - .field("condition", condition) - .field("on_true", on_true) - .field("on_false", on_false) - .finish(), - Expression::While { - expression, - loop_body, - } => f - .debug_struct("While") - .field("expression", expression) - .field("loop_body", loop_body) - .finish(), - Expression::For { iterations, body } => f - .debug_struct("For") - .field("iterations", iterations) - .field("body", body) - .finish(), - Expression::Call { - function, - arguments, - } => f - .debug_struct("Call") - .field("function", function) - .field("arguments", arguments) - .finish(), - Expression::Index { value, index } => f - .debug_struct("Index") - .field("value", value) - .field("index", index) - .finish(), - Expression::Tuple { values } => { - f.debug_struct("Tuple").field("values", values).finish() - } - Expression::List { values } => f.debug_struct("List").field("values", values).finish(), - Expression::Map { values, default } => f - .debug_struct("Map") - .field("values", values) - .field("default", default) - .finish(), - Expression::Return { value } => f.debug_struct("Return").field("value", value).finish(), - Expression::Break => f.debug_struct("Break").finish(), - Expression::Continue => f.debug_struct("Continue").finish(), - Expression::RangeInclusive { start, end } => f - .debug_struct("RangeInclusive") - .field("start", start) - .field("end", end) - .field("start", start) - .field("end", end) - .finish(), - Expression::RangeExclusive { start, end } => f - .debug_struct("RangeExclusive") - .field("start", start) - .field("end", end) - .field("start", start) - .field("end", end) - .finish(), - } - } -} diff --git a/ndc_parser/src/parser.rs b/ndc_parser/src/parser.rs index 733566fb..fc0feacb 100644 --- a/ndc_parser/src/parser.rs +++ b/ndc_parser/src/parser.rs @@ -3,6 +3,7 @@ use std::fmt::Write; use crate::expression::Expression; use crate::expression::{Binding, ExpressionLocation, ForBody, ForIteration, Lvalue}; use crate::operator::{BinaryOperator, LogicalOperator, UnaryOperator}; +use crate::{Parameter, StaticType, TypeSignature}; use ndc_lexer::{Span, Token, TokenLocation}; pub struct Parser { @@ -1145,11 +1146,15 @@ impl Parser { None => return Err(Error::end_of_input(argument_list.span)), }; + let parameters_span = argument_list.span; let span = fn_token.span.merge(body.span); Ok(ExpressionLocation { expression: Expression::FunctionDeclaration { name: identifier, - parameters: Box::new(argument_list), + type_signature: argument_list + .try_into() + .expect("INTERNAL ERROR: type of argument list is incorrect"), + parameters_span, body: Box::new(body), return_type: None, // At some point in the future we could use type declarations here to insert the type (return type inference is cringe anyway) pure: is_pure, @@ -1324,3 +1329,30 @@ fn tokens_to_string(tokens: &[Token]) -> String { } buf } + +impl TryFrom for TypeSignature { + type Error = (); + + fn try_from( + ExpressionLocation { expression, .. }: ExpressionLocation, + ) -> Result { + let Expression::Tuple { values } = expression else { + return Err(()); + }; + + values + .into_iter() + .map(|expression_location| { + let ExpressionLocation { expression, .. } = expression_location; + + match expression { + Expression::Identifier { name, .. } => { + Ok(Parameter::new(name, StaticType::Any)) + } + _ => Err(()), + } + }) + .collect::, ()>>() + .map(TypeSignature::Exact) + } +} diff --git a/ndc_parser/src/static_type.rs b/ndc_parser/src/static_type.rs index 732b0202..21e636fc 100644 --- a/ndc_parser/src/static_type.rs +++ b/ndc_parser/src/static_type.rs @@ -7,6 +7,12 @@ pub enum TypeSignature { Exact(Vec), } +impl Default for TypeSignature { + fn default() -> Self { + Self::Exact(vec![]) + } +} + impl TypeSignature { /// Matches a list of `ValueTypes` to a type signature. It can return `None` if there is no match or /// `Some(num)` where num is the sum of the distances of the types. The type `Int`, is distance 1 @@ -42,6 +48,13 @@ impl TypeSignature { Self::Exact(args) => Some(args.len()), } } + + pub fn types(&self) -> Option> { + match self { + Self::Variadic => None, + Self::Exact(v) => Some(v.iter().map(|p| p.type_name.clone()).collect()), + } + } } #[derive(Debug, Clone, Eq, PartialEq, Hash)] @@ -59,15 +72,16 @@ impl Parameter { } } -#[derive(Debug, Clone, Eq, PartialEq, Hash)] +#[derive(Debug, Clone, Eq, PartialEq, Hash, Default)] pub enum StaticType { + #[default] Any, Bool, Function { - parameters: Option>, - return_type: Box, + parameters: Option>, + return_type: Box, }, - Option(Box), + Option(Box), // Numbers Number, @@ -77,18 +91,18 @@ pub enum StaticType { Complex, // Sequences List -> List - Sequence(Box), - List(Box), + Sequence(Box), + List(Box), String, - Tuple(Vec), + Tuple(Vec), Map { - key: Box, - value: Box, + key: Box, + value: Box, }, - Iterator(Box), - MinHeap(Box), - MaxHeap(Box), - Deque(Box), + Iterator(Box), + MinHeap(Box), + MaxHeap(Box), + Deque(Box), } impl StaticType { diff --git a/ndc_vm/Cargo.toml b/ndc_vm/Cargo.toml index 8efc140d..6e734684 100644 --- a/ndc_vm/Cargo.toml +++ b/ndc_vm/Cargo.toml @@ -7,4 +7,5 @@ version.workspace = true thiserror.workspace = true ndc_parser.workspace = true ndc_lexer.workspace = true +ndc_core.workspace = true num.workspace = true diff --git a/ndc_vm/src/compiler.rs b/ndc_vm/src/compiler.rs index 5b361620..9d206bff 100644 --- a/ndc_vm/src/compiler.rs +++ b/ndc_vm/src/compiler.rs @@ -1,23 +1,32 @@ use crate::chunk::{Chunk, OpCode}; -use crate::{Object, Value}; -use ndc_lexer::Span; -use ndc_parser::{Binding, Expression, ExpressionLocation, LogicalOperator, Lvalue, ResolvedVar}; +use crate::{Function, Object, Value}; +use ndc_parser::{ + Binding, Expression, ExpressionLocation, LogicalOperator, Lvalue, ResolvedVar, StaticType, + TypeSignature, +}; pub struct Compiler; impl Compiler { - pub fn compile(expressions: impl Iterator) -> Chunk { + pub fn compile( + expressions: impl Iterator, + ) -> Result { let mut chunk = Chunk::default(); for expr_loc in expressions { compile_expr(expr_loc, &mut chunk); } - // TODO: 0,0 span is kinda strange - chunk.write(OpCode::Halt, Span::new(0, 0)); - chunk + Ok(Function::new_compiled( + None, + None, + TypeSignature::default(), + chunk, + StaticType::Any, + )) } } + fn compile_expr( ExpressionLocation { expression, span }: ExpressionLocation, chunk: &mut Chunk, @@ -54,10 +63,10 @@ fn compile_expr( Binding::Resolved(ResolvedVar::Local { slot }) => { chunk.write(OpCode::GetLocal(slot), span); } - Binding::Resolved(ResolvedVar::Upvalue { slot, depth }) => { + Binding::Resolved(ResolvedVar::Upvalue { .. }) => { todo!("?") } - Binding::Resolved(ResolvedVar::Global { slot }) => { + Binding::Resolved(ResolvedVar::Global { .. }) => { todo!("?") } Binding::Dynamic(_) => {} @@ -111,7 +120,25 @@ fn compile_expr( } } Expression::OpAssignment { .. } => {} - Expression::FunctionDeclaration { .. } => {} + Expression::FunctionDeclaration { + name, + body, + type_signature, + return_type, + .. + } => { + let mut fn_chunk = Chunk::default(); + compile_expr(*body, &mut fn_chunk); + + // TODO: what do we do with the compiled function + Function::new_compiled( + name, + None, + type_signature, + fn_chunk, + return_type.unwrap_or_default(), + ); + } Expression::Grouping(statements) => { compile_expr(*statements, chunk); } @@ -154,7 +181,12 @@ fn compile_expr( let conditional_jump_idx = chunk.write(OpCode::JumpIfFalse(0), condition_span); chunk.write(OpCode::Pop, span); let body_size = compile_expr(*loop_body, chunk); - chunk.write(OpCode::Jump(-((body_size + 1) as isize)), span); + chunk.write( + OpCode::Jump( + -isize::try_from(body_size + 1).expect("unable to convert usize to isize"), + ), + span, + ); chunk.patch_jump(conditional_jump_idx); chunk.write(OpCode::Pop, span); } @@ -185,3 +217,6 @@ fn compile_expr( chunk.len() - start_len } + +#[derive(thiserror::Error, Debug)] +pub enum CompileError {} diff --git a/ndc_vm/src/lib.rs b/ndc_vm/src/lib.rs index 6948eac0..ac9b4c97 100644 --- a/ndc_vm/src/lib.rs +++ b/ndc_vm/src/lib.rs @@ -3,4 +3,20 @@ pub mod compiler; pub mod value; pub mod vm; +pub use compiler::CompileError; pub use value::*; + +#[cfg(test)] +mod test { + + #[test] + fn test_that_value_size_does_not_change() { + assert_eq!(size_of::(), 16) + } + + #[test] + fn test_that_opcode_size_does_not_change() { + // NOTE: this is allowed to change, but we'd like to know about it. + assert_eq!(size_of::(), 16) + } +} diff --git a/ndc_vm/src/value.rs b/ndc_vm/src/value.rs index 1cb398b5..2fd1d7ac 100644 --- a/ndc_vm/src/value.rs +++ b/ndc_vm/src/value.rs @@ -1,3 +1,10 @@ +use crate::chunk::Chunk; +use ndc_core::hash_map::HashMap; +use ndc_parser::{StaticType, TypeSignature}; +use std::cell::RefCell; +use std::fmt::Formatter; +use std::rc::Rc; + /// Enumerates all the different types of values that exist in the language /// All values should be pretty cheap to clone because the bigger ones are wrapped using Rc's #[derive(Clone, Debug)] @@ -18,11 +25,70 @@ pub enum Object { String(String), List(Vec), Tuple(Vec), + Function(Rc), // tec.... } +pub struct Function { + name: Option, + documentation: Option, + body: FunctionBody, +} + +impl Function { + pub(crate) fn new_compiled( + name: Option, + documentation: Option, + type_signature: TypeSignature, + body: Chunk, + return_type: StaticType, + ) -> Self { + Self { + name, + documentation, + body: FunctionBody::Compiled { + type_signature, + body, + return_type, + }, + } + } + + pub fn into_chunk(self) -> Chunk { + match self.body { + FunctionBody::Compiled { body, .. } => body, + FunctionBody::Memoized { .. } => panic!("cannot get chunk from memoized function"), + } + } +} + +pub enum FunctionBody { + Compiled { + type_signature: TypeSignature, + body: Chunk, + return_type: StaticType, + // environment: Rc>, + }, + // NativeFunction { + // type_signature: TypeSignature, + // return_type: StaticType, + // function: fn(&mut [Value], &Rc>) -> EvaluationResult, + // }, + Memoized { + cache: RefCell>, + function: Box, + }, +} + impl From for Value { fn from(value: Object) -> Self { Self::Object(Box::new(value)) } } + +impl std::fmt::Debug for Function { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + // Whatever + write!(f, "function {:?}", self.name) + } +} From b565f588cb7f51ab08a35fdf5b6df95baac0966a Mon Sep 17 00:00:00 2001 From: Tim Fennis Date: Wed, 4 Mar 2026 14:47:23 +0100 Subject: [PATCH 009/185] Add tests for the compiler output --- Cargo.lock | 3 + ndc_interpreter/src/lib.rs | 2 +- ndc_parser/src/parser.rs | 8 --- ndc_vm/src/chunk.rs | 11 +++- ndc_vm/src/compiler.rs | 44 +++++++------- ndc_vm/src/value.rs | 70 ++++++---------------- ndc_vm/src/vm.rs | 69 ++++++++++++++------- tests/Cargo.toml | 9 ++- tests/src/compiler.rs | 120 +++++++++++++++++++++++++++++++++++++ 9 files changed, 232 insertions(+), 104 deletions(-) create mode 100644 tests/src/compiler.rs diff --git a/Cargo.lock b/Cargo.lock index 7f643ae2..a552aae3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2086,7 +2086,10 @@ name = "tests" version = "0.2.1" dependencies = [ "ndc_interpreter", + "ndc_lexer", + "ndc_parser", "ndc_stdlib", + "ndc_vm", "owo-colors", ] diff --git a/ndc_interpreter/src/lib.rs b/ndc_interpreter/src/lib.rs index 9f071c3f..9955be71 100644 --- a/ndc_interpreter/src/lib.rs +++ b/ndc_interpreter/src/lib.rs @@ -101,7 +101,7 @@ impl Interpreter { &mut self, expressions: impl Iterator, ) -> Result { - let code = Compiler::compile(expressions)?.into_chunk(); + let code = Compiler::compile(expressions)?; let mut vm = Vm::new(code); diff --git a/ndc_parser/src/parser.rs b/ndc_parser/src/parser.rs index fc0feacb..f3e4560a 100644 --- a/ndc_parser/src/parser.rs +++ b/ndc_parser/src/parser.rs @@ -1165,14 +1165,6 @@ impl Parser { } /// Parses a block expression including the block delimiters `{` and `}` - /// example: - /// ```ndc - /// { - /// func(); - /// x := 1 + 1; - /// x - /// } - /// ``` fn block(&mut self) -> Result { let left_curly_span = self.require_token(&[Token::LeftCurlyBracket])?; diff --git a/ndc_vm/src/chunk.rs b/ndc_vm/src/chunk.rs index 931cd14c..950f8dfd 100644 --- a/ndc_vm/src/chunk.rs +++ b/ndc_vm/src/chunk.rs @@ -2,7 +2,8 @@ use crate::Value; use ndc_lexer::Span; /// A single bytecode instruction. -#[derive(Debug, Clone, PartialEq)] +// NOTE: For now we just derive Copy for OpCode since it makes our live easier and it probably won't cost THAT much performance. In the future we might want to do some proper byte packing and dive into unsafe land to optimize further. +#[derive(Debug, Clone, Copy, PartialEq)] pub enum OpCode { /// Removes the top of the stack Pop, @@ -66,8 +67,12 @@ impl Chunk { self.code.is_empty() } #[inline(always)] - pub fn opcode(&self, idx: usize) -> &OpCode { - &self.code[idx] + pub fn opcode(&self, idx: usize) -> OpCode { + self.code[idx] + } + + pub fn opcodes(&self) -> &[OpCode] { + &self.code } pub fn constant(&self, idx: usize) -> &Value { diff --git a/ndc_vm/src/compiler.rs b/ndc_vm/src/compiler.rs index 9d206bff..2104a9eb 100644 --- a/ndc_vm/src/compiler.rs +++ b/ndc_vm/src/compiler.rs @@ -1,5 +1,7 @@ use crate::chunk::{Chunk, OpCode}; -use crate::{Function, Object, Value}; +use crate::value::{CompiledFunction, Function}; +use crate::{Object, Value}; +use ndc_lexer::Span; use ndc_parser::{ Binding, Expression, ExpressionLocation, LogicalOperator, Lvalue, ResolvedVar, StaticType, TypeSignature, @@ -10,20 +12,21 @@ pub struct Compiler; impl Compiler { pub fn compile( expressions: impl Iterator, - ) -> Result { + ) -> Result { let mut chunk = Chunk::default(); for expr_loc in expressions { compile_expr(expr_loc, &mut chunk); } - Ok(Function::new_compiled( - None, - None, - TypeSignature::default(), - chunk, - StaticType::Any, - )) + chunk.write(OpCode::Halt, Span::new(0, 0)); + + Ok(CompiledFunction { + name: None, + type_signature: TypeSignature::default(), + body: chunk, + return_type: StaticType::Any, + }) } } @@ -130,14 +133,16 @@ fn compile_expr( let mut fn_chunk = Chunk::default(); compile_expr(*body, &mut fn_chunk); - // TODO: what do we do with the compiled function - Function::new_compiled( + let compiled = CompiledFunction { name, - None, type_signature, - fn_chunk, - return_type.unwrap_or_default(), + body: fn_chunk, + return_type: return_type.unwrap_or_default(), + }; + let idx = chunk.add_constant( + Object::Function(Function::Compiled(std::rc::Rc::new(compiled))).into(), ); + chunk.write(OpCode::Constant(idx), span); } Expression::Grouping(statements) => { compile_expr(*statements, chunk); @@ -177,13 +182,15 @@ fn compile_expr( loop_body, } => { let condition_span = condition.span; + let loop_start = chunk.len(); compile_expr(*condition, chunk); let conditional_jump_idx = chunk.write(OpCode::JumpIfFalse(0), condition_span); chunk.write(OpCode::Pop, span); - let body_size = compile_expr(*loop_body, chunk); + compile_expr(*loop_body, chunk); chunk.write( OpCode::Jump( - -isize::try_from(body_size + 1).expect("unable to convert usize to isize"), + -isize::try_from(chunk.len() - loop_start + 1) + .expect("loop too large to jump back"), ), span, ); @@ -191,10 +198,7 @@ fn compile_expr( chunk.write(OpCode::Pop, span); } Expression::For { .. } => {} - Expression::Call { - arguments, - function, - } => { + Expression::Call { arguments, .. } => { for argument in arguments { compile_expr(argument, chunk); } diff --git a/ndc_vm/src/value.rs b/ndc_vm/src/value.rs index 2fd1d7ac..feac96ef 100644 --- a/ndc_vm/src/value.rs +++ b/ndc_vm/src/value.rs @@ -1,7 +1,5 @@ -use crate::chunk::Chunk; -use ndc_core::hash_map::HashMap; +use crate::chunk::{Chunk, OpCode}; use ndc_parser::{StaticType, TypeSignature}; -use std::cell::RefCell; use std::fmt::Formatter; use std::rc::Rc; @@ -25,59 +23,27 @@ pub enum Object { String(String), List(Vec), Tuple(Vec), - Function(Rc), + Function(Function), // tec.... } -pub struct Function { - name: Option, - documentation: Option, - body: FunctionBody, +#[derive(Clone)] +pub enum Function { + Compiled(Rc), + Native(Rc Value>), } -impl Function { - pub(crate) fn new_compiled( - name: Option, - documentation: Option, - type_signature: TypeSignature, - body: Chunk, - return_type: StaticType, - ) -> Self { - Self { - name, - documentation, - body: FunctionBody::Compiled { - type_signature, - body, - return_type, - }, - } - } - - pub fn into_chunk(self) -> Chunk { - match self.body { - FunctionBody::Compiled { body, .. } => body, - FunctionBody::Memoized { .. } => panic!("cannot get chunk from memoized function"), - } - } +pub struct CompiledFunction { + pub name: Option, + pub(crate) type_signature: TypeSignature, + pub(crate) body: Chunk, + pub(crate) return_type: StaticType, } -pub enum FunctionBody { - Compiled { - type_signature: TypeSignature, - body: Chunk, - return_type: StaticType, - // environment: Rc>, - }, - // NativeFunction { - // type_signature: TypeSignature, - // return_type: StaticType, - // function: fn(&mut [Value], &Rc>) -> EvaluationResult, - // }, - Memoized { - cache: RefCell>, - function: Box, - }, +impl CompiledFunction { + pub fn opcodes(&self) -> &[OpCode] { + self.body.opcodes() + } } impl From for Value { @@ -88,7 +54,9 @@ impl From for Value { impl std::fmt::Debug for Function { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - // Whatever - write!(f, "function {:?}", self.name) + match self { + Self::Compiled(func) => write!(f, "function {:?}", func.name), + Self::Native(_) => write!(f, ""), + } } } diff --git a/ndc_vm/src/vm.rs b/ndc_vm/src/vm.rs index 7269be3f..71fd9b91 100644 --- a/ndc_vm/src/vm.rs +++ b/ndc_vm/src/vm.rs @@ -1,11 +1,12 @@ -use crate::Value; -use crate::chunk::{Chunk, OpCode}; +use crate::chunk::OpCode; +use crate::value::{CompiledFunction, Function}; +use crate::{Object, Value}; +use std::rc::Rc; pub struct Vm { - chunk: Chunk, - ip: usize, stack: Vec, locals: Vec, + frames: Vec, } #[derive(thiserror::Error, Debug)] @@ -14,13 +15,26 @@ pub enum VmError { RuntimeError, } +pub struct CallFrame { + function: Rc, + ip: usize, + // offset into the locals array for this call frame + frame_pointer: usize, +} + impl Vm { - pub fn new(chunk: Chunk) -> Self { + pub fn new(function: CompiledFunction) -> Self { + let function = Rc::new(function); Self { - chunk, - ip: 0, - stack: Vec::default(), + stack: vec![Value::Object(Box::new(Object::Function(Function::Compiled( + Rc::clone(&function), + ))))], locals: Vec::default(), + frames: vec![CallFrame { + function, + ip: 0, + frame_pointer: 0, + }], } } @@ -28,13 +42,15 @@ impl Vm { eprintln!("[VM] Value bytes: {}", size_of::()); eprintln!("[VM] OpCode bytes: {}", size_of::()); - if self.chunk.is_empty() { - return Ok(()); + if self.frames.is_empty() { + panic!("no call frames") } loop { - let op = self.chunk.opcode(self.ip); - self.ip += 1; + let frame = self.frames.last_mut().expect("must not be empty"); + + let op = frame.opcode(); + frame.ip += 1; eprintln!("[VM] Running: {:?}", op); @@ -48,34 +64,35 @@ impl Vm { println!("{:?}", self.stack.pop().expect("stack underflow")); } OpCode::Constant(idx) => { - self.stack.push(self.chunk.constant(*idx).clone()); + self.stack.push(frame.function.body.constant(idx).clone()); } OpCode::GetLocal(slot) => { - self.stack.push(self.locals[*slot].clone()); + self.stack.push(self.locals[frame.slot(slot)].clone()); } OpCode::SetLocal(slot) => { let value = self.stack.pop().expect("stack underflow"); - if *slot < self.locals.len() { - self.locals[*slot] = value; + if slot < self.locals.len() { + self.locals[frame.slot(slot)] = value; } else { - self.locals.resize(*slot, Value::None); + // TODO: should we really silently allow this? If the given slot mismatches the size of our local variable storage didn't the compiler mess up? + self.locals.resize(frame.slot(slot), Value::None); self.locals.push(value); } } OpCode::JumpIfFalse(offset) => { let top = self.stack.last().expect("stack underflow"); if let Value::Bool(false) = top { - self.ip = self.ip.wrapping_add(*offset); // This will probably go wrong because of the +1 later + frame.ip = frame.ip.wrapping_add(offset); } } OpCode::JumpIfTrue(offset) => { let top = self.stack.last().expect("stack underflow"); if let Value::Bool(true) = top { - self.ip = self.ip.wrapping_add(*offset); // This will probably go wrong because of the +1 later + frame.ip = frame.ip.wrapping_add(offset); } } OpCode::Jump(offset) => { - self.ip = self.ip.wrapping_add_signed(*offset); + frame.ip = frame.ip.wrapping_add_signed(offset); } OpCode::Pop => { self.stack.pop(); @@ -84,3 +101,15 @@ impl Vm { } } } + +impl CallFrame { + #[inline(always)] + fn opcode(&mut self) -> OpCode { + self.function.body.opcode(self.ip) + } + + #[inline(always)] + fn slot(&self, slot: usize) -> usize { + self.frame_pointer + slot + } +} diff --git a/tests/Cargo.toml b/tests/Cargo.toml index f381bf12..1441dec0 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -5,9 +5,16 @@ version.workspace = true [dev-dependencies] ndc_interpreter.workspace = true +ndc_lexer.workspace = true +ndc_parser.workspace = true +ndc_vm.workspace = true ndc_stdlib.workspace = true owo-colors.workspace = true [[test]] name = "ndc_tests" -path = "src/programs.rs" \ No newline at end of file +path = "src/programs.rs" + +[[test]] +name = "compiler" +path = "src/compiler.rs" \ No newline at end of file diff --git a/tests/src/compiler.rs b/tests/src/compiler.rs new file mode 100644 index 00000000..4c3ed867 --- /dev/null +++ b/tests/src/compiler.rs @@ -0,0 +1,120 @@ +use ndc_lexer::Lexer; +use ndc_parser::Parser; +use ndc_vm::chunk::OpCode; +use ndc_vm::chunk::OpCode::*; +use ndc_vm::compiler::Compiler; + +fn compile(input: &str) -> Vec { + let tokens = Lexer::new(input) + .collect::, _>>() + .expect("lex failed"); + let expressions = Parser::from_tokens(tokens) + .parse() + .expect("parse failed"); + Compiler::compile(expressions.into_iter()) + .expect("compile failed") + .opcodes() + .to_vec() +} + +// if true { 1 } +// +// 0: Constant(0) push `true` +// 1: JumpIfFalse(2) if false, skip true-branch and land on final Pop +// 2: Pop pop condition (true path) +// 3: Constant(1) push `1` +// 4: Pop pop condition (false path, jumped here) +// 5: Halt +#[test] +fn test_if_without_else() { + assert_eq!( + compile("if true { 1 }"), + [Constant(0), JumpIfFalse(2), Pop, Constant(1), Pop, Halt] + ); +} + +// if true { 1 } else { 2 } +// +// 0: Constant(0) push `true` +// 1: JumpIfFalse(3) if false, jump to else (index 5) +// 2: Pop pop condition (true path) +// 3: Constant(1) push `1` +// 4: Jump(2) skip else, jump to Halt (index 7) +// 5: Pop pop condition (false path) +// 6: Constant(2) push `2` +// 7: Halt +#[test] +fn test_if_with_else() { + assert_eq!( + compile("if true { 1 } else { 2 }"), + [ + Constant(0), + JumpIfFalse(3), + Pop, + Constant(1), + Jump(2), + Pop, + Constant(2), + Halt + ] + ); +} + +// true and false +// +// Short-circuits: if left is false, leave it on stack and jump past right. +// +// 0: Constant(0) push `true` +// 1: JumpIfFalse(2) if false, skip Pop+right and leave false on stack +// 2: Pop pop left (it was true, discard it) +// 3: Constant(1) push `false` (result) +// 4: Halt +#[test] +fn test_and() { + assert_eq!( + compile("true and false"), + [Constant(0), JumpIfFalse(2), Pop, Constant(1), Halt] + ); +} + +// true or false +// +// Short-circuits: if left is true, leave it on stack and jump past right. +// +// 0: Constant(0) push `true` +// 1: JumpIfTrue(2) if true, skip Pop+right and leave true on stack +// 2: Pop pop left (it was false, discard it) +// 3: Constant(1) push `false` (result) +// 4: Halt +#[test] +fn test_or() { + assert_eq!( + compile("true or false"), + [Constant(0), JumpIfTrue(2), Pop, Constant(1), Halt] + ); +} + +// while true { 1 } +// +// 0: Constant(0) push `true` ← loop_start +// 1: JumpIfFalse(3) if false, jump past body to exit Pop (index 5) +// 2: Pop pop condition (true path) +// 3: Constant(1) body: push `1` +// 4: Jump(-5) jump back to loop_start (index 0) +// 5: Pop pop condition (false path, loop exit) +// 6: Halt +#[test] +fn test_while() { + assert_eq!( + compile("while true { 1 }"), + [ + Constant(0), + JumpIfFalse(3), + Pop, + Constant(1), + Jump(-5), + Pop, + Halt + ] + ); +} From 6d7ac15d7486ed3b6318fd8c31bb9d2a83ea2fff Mon Sep 17 00:00:00 2001 From: Tim Fennis Date: Wed, 4 Mar 2026 15:33:47 +0100 Subject: [PATCH 010/185] Add disassembling functionality --- Cargo.lock | 1 + ndc_bin/Cargo.toml | 1 + ndc_bin/src/main.rs | 17 ++++ ndc_interpreter/src/lib.rs | 6 ++ ndc_interpreter/src/semantic/scope.rs | 20 ++--- ndc_vm/src/chunk.rs | 13 +++ ndc_vm/src/compiler.rs | 1 - ndc_vm/src/value.rs | 88 +++++++++++++++++++ .../046_comprehension_guard_outer_var.ndc | 18 ++++ 9 files changed, 154 insertions(+), 11 deletions(-) create mode 100644 tests/programs/006_lists/046_comprehension_guard_outer_var.ndc diff --git a/Cargo.lock b/Cargo.lock index a552aae3..654fb099 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1252,6 +1252,7 @@ dependencies = [ "ndc_lexer", "ndc_lsp", "ndc_stdlib", + "ndc_vm", "owo-colors", "rustyline", "strsim", diff --git a/ndc_bin/Cargo.toml b/ndc_bin/Cargo.toml index 2cd5c4f4..a2ed3ee1 100644 --- a/ndc_bin/Cargo.toml +++ b/ndc_bin/Cargo.toml @@ -16,6 +16,7 @@ strsim.workspace = true miette = { version = "7.6.0", features = ["fancy"] } ndc_lexer.workspace = true ndc_interpreter.workspace = true +ndc_vm.workspace = true ndc_stdlib.workspace = true ndc_lsp.workspace = true owo-colors.workspace = true diff --git a/ndc_bin/src/main.rs b/ndc_bin/src/main.rs index f1942a85..1735231b 100644 --- a/ndc_bin/src/main.rs +++ b/ndc_bin/src/main.rs @@ -48,6 +48,9 @@ enum Command { stdio: bool, }, + /// Print the disassembled bytecode for an .ndc file + Disassemble { file: PathBuf }, + /// Output the documentation optionally searched using a query string Docs { query: Option }, @@ -68,6 +71,7 @@ impl Default for Command { enum Action { RunLsp, RunFile { path: PathBuf, vm: bool }, + DisassembleFile(PathBuf), HighlightFile(PathBuf), StartRepl, Docs(Option), @@ -84,6 +88,7 @@ impl TryFrom for Action { } => Self::RunFile { path: file, vm }, Command::Run { file: None, .. } => Self::StartRepl, Command::Lsp { stdio: _ } => Self::RunLsp, + Command::Disassemble { file } => Self::DisassembleFile(file), Command::Highlight { file } => Self::HighlightFile(file), Command::Docs { query } => Self::Docs(query), Command::Unknown(args) => { @@ -147,6 +152,18 @@ fn main() -> anyhow::Result<()> { } } } + Action::DisassembleFile(path) => { + let string = fs::read_to_string(path)?; + let stdout = std::io::stdout(); + let mut interpreter = Interpreter::new(stdout).with_stdlib(); + match interpreter.compile_str(&string) { + Ok(compiled) => print!("{compiled}"), + Err(e) => { + eprintln!("{:?}", miette::Report::new(diagnostic::NdcReport::from(e))); + process::exit(1); + } + } + } Action::HighlightFile(path) => { let string = fs::read_to_string(path)?; diff --git a/ndc_interpreter/src/lib.rs b/ndc_interpreter/src/lib.rs index 9955be71..add6e6b1 100644 --- a/ndc_interpreter/src/lib.rs +++ b/ndc_interpreter/src/lib.rs @@ -17,6 +17,7 @@ use crate::value::Value; use ndc_lexer::{Lexer, TokenLocation}; use ndc_parser::ExpressionLocation; use ndc_vm::compiler::Compiler; +use ndc_vm::value::CompiledFunction; use ndc_vm::vm::Vm; use std::cell::RefCell; use std::rc::Rc; @@ -62,6 +63,11 @@ impl Interpreter { self.parse_and_analyse(input) } + pub fn compile_str(&mut self, input: &str) -> Result { + let expressions = self.parse_and_analyse(input)?; + Ok(Compiler::compile(expressions.into_iter())?) + } + pub fn run_str(&mut self, input: &str) -> Result { self.run_str_with_options(input, false) } diff --git a/ndc_interpreter/src/semantic/scope.rs b/ndc_interpreter/src/semantic/scope.rs index baf3486d..c30cf7b7 100644 --- a/ndc_interpreter/src/semantic/scope.rs +++ b/ndc_interpreter/src/semantic/scope.rs @@ -141,14 +141,14 @@ impl ScopeTree { ResolvedVar::Local { slot } => self.find_type_by_slot(self.current_scope_idx, slot), ResolvedVar::Upvalue { slot, depth } => { let mut scope_idx = self.current_scope_idx; - let mut depth = depth; - while depth > 0 { + let mut env_count = 0; + while env_count < depth { + if self.scopes[scope_idx].creates_environment { + env_count += 1; + } scope_idx = self.scopes[scope_idx] .parent_idx .expect("parent_idx was None while traversing the scope tree"); - if self.scopes[scope_idx].creates_environment { - depth -= 1; - } } self.find_type_by_slot(scope_idx, slot) } @@ -369,14 +369,14 @@ impl ScopeTree { } ResolvedVar::Upvalue { depth, .. } => { let mut scope_idx = self.current_scope_idx; - let mut depth = depth; - while depth > 0 { + let mut env_count = 0; + while env_count < depth { + if self.scopes[scope_idx].creates_environment { + env_count += 1; + } scope_idx = self.scopes[scope_idx] .parent_idx .expect("parent_idx was None while traversing the scope tree"); - if self.scopes[scope_idx].creates_environment { - depth -= 1; - } } self.find_scope_owning_slot(scope_idx, var.slot()) } diff --git a/ndc_vm/src/chunk.rs b/ndc_vm/src/chunk.rs index 950f8dfd..9c1a2d96 100644 --- a/ndc_vm/src/chunk.rs +++ b/ndc_vm/src/chunk.rs @@ -78,4 +78,17 @@ impl Chunk { pub fn constant(&self, idx: usize) -> &Value { &self.constants[idx] } + + /// Iterates opcodes as `(index, opcode, constant_value)` where `constant_value` + /// is `Some` only for `Constant(idx)` opcodes. + pub fn iter(&self) -> impl Iterator)> { + self.code.iter().copied().enumerate().map(|(i, op)| { + let val = if let OpCode::Constant(idx) = op { + Some(&self.constants[idx]) + } else { + None + }; + (i, op, val) + }) + } } diff --git a/ndc_vm/src/compiler.rs b/ndc_vm/src/compiler.rs index 2104a9eb..b23f304c 100644 --- a/ndc_vm/src/compiler.rs +++ b/ndc_vm/src/compiler.rs @@ -34,7 +34,6 @@ fn compile_expr( ExpressionLocation { expression, span }: ExpressionLocation, chunk: &mut Chunk, ) -> usize { - eprintln!("[COMPILING]: {expression:?}"); let start_len = chunk.len(); match expression { Expression::BoolLiteral(b) => { diff --git a/ndc_vm/src/value.rs b/ndc_vm/src/value.rs index feac96ef..4ac579e5 100644 --- a/ndc_vm/src/value.rs +++ b/ndc_vm/src/value.rs @@ -1,5 +1,6 @@ use crate::chunk::{Chunk, OpCode}; use ndc_parser::{StaticType, TypeSignature}; +use std::fmt; use std::fmt::Formatter; use std::rc::Rc; @@ -60,3 +61,90 @@ impl std::fmt::Debug for Function { } } } + +impl fmt::Display for Value { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Value::Int(n) => write!(f, "{n}"), + Value::Float(n) => write!(f, "{n}"), + Value::Bool(b) => write!(f, "{b}"), + Value::None => write!(f, "None"), + Value::Object(obj) => write!(f, "{obj}"), + } + } +} + +impl fmt::Display for Object { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Object::Some(v) => write!(f, "Some({v})"), + Object::BigInt(n) => write!(f, "{n}"), + Object::Complex(c) => write!(f, "{c}"), + Object::Rational(r) => write!(f, "{r}"), + Object::String(s) => write!(f, "\"{s}\""), + Object::List(vs) => { + write!(f, "[")?; + for (i, v) in vs.iter().enumerate() { + if i > 0 { write!(f, ", ")?; } + write!(f, "{v}")?; + } + write!(f, "]") + } + Object::Tuple(vs) => { + write!(f, "(")?; + for (i, v) in vs.iter().enumerate() { + if i > 0 { write!(f, ", ")?; } + write!(f, "{v}")?; + } + write!(f, ")") + } + Object::Function(func) => write!(f, "{func}"), + } + } +} + +impl fmt::Display for Function { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Function::Compiled(func) => { + let name = func.name.as_deref().unwrap_or("?"); + write!(f, "") + } + Function::Native(_) => write!(f, ""), + } + } +} + +impl fmt::Display for CompiledFunction { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + let name = self.name.as_deref().unwrap_or("