diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..61c456f5 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,108 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Before commit +- Run `cargo fmt` +- Run `cargo check` and fix warnings and errors +- Ensure all tests pass (`cargo test`) +- Do not leave `TODO` comments in code — either fix the issue immediately or open a GitHub issue and record it in `TODO.md` + +## Common Commands + +```bash +# Build +cargo build + +# Run all tests +cargo test + +# Run a single test by name (substring match on the filename) +cargo test test_001_math_001_addition + +# Run benchmarks +cargo bench -p benches + +# Start REPL +cargo run --bin ndc + +# Run a .ndc script +cargo run --bin ndc -- script.ndc + +# Disassemble bytecode +cargo run --bin ndc -- disassemble script.ndc + +# Show documentation (optionally filtered by query) +cargo run --bin ndc -- docs [query] [--no-color] + +# Profile with perf (requires release-with-debug profile in Cargo.toml) +cargo build --profile release-with-debug +hyperfine --warmup 3 './target/release-with-debug/ndc script.ndc' +perf stat ./target/release-with-debug/ndc script.ndc +perf record -g --call-graph=dwarf -o /tmp/out.perf ./target/release-with-debug/ndc script.ndc +perf report -i /tmp/out.perf --stdio --no-children --percent-limit=1 +``` + +## Manual + +User-facing language documentation lives in `manual/src/`. It is an mdBook project. The entry point is `manual/src/SUMMARY.md`. + +When making changes that affect language behaviour or runtime semantics, update the relevant manual page. 
+ +## Architecture + +This is a custom language interpreter ("Andy C++") with a bytecode VM backend: + +``` +Source → [Lexer] → Tokens → [Parser] → AST → [Analyser] → Annotated AST + ↓ + [Compiler] + ↓ + [Bytecode VM] → Value +``` + +### Git Workflow +- Prefer short commit messages, only use multiple lines in case of unrelated changes +- Pull request titles must start with an emoji + +### Crate Layout + +| Crate | Role | +|---|---| +| `ndc_lexer` | Tokenisation, `Span` (offset+length) | +| `ndc_parser` | AST (`Expression`, `ExpressionLocation`), parser | +| `ndc_core` | `Number` (BigInt/Rational/Complex), `StaticType`, `FunctionRegistry`, ordering, hashing | +| `ndc_interpreter` | `Interpreter` facade (analyse via `ndc_analyser`, compile + run via VM) | +| `ndc_vm` | Bytecode `Compiler` and stack-based `Vm` | +| `ndc_stdlib` | Built-in functions registered via `FunctionRegistry` | +| `ndc_lsp` | LSP backend (hover, inlay hints) | +| `ndc_bin` | CLI entry point, REPL, syntax highlighting | + +### Key Concepts + +**Single execution path** — The bytecode VM in `ndc_vm` is the only execution path. `ndc_interpreter` acts as a facade: it runs the semantic analyser, compiles to bytecode via `ndc_vm::Compiler`, and executes via `ndc_vm::Vm`. `vm_bridge.rs` handles value conversion between `ndc_interpreter::Value` and `ndc_vm::Value`. + +**Value types** — `ndc_interpreter/src/value.rs` and `ndc_vm/src/value.rs` are separate enums. The VM `Value` is constrained to 16 bytes (`Int(i64)`, `Float(f64)`, `Bool`, `None`, `Object(Box)`). + +**Function overloading** — Functions are matched by name and arity. The semantic analyser produces `Binding::Resolved` (exact compile-time match) or `Binding::Dynamic(Vec)` (runtime dispatch among candidates). Binary operators like `+` are parsed as `Expression::Call`. + +**Semantic analyser** — `ndc_analyser/src/analyser.rs` (the `ndc_analyser` crate) infers `StaticType` and resolves function bindings. `StaticType::Any` is the fallback when inference fails. 
+ +**`FunctionRegistry`** — Lives in `ndc_core`. Holds all registered built-in functions as `Rc`. Replaces the old `Environment`-based function registry. At runtime, natives are passed to the VM as global slots. + +**Persistent REPL** — The `Interpreter` keeps `repl_state: Option<(Vm, Compiler)>` so variables declared on one REPL line are visible on subsequent lines (resume-from-halt pattern). + +### Test Infrastructure + +The `tests` crate auto-generates one test function per `.ndc` file at build time via `tests/build.rs`. For every `.ndc` file under `tests/programs/`, a single Rust test function is generated: +- `test_<name>` — runs via `Interpreter::run_str` (VM) + +Test directives are comments inside `.ndc` files: +```ndc +// expect-output: 42 ← assert stdout equals this +// expect-error: divide ← assert error message contains this substring +``` + +### Compiler Tests + +`compiler_tests/` validates the bytecode compiler by asserting exact `OpCode` sequences. Use these when adding new VM instructions. 
diff --git a/Cargo.lock b/Cargo.lock index f24b433e..b0d646b6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -327,6 +327,16 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "compiler_tests" +version = "0.3.0" +dependencies = [ + "ndc_interpreter", + "ndc_lexer", + "ndc_parser", + "ndc_vm", +] + [[package]] name = "convert_case" version = "0.10.0" @@ -523,41 +533,6 @@ dependencies = [ "typenum", ] -[[package]] -name = "darling" -version = "0.20.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" -dependencies = [ - "darling_core", - "darling_macro", -] - -[[package]] -name = "darling_core" -version = "0.20.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim", - "syn", -] - -[[package]] -name = "darling_macro" -version = "0.20.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" -dependencies = [ - "darling_core", - "quote", - "syn", -] - [[package]] name = "dashmap" version = "5.5.3" @@ -571,37 +546,6 @@ dependencies = [ "parking_lot_core", ] -[[package]] -name = "derive_builder" -version = "0.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" -dependencies = [ - "derive_builder_macro", -] - -[[package]] -name = "derive_builder_core" -version = "0.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" -dependencies = [ - "darling", - "proc-macro2", - "quote", - "syn", -] - 
-[[package]] -name = "derive_builder_macro" -version = "0.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" -dependencies = [ - "derive_builder_core", - "syn", -] - [[package]] name = "derive_more" version = "2.1.1" @@ -716,12 +660,6 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - [[package]] name = "foldhash" version = "0.1.5" @@ -900,6 +838,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "home" version = "0.5.12" @@ -1002,12 +946,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" -[[package]] -name = "ident_case" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" - [[package]] name = "idna" version = "1.1.0" @@ -1041,6 +979,17 @@ dependencies = [ "serde_core", ] +[[package]] +name = "is-terminal" +version = "0.4.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.59.0", +] + [[package]] name = "is_ci" version = "1.2.0" @@ -1191,7 +1140,7 @@ 
dependencies = [ "cfg-if", "miette-derive", "owo-colors", - "supports-color", + "supports-color 3.0.2", "supports-hyperlinks", "supports-unicode", "terminal_size", @@ -1240,14 +1189,26 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "ndc_analyser" +version = "0.3.0" +dependencies = [ + "itertools 0.14.0", + "ndc_core", + "ndc_lexer", + "ndc_parser", + "thiserror", +] + [[package]] name = "ndc_bin" -version = "0.2.1" +version = "0.3.0" dependencies = [ "anyhow", "clap", "itertools 0.14.0", "miette", + "ndc_core", "ndc_interpreter", "ndc_lexer", "ndc_lsp", @@ -1255,17 +1216,17 @@ dependencies = [ "owo-colors", "rustyline", "strsim", - "tap", "termimad", "tokio", + "yansi", ] [[package]] name = "ndc_core" -version = "0.2.1" +version = "0.3.0" dependencies = [ "ahash", - "ndc_parser", + "itertools 0.14.0", "num", "ordered-float", "ryu", @@ -1274,23 +1235,19 @@ dependencies = [ [[package]] name = "ndc_interpreter" -version = "0.2.1" +version = "0.3.0" dependencies = [ - "anyhow", - "derive_builder", - "derive_more", - "itertools 0.14.0", + "ndc_analyser", "ndc_core", "ndc_lexer", "ndc_parser", - "num", - "self_cell", + "ndc_vm", "thiserror", ] [[package]] name = "ndc_lexer" -version = "0.2.1" +version = "0.3.0" dependencies = [ "num", "ryu", @@ -1299,12 +1256,12 @@ dependencies = [ [[package]] name = "ndc_lsp" -version = "0.2.1" +version = "0.3.0" dependencies = [ + "ndc_core", "ndc_interpreter", "ndc_lexer", "ndc_parser", - "ndc_stdlib", "tokio", "tower-lsp", ] @@ -1313,7 +1270,6 @@ dependencies = [ name = "ndc_macros" version = "0.1.0" dependencies = [ - "itertools 0.14.0", "proc-macro2", "quote", "syn", @@ -1321,10 +1277,10 @@ dependencies = [ [[package]] name = "ndc_parser" -version = "0.2.1" +version = "0.3.0" dependencies = [ "derive_more", - "itertools 0.14.0", + "ndc_core", "ndc_lexer", "num", "thiserror", @@ -1332,14 +1288,15 @@ dependencies = [ [[package]] name = "ndc_stdlib" -version = "0.2.1" +version = "0.3.0" dependencies = [ "anyhow", 
"factorial", "itertools 0.14.0", "md5", - "ndc_interpreter", + "ndc_core", "ndc_macros", + "ndc_vm", "num", "once_cell", "rand", @@ -1349,6 +1306,18 @@ dependencies = [ "tap", ] +[[package]] +name = "ndc_vm" +version = "0.3.0" +dependencies = [ + "ndc_core", + "ndc_lexer", + "ndc_parser", + "num", + "ordered-float", + "thiserror", +] + [[package]] name = "nibble_vec" version = "0.1.0" @@ -1478,6 +1447,10 @@ name = "owo-colors" version = "4.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d211803b9b6b570f68772237e415a029d5a50c65d382910b879fb19d3271f94d" +dependencies = [ + "supports-color 2.1.0", + "supports-color 3.0.2", +] [[package]] name = "page_size" @@ -1840,12 +1813,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "self_cell" -version = "1.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b12e76d157a900eb52e81bc6e9f3069344290341720e9178cde2407113ac8d89" - [[package]] name = "semver" version = "1.0.27" @@ -1994,6 +1961,16 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "supports-color" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6398cde53adc3c4557306a96ce67b302968513830a77a95b2b17305d9719a89" +dependencies = [ + "is-terminal", + "is_ci", +] + [[package]] name = "supports-color" version = "3.0.2" @@ -2071,11 +2048,11 @@ dependencies = [ [[package]] name = "tests" -version = "0.2.1" +version = "0.3.0" dependencies = [ "ndc_interpreter", "ndc_stdlib", - "owo-colors", + "yansi", ] [[package]] @@ -2762,6 +2739,15 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" 
+[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" +dependencies = [ + "is-terminal", +] + [[package]] name = "yoke" version = "0.8.1" diff --git a/Cargo.toml b/Cargo.toml index c0b4146c..4244322e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,11 +1,15 @@ [workspace] resolver = "3" -members = ["ndc_macros", "ndc_bin", "ndc_core", "ndc_interpreter", "ndc_lsp", "ndc_lexer", "ndc_parser", "ndc_stdlib", "benches", "tests"] +members = ["ndc_macros", "ndc_bin", "ndc_core", "ndc_analyser", "ndc_interpreter", "ndc_vm", "ndc_lsp", "ndc_lexer", "ndc_parser", "ndc_stdlib", "benches", "tests", "compiler_tests"] + +[profile.release-with-debug] +inherits = "release" +debug = true [workspace.package] edition = "2024" publish = false -version = "0.2.1" +version = "0.3.0" license = "MIT" [workspace.dependencies] @@ -19,7 +23,9 @@ factorial = "0.4.0" itertools = "0.14.0" ndc_core = { path = "ndc_core" } ndc_lexer = { path = "ndc_lexer" } +ndc_analyser = { path = "ndc_analyser" } ndc_interpreter = { path = "ndc_interpreter" } +ndc_vm = { path = "ndc_vm" } ndc_parser = { path = "ndc_parser" } ndc_lsp = { path = "ndc_lsp" } ndc_macros = { path = "ndc_macros" } @@ -27,7 +33,8 @@ ndc_stdlib = { path = "ndc_stdlib" } num = "0.4.3" once_cell = "1.21.3" ordered-float = "5.1.0" -owo-colors = "4.3.0" +owo-colors = { version = "4.3.0", features = ["supports-colors"] } +yansi = { version = "1.0.1", features = ["detect-tty", "detect-env"] } rand = "0.10.0" rand_chacha = "0.10.0" regex = "1.12.3" diff --git a/TODO.md b/TODO.md new file mode 100644 index 00000000..186dbaa7 --- /dev/null +++ b/TODO.md @@ -0,0 +1,96 @@ +# TODO + +Open tasks collected from in-code comments. Resolve by implementing or opening a GitHub issue. + +--- + +## Medium: Type-checking warnings and errors + +These can be implemented incrementally once the basic type system is stable. 
+ +- **Logical-operator operand types** (`ndc_analyser/src/analyser.rs` ~line 58) + `and` / `or` operands are not checked to be `Bool`. Should emit a warning or error when the + operand type is known and incompatible. + +- **Mismatched `if` branch types** (`ndc_analyser/src/analyser.rs` ~line 178) + When `true`-branch and `false`-branch types differ (and neither is `Any`), a warning could be + emitted. + +- **Missing semicolon warning in `if`** (`ndc_analyser/src/analyser.rs` ~line 174) + When the `true`-branch of an `if` produces a non-unit value but no `else` is present, a warning + for the missing semicolon would be helpful. + +- **`never` type for variable declarations** (`ndc_analyser/src/analyser.rs` ~line 66) + `let x = …` currently resolves to `unit`. Introducing a `never` / `!` type might be more + accurate, once the type lattice is fleshed out. + +--- + +## Medium: Semantic analysis correctness + +- **Conflicting binding on re-declaration** (`ndc_analyser/src/analyser.rs` ~line 141) + When a function name is declared a second time in the same scope, the analyser silently creates + a new binding instead of either updating the old one or raising an error. The right policy needs + to be decided and implemented. + +- **`debug_assert` → `unreachable!` in `find_function_candidates`** (`ndc_analyser/src/scope.rs` ~line 99) + A variadic function match should be impossible at this call-site. The `debug_assert!(false, …)` + should be replaced with `unreachable!` once we are confident the invariant holds. + +--- + +## Medium: Number / arithmetic semantics + +- **Bitwise NOT on non-integer numbers** (`ndc_core/src/num.rs` ~line 181) + Currently `!float` and `!rational` return `NaN` (matching Noulith behaviour). Decide whether this + is intentional for this language or whether it should be an error. 
+ +- **`bigint → int` rounding in floor/ceil/round** (`ndc_core/src/num.rs` ~line 584) + After rounding a `Rational`, the result is converted to `BigInt` rather than trying to fit it + back into a machine `i64`. Should attempt the smaller representation first. + +- **Division performance** (`ndc_core/src/num.rs` ~line 323) + `Div` always promotes both operands to `Rational`. In the common `Int / Int` case this is + unnecessary. A fast path for integer operands would avoid the allocation. + +--- + +## Small: Lexer improvements + +- **Unicode escape sequences** (`ndc_lexer/src/string.rs` ~line 72) + String literals do not support `\uXXXX` escape sequences. Add support. + +- **`_` separator after decimal point** (`ndc_lexer/src/number.rs` ~line 130) + `1_000.0` is valid, but `1.0_0` is probably not intended. Consider rejecting `_` after `.`. + +- **Number suffix error interception** (`ndc_lexer/src/number.rs` ~line 48) + The suffix-error checks inside `lex_number` may be redundant since no numeric suffixes are + supported. Consider moving the check to after the lexer returns so it applies uniformly. + +- **`validator_for_radix` performance** (`ndc_lexer/src/number.rs` ~line 231) + The string-slice approach for validating digits by radix is O(radix) per character. A lookup + table or `char::to_digit` would be faster. + +- **`consume()` internal error handling** (`ndc_lexer/src/lib.rs` ~line 202) + `consume()` panics with `expect` on underflow. Document the invariant or add a proper internal + error type. + +--- + +## Small: Parser error messages + +- **Better error for boolean-returning `if` without semicolon** (`ndc_parser/src/parser.rs` ~line 738) + The pattern `if x == y { true } else { false }` triggers a generic parse error. A targeted + diagnostic would be more helpful. + +- **"Expected expression" error quality** (`ndc_parser/src/parser.rs` ~line 1001) + The fallback "Expected an expression but got '…'" message may not always accurately describe + the failure. 
Audit and improve. + +--- + +## Small: Test / debug + +- **Error rendering in block-scope test** (`tests/programs/004_basic/005_block_scope_destroys_local_variables.ndc` line 6) + The error is reported correctly but rendered weirdly in the test output. Investigate why and fix + the display. diff --git a/bench.sh b/bench.sh new file mode 100755 index 00000000..1869ad7d --- /dev/null +++ b/bench.sh @@ -0,0 +1,93 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [[ $# -ne 1 ]]; then + echo "Usage: $0 <file>" >&2 + exit 1 +fi + +FILE="$1" + +if [[ ! -f "$FILE" ]]; then + echo "Error: file not found: $FILE" >&2 + exit 1 +fi + +if ! command -v ndc &>/dev/null; then + echo "Error: 'ndc' not found in PATH" >&2 + exit 1 +fi + +if ! command -v hyperfine &>/dev/null; then + echo "Error: 'hyperfine' not found in PATH" >&2 + exit 1 +fi + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LOCAL="$SCRIPT_DIR/target/release/ndc" + +echo "Building release binary..." +cargo build --release --manifest-path "$SCRIPT_DIR/Cargo.toml" -q + +echo "" +echo "Validating versions can run '$FILE' without crashing..." + +validate() { + local label="$1" + local cmd=("${@:2}") + if ! 
"${cmd[@]}" &>/dev/null; then + echo " FAIL: $label crashed or returned an error" >&2 + exit 1 + fi + echo " OK: $label" +} + +validate "ndc (PATH)" ndc run "$FILE" +validate "local release (VM)" "$LOCAL" run "$FILE" + +# ndc2 and ndc1 are optional: older versions may not support all language features +# ndc1 also has a different CLI (no 'run' subcommand) +USE_NDC2=false +if command -v ndc2 &>/dev/null; then + if ndc2 run "$FILE" &>/dev/null; then + echo " OK: ndc2 (v0.2.1)" + USE_NDC2=true + else + echo " SKIP: ndc2 (v0.2.1) failed to run '$FILE', skipping" + fi +else + echo " SKIP: ndc2 not found, skipping" +fi + +USE_NDC1=false +if command -v ndc1 &>/dev/null; then + if ndc1 "$FILE" &>/dev/null; then + echo " OK: ndc1 (v0.1.0)" + USE_NDC1=true + else + echo " SKIP: ndc1 (v0.1.0) failed to run '$FILE', skipping" + fi +else + echo " SKIP: ndc1 not found, skipping" +fi + +echo "" +echo "Running benchmark..." +echo "" + +NDC2_ARGS=() +if [[ "$USE_NDC2" == true ]]; then + NDC2_ARGS=(--command-name "ndc2 (v0.2.1)" "ndc2 run '$FILE'") +fi + +NDC1_ARGS=() +if [[ "$USE_NDC1" == true ]]; then + NDC1_ARGS=(--command-name "ndc1 (v0.1.0)" "ndc1 '$FILE'") +fi + +hyperfine \ + --warmup 3 \ + --command-name "ndc (installed)" "ndc run '$FILE'" \ + --command-name "local release (VM)" "'$LOCAL' run '$FILE'" \ + "${NDC2_ARGS[@]}" \ + "${NDC1_ARGS[@]}" diff --git a/benches/Cargo.toml b/benches/Cargo.toml index 2750ec3c..42a809b9 100644 --- a/benches/Cargo.toml +++ b/benches/Cargo.toml @@ -15,3 +15,8 @@ rand_chacha.workspace = true name = "benchmark" path = "src/benchmark.rs" harness = false + +[[bench]] +name = "output_sink" +path = "src/output_sink.rs" +harness = false diff --git a/benches/programs/ackermann.ndc b/benches/programs/ackermann.ndc new file mode 100644 index 00000000..5916c85c --- /dev/null +++ b/benches/programs/ackermann.ndc @@ -0,0 +1,7 @@ +fn ackermann(m, n) { + if m == 0 { n + 1 } + else if n == 0 { ackermann(m - 1, 1) } + else { ackermann(m - 1, ackermann(m, n - 
1)) } +} + +print(ackermann(3, 7)); diff --git a/benches/programs/bigint.ndc b/benches/programs/bigint.ndc new file mode 100644 index 00000000..1fa12ab6 --- /dev/null +++ b/benches/programs/bigint.ndc @@ -0,0 +1,5 @@ +let result = 1; +for i in 2..=5000 { + result *= i; +} +print(result.string.len, "digits"); diff --git a/benches/programs/closures.ndc b/benches/programs/closures.ndc new file mode 100644 index 00000000..a5ddd233 --- /dev/null +++ b/benches/programs/closures.ndc @@ -0,0 +1,6 @@ +let total = 0; +for _ in 0..20 { + let adders = [fn(x) => x + i for i in 0..10_000]; + total = total + adders.map(fn(f) => f(0)).sum; +}; +print(total); diff --git a/benches/programs/fibonacci.ndc b/benches/programs/fibonacci.ndc index 06548f9c..a634ae9e 100644 --- a/benches/programs/fibonacci.ndc +++ b/benches/programs/fibonacci.ndc @@ -2,5 +2,4 @@ fn fib(n) { if n <= 1 { 1 } else { fib(n - 2) + fib(n - 1) } } -fib(10); -fib(20); \ No newline at end of file +fib(26); diff --git a/benches/programs/hof_pipeline.ndc b/benches/programs/hof_pipeline.ndc new file mode 100644 index 00000000..4fb25056 --- /dev/null +++ b/benches/programs/hof_pipeline.ndc @@ -0,0 +1,5 @@ +let result = list(1..=100_000) + .filter(fn(x) => x % 2 == 0) + .map(fn(x) => x * x) + .reduce(fn(acc, x) => acc + x); +print(result); diff --git a/benches/programs/map_ops.ndc b/benches/programs/map_ops.ndc new file mode 100644 index 00000000..f9b72c3d --- /dev/null +++ b/benches/programs/map_ops.ndc @@ -0,0 +1,11 @@ +let m = %{}; +for i in 1..=50_000 { + let key = (i * i) % 997; + if key in m { + m[key] += 1; + } else { + m[key] = 1; + } +} +let most_common = m.keys.max_by_key(fn(k) => m[k]); +print("Unique keys:", m.keys.len, "Most common:", most_common, "count:", m[most_common]); diff --git a/benches/programs/matrix_mul.ndc b/benches/programs/matrix_mul.ndc new file mode 100644 index 00000000..e0d3de0b --- /dev/null +++ b/benches/programs/matrix_mul.ndc @@ -0,0 +1,9 @@ +fn mat_mul(a, b, n) { + [[fold(0..n, 0, 
fn(acc, k) => acc + a[r][k] * b[k][c]) for c in 0..n] for r in 0..n] +} + +let n = 50; +let a = [[(r + c) % 7 for c in 0..n] for r in 0..n]; +let b = [[(r * c + 1) % 5 for c in 0..n] for r in 0..n]; +let result = mat_mul(a, b, n); +print(result[0][0]); diff --git a/benches/programs/perlin.ndc b/benches/programs/perlin.ndc index e8ec3e8d..d452b581 100644 --- a/benches/programs/perlin.ndc +++ b/benches/programs/perlin.ndc @@ -1,7 +1,7 @@ let vectors = [(x, y) for x in -1..=1, y in -1..=1, if x != 0 or y != 0]; -pure fn fade(t) => t * t * t * (t * (t * 6 - 15) + 10); +fn fade(t) => t * t * t * (t * (t * 6 - 15) + 10); fn lerp(a, b, t) => a + t * (b - a); diff --git a/benches/programs/pi_approx.ndc b/benches/programs/pi_approx.ndc index 5eec0263..d34955d2 100644 --- a/benches/programs/pi_approx.ndc +++ b/benches/programs/pi_approx.ndc @@ -8,7 +8,7 @@ fn monte_carlo_pi(num_samples) { } } - return (inside_circle / num_samples) * 4; + return (inside_circle.float / num_samples) * 4; } -print(monte_carlo_pi(50_000).float) +print(monte_carlo_pi(50_000)) diff --git a/benches/programs/print_heavy.ndc b/benches/programs/print_heavy.ndc new file mode 100644 index 00000000..e810bad6 --- /dev/null +++ b/benches/programs/print_heavy.ndc @@ -0,0 +1,3 @@ +for i in 0..10_000 { + print(i); +} diff --git a/benches/programs/quicksort.ndc b/benches/programs/quicksort.ndc index e917f664..425600bc 100644 --- a/benches/programs/quicksort.ndc +++ b/benches/programs/quicksort.ndc @@ -2,11 +2,12 @@ fn quicksort(arr) { if arr.len <= 1 { return arr }; let pivot = arr[0]; let left = arr.filter(fn(x) => x < pivot); + let mid = arr.filter(fn(x) => x == pivot); let right = arr.filter(fn(x) => x > pivot); - return quicksort(left) ++ [pivot] ++ quicksort(right); + return quicksort(left) ++ mid ++ quicksort(right); } -print(quicksort([ +let arr = [ -2756468492, 2102458917, -4135848830, @@ -107,4 +108,10 @@ print(quicksort([ -2983676698, -739198794, 3809134210 -])); +]; + +let result = []; +for _ in 
0..200 { + result = quicksort(arr); +}; +print(result); diff --git a/benches/programs/sieve.ndc b/benches/programs/sieve.ndc index bbb78ade..a35a104b 100644 --- a/benches/programs/sieve.ndc +++ b/benches/programs/sieve.ndc @@ -17,5 +17,5 @@ fn sieve(n) { return primes; } -// Find lots of prime numbers -sieve(10000); +// Compute primality for all numbers up to n +sieve(200_000); diff --git a/benches/src/benchmark.rs b/benches/src/benchmark.rs index dcbd7d22..04052249 100644 --- a/benches/src/benchmark.rs +++ b/benches/src/benchmark.rs @@ -1,7 +1,6 @@ use criterion::{Criterion, criterion_group, criterion_main}; use ndc_interpreter::Interpreter; use ndc_interpreter::InterpreterError; -use ndc_stdlib::WithStdlib; use rand::{RngExt, SeedableRng}; use rand_chacha::ChaCha8Rng; use std::fs; @@ -9,9 +8,11 @@ use std::path::Path; use std::time::Duration; fn run_string(input: &str) -> Result<String, InterpreterError> { - let buf: Vec<u8> = vec![]; - let mut interpreter = Interpreter::new(buf).with_stdlib(); - interpreter.run_str(std::hint::black_box(input)) + let mut interpreter = Interpreter::capturing(); + interpreter.configure(ndc_stdlib::register); + interpreter + .eval(std::hint::black_box(input)) + .map(|v| v.to_string()) } #[allow(unused)] diff --git a/benches/src/output_sink.rs b/benches/src/output_sink.rs new file mode 100644 index 00000000..8db2f25d --- /dev/null +++ b/benches/src/output_sink.rs @@ -0,0 +1,138 @@ +/// Microbenchmark comparing three candidate designs for the VM output sink. +/// +/// - `enum`: static dispatch via a two-variant enum (current implementation) +/// - `dyn`: heap-allocated trait object (`Box<dyn OutputTrait>`) +/// - `rc`: `Rc<RefCell<Vec<u8>>>` shared-ownership buffer +/// +/// All three implementations are defined locally so the benchmark does not +/// depend on any particular version of the VM. The goal is to measure the +/// overhead of the *write path* in isolation from interpreter execution. 
+use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main}; +use std::cell::RefCell; +use std::hint::black_box; +use std::io::Write; +use std::rc::Rc; + +// --------------------------------------------------------------------------- +// Implementation 1 – enum (current) +// --------------------------------------------------------------------------- + +enum EnumSink { + Buffer(Vec<u8>), +} + +impl Write for EnumSink { + fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { + match self { + EnumSink::Buffer(v) => v.write(buf), + } + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +// --------------------------------------------------------------------------- +// Implementation 2 – trait object +// --------------------------------------------------------------------------- + +trait OutputTrait: Write { + fn captured(&self) -> Option<&[u8]> { + None + } +} + +struct DynBuffer(Vec<u8>); + +impl Write for DynBuffer { + fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { + self.0.write(buf) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +impl OutputTrait for DynBuffer { + fn captured(&self) -> Option<&[u8]> { + Some(&self.0) + } +} + +// --------------------------------------------------------------------------- +// Implementation 3 – Rc<RefCell<Vec<u8>>> +// --------------------------------------------------------------------------- + +struct RcSink(Rc<RefCell<Vec<u8>>>); + +impl Write for RcSink { + fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> { + self.0.borrow_mut().write(buf) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +// --------------------------------------------------------------------------- +// Benchmark helpers +// --------------------------------------------------------------------------- + +/// Write `n` copies of `payload` to `sink`. 
+fn write_n(sink: &mut impl Write, payload: &[u8], n: usize) { + for _ in 0..n { + sink.write_all(black_box(payload)).unwrap(); + } +} + +fn bench_sinks(c: &mut Criterion) { + // Payloads of different sizes to expose cache / branch effects. + let payloads: &[(&str, &[u8])] = &[ + ("4B", b"1234"), + ("16B", b"hello, world!!!!"), + ("64B", &[b'x'; 64]), + ("256B", &[b'x'; 256]), + ]; + const WRITES: usize = 10_000; + + let mut group = c.benchmark_group("output_sink"); + + for (label, payload) in payloads { + let bytes = (payload.len() * WRITES) as u64; + group.throughput(Throughput::Bytes(bytes)); + + group.bench_with_input(BenchmarkId::new("enum", label), payload, |b, payload| { + b.iter(|| { + let mut sink = EnumSink::Buffer(Vec::with_capacity(bytes as usize)); + write_n(&mut sink, payload, WRITES); + sink + }); + }); + + group.bench_with_input(BenchmarkId::new("dyn", label), payload, |b, payload| { + b.iter(|| { + let mut sink: Box<dyn OutputTrait> = + Box::new(DynBuffer(Vec::with_capacity(bytes as usize))); + write_n(&mut sink, payload, WRITES); + sink + }); + }); + + group.bench_with_input(BenchmarkId::new("rc", label), payload, |b, payload| { + b.iter(|| { + let buf = Rc::new(RefCell::new(Vec::with_capacity(bytes as usize))); + let mut sink = RcSink(Rc::clone(&buf)); + write_n(&mut sink, payload, WRITES); + buf + }); + }); + } + + group.finish(); +} + +criterion_group!(benches, bench_sinks); +criterion_main!(benches); diff --git a/compiler_tests/Cargo.toml b/compiler_tests/Cargo.toml new file mode 100644 index 00000000..372fee04 --- /dev/null +++ b/compiler_tests/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "compiler_tests" +edition.workspace = true +version.workspace = true + +[dev-dependencies] +ndc_interpreter.workspace = true +ndc_lexer.workspace = true +ndc_parser.workspace = true +ndc_vm.workspace = true diff --git a/compiler_tests/tests/compiler.rs b/compiler_tests/tests/compiler.rs new file mode 100644 index 00000000..baf2bda7 --- /dev/null +++ 
b/compiler_tests/tests/compiler.rs @@ -0,0 +1,340 @@ +use ndc_lexer::Lexer; +use ndc_parser::Parser; +use ndc_vm::chunk::OpCode; +use ndc_vm::chunk::OpCode::*; +use ndc_vm::compiler::Compiler; + +fn compile(input: &str) -> Vec<OpCode> { + let tokens = Lexer::new(input) + .collect::<Result<Vec<_>, _>>() + .expect("lex failed"); + let expressions = Parser::from_tokens(tokens).parse().expect("parse failed"); + Compiler::compile(expressions.into_iter()) + .expect("compile failed") + .opcodes() + .to_vec() +} + +fn compile_with_analysis(input: &str) -> Vec<OpCode> { + let mut interp = ndc_interpreter::Interpreter::capturing(); + interp + .compile_str(input) + .expect("compile failed") + .opcodes() + .to_vec() +} + +// if true { 1 } +// +// 0: Constant(0) push `true` +// 1: JumpIfFalse(3) if false, jump to else path (index 5) +// 2: Pop pop condition (true path) +// 3: Constant(1) push `1` +// 4: Jump(2) skip else path (jump to Halt at index 7) +// 5: Pop pop condition (false path, jumped here) +// 6: Constant(2) push `None` (unit, no else branch) +// 7: Halt +#[test] +fn test_if_without_else() { + assert_eq!( + compile("if true { 1 }"), + [ + Constant(0), + JumpIfFalse(3), + Pop, + Constant(1), + Jump(2), + Pop, + Constant(2), + Halt + ] + ); +} + +// if true { 1 } else { 2 } +// +// 0: Constant(0) push `true` +// 1: JumpIfFalse(3) if false, jump to else (index 5) +// 2: Pop pop condition (true path) +// 3: Constant(1) push `1` +// 4: Jump(2) skip else, jump to Halt (index 7) +// 5: Pop pop condition (false path) +// 6: Constant(2) push `2` +// 7: Halt +#[test] +fn test_if_with_else() { + assert_eq!( + compile("if true { 1 } else { 2 }"), + [ + Constant(0), + JumpIfFalse(3), + Pop, + Constant(1), + Jump(2), + Pop, + Constant(2), + Halt + ] + ); +} + +// true and false +// +// Short-circuits: if left is false, leave it on stack and jump past right. 
+// +// 0: Constant(0) push `true` +// 1: JumpIfFalse(2) if false, skip Pop+right and leave false on stack +// 2: Pop pop left (it was true, discard it) +// 3: Constant(1) push `false` (result) +// 4: Halt +#[test] +fn test_and() { + assert_eq!( + compile("true and false"), + [Constant(0), JumpIfFalse(2), Pop, Constant(1), Halt] + ); +} + +// true or false +// +// Short-circuits: if left is true, leave it on stack and jump past right. +// +// 0: Constant(0) push `true` +// 1: JumpIfTrue(2) if true, skip Pop+right and leave true on stack +// 2: Pop pop left (it was false, discard it) +// 3: Constant(1) push `false` (result) +// 4: Halt +#[test] +fn test_or() { + assert_eq!( + compile("true or false"), + [Constant(0), JumpIfTrue(2), Pop, Constant(1), Halt] + ); +} + +// 5; +// +// 0: Constant(0) push `5` +// 1: Pop discard value (it's a statement) +// 2: Halt +#[test] +fn test_statement() { + assert_eq!(compile("5;"), [Constant(0), Pop, Halt]); +} + +// { 5 } +// +// 0: Constant(0) push `5` (block result) +// 1: Halt +#[test] +fn test_block_with_expression() { + assert_eq!(compile("{ 5 }"), [Constant(0), Halt]); +} + +// { 5; } +// +// 0: Constant(0) push `5` +// 1: Pop discard (trailing semicolon) +// 2: Constant(1) push `()` (block result is unit) +// 3: Halt +#[test] +fn test_block_with_trailing_statement() { + assert_eq!(compile("{ 5; }"), [Constant(0), Pop, Constant(1), Halt]); +} + +// { 5; 6 } +// +// 0: Constant(0) push `5` +// 1: Pop discard intermediate statement +// 2: Constant(1) push `6` (block result) +// 3: Halt +#[test] +fn test_block_multiple_statements() { + assert_eq!(compile("{ 5; 6 }"), [Constant(0), Pop, Constant(1), Halt]); +} + +// if true { 3 } else { 3; } +// +// true branch returns 3, false branch returns () +// +// 0: Constant(0) push `true` +// 1: JumpIfFalse(3) jump to false branch (index 5) +// 2: Pop pop condition (true path) +// 3: Constant(1) push `3` +// 4: Jump(4) jump to Halt (index 9) +// 5: Pop pop condition (false path) +// 6: 
Constant(2) push `3` (inner of `3;`) +// 7: Pop discard (trailing semicolon) +// 8: Constant(3) push `()` (block result) +// 9: Halt +#[test] +fn test_if_with_statement_else() { + assert_eq!( + compile("if true { 3 } else { 3; }"), + [ + Constant(0), + JumpIfFalse(3), + Pop, + Constant(1), + Jump(4), + Pop, + Constant(2), + Pop, + Constant(3), + Halt + ] + ); +} + +// if true { 3; } else { 3; } +// +// Both branches return () — result is unit regardless of condition +// +// 0: Constant(0) push `true` +// 1: JumpIfFalse(5) jump to false branch (index 7) +// 2: Pop pop condition (true path) +// 3: Constant(1) push `3` +// 4: Pop discard +// 5: Constant(2) push `()` +// 6: Jump(4) jump to Halt (index 11) +// 7: Pop pop condition (false path) +// 8: Constant(3) push `3` +// 9: Pop discard +// 10: Constant(4) push `()` +// 11: Halt +#[test] +fn test_if_with_statement_branches() { + assert_eq!( + compile("if true { 3; } else { 3; }"), + [ + Constant(0), + JumpIfFalse(5), + Pop, + Constant(1), + Pop, + Constant(2), + Jump(4), + Pop, + Constant(3), + Pop, + Constant(4), + Halt + ] + ); +} + +// while true { 1 } +// +// 0: Constant(0) push `true` ← loop_start +// 1: JumpIfFalse(4) if false, jump past body to exit Pop (index 6) +// 2: Pop pop condition (true path) +// 3: Constant(1) body: push `1` +// 4: Pop discard body value (loops produce no value) +// 5: Jump(-6) jump back to loop_start (index 0) +// 6: Pop pop condition (false path, loop exit) +// 7: Halt +#[test] +fn test_while() { + assert_eq!( + compile("while true { 1 }"), + [ + Constant(0), + JumpIfFalse(4), + Pop, + Constant(1), + Pop, + Jump(-6), + Pop, + Halt + ] + ); +} + +// let a = 1; +// +// Value is compiled, then SetLocal stores it in pre-allocated slot 0. 
+// +// 0: Constant(0) push `1` +// 1: SetLocal(0) store in slot 0 +// 2: Halt +#[test] +fn test_declaration() { + assert_eq!( + compile_with_analysis("let a = 1;"), + [Constant(0), SetLocal(0), Halt] + ); +} + +// let a = 1; +// a = 5; +// +// Declaration stores 1 into pre-allocated slot 0. +// Assignment pushes new value, SetLocal overwrites, +// push unit as the expression result, Pop discards it. +// +// 0: Constant(0) push `1` +// 1: SetLocal(0) store in slot 0 (declaration) +// 2: Constant(1) push `5` +// 3: SetLocal(0) overwrite slot 0 (assignment) +// 4: Constant(2) push `()` (assignment result) +// 5: Pop discard (statement) +// 6: Halt +#[test] +fn test_assignment() { + assert_eq!( + compile_with_analysis("let a = 1;\na = 5;"), + [ + Constant(0), + SetLocal(0), + Constant(1), + SetLocal(0), + Constant(2), + Pop, + Halt + ] + ); +} + +// { let a = 3; a } +// +// Declaration stores 3 into pre-allocated slot 0. +// Block result is `a`, read via GetLocal. +// No cleanup needed — locals are pre-allocated. +// +// 0: Constant(0) push `3` +// 1: SetLocal(0) store in slot 0 (declaration) +// 2: GetLocal(0) push `a` (block result) +// 3: Halt +#[test] +fn test_block_scope_cleanup() { + assert_eq!( + compile_with_analysis("{ let a = 3; a }"), + [Constant(0), SetLocal(0), GetLocal(0), Halt] + ); +} + +// { let a = 1; let b = 2; a } +// +// Both locals stored via SetLocal into pre-allocated slots. +// Block result is `a`, read via GetLocal. No cleanup needed. 
+// +// 0: Constant(0) push `1` +// 1: SetLocal(0) store in slot 0 +// 2: Constant(1) push `2` +// 3: SetLocal(1) store in slot 1 +// 4: GetLocal(0) push `a` (block result) +// 5: Halt +#[test] +fn test_block_scope_cleanup_multiple_locals() { + assert_eq!( + compile_with_analysis("{ let a = 1; let b = 2; a }"), + [ + Constant(0), + SetLocal(0), + Constant(1), + SetLocal(1), + GetLocal(0), + Halt + ] + ); +} diff --git a/manual/src/SUMMARY.md b/manual/src/SUMMARY.md index d670072c..23f68f52 100644 --- a/manual/src/SUMMARY.md +++ b/manual/src/SUMMARY.md @@ -33,4 +33,4 @@ - [Memoization](./features/memoization.md) # Troubleshooting -- [Stackoverflow](./troubleshooting/stackoverflow.md) +- [Overload dispatch with collections](./troubleshooting/overload-dispatch-collections.md) diff --git a/manual/src/features/augmented-assignment.md b/manual/src/features/augmented-assignment.md index 11ca3690..2fd26ab0 100644 --- a/manual/src/features/augmented-assignment.md +++ b/manual/src/features/augmented-assignment.md @@ -21,7 +21,7 @@ list without creating an intermediary list first. Note: I stole this feature from [Noulith](https://github.com/betaveros/noulith). -Augmented assignment is not limited to built in operators, you can also use built in function or user created functions to agument assignment. Consider the following example: +Augmented assignment is not limited to built in operators, you can also use built in function or user created functions to augment assignment. Consider the following example: ```ndc let x = 3; diff --git a/manual/src/features/memoization.md b/manual/src/features/memoization.md index af5003bf..bddc07a2 100644 --- a/manual/src/features/memoization.md +++ b/manual/src/features/memoization.md @@ -19,7 +19,28 @@ let multiply = pure fn (x, y) { x * y }; > **Note:** the interpreter does not perform any checks to see if the function is actually pure. It's your > responsibility to ensure that functions don't have side-effects. 
-### Example: Fibornacci Sequence +### Performance: keep memoization keys small + +The cache key is computed by hashing **all arguments**. For container types like maps and lists this +is an O(n) operation proportional to the number of elements. Passing large containers as arguments +to a `pure fn` therefore adds hashing overhead on every call — even on cache hits. + +If a container is large and doesn't change between recursive calls (e.g. a lookup table or graph), +capture it as an **upvalue** instead of passing it as an argument. The upvalue is not part of the +cache key, so the memoization cost is proportional only to the arguments that actually vary. + +```ndc +// Slow: `graph` is hashed on every call +pure fn count(graph, node, visited) { ... } + +// Fast: `graph` captured as upvalue, only (node, visited) are hashed +let graph = build_graph(); +pure fn count(node, visited) { + // use graph here +} +``` + +### Example: Fibonacci Sequence ```ndc pure fn fib (n) { diff --git a/manual/src/getting-started/hello-world.md b/manual/src/getting-started/hello-world.md index 231b6d42..857409c7 100644 --- a/manual/src/getting-started/hello-world.md +++ b/manual/src/getting-started/hello-world.md @@ -8,11 +8,9 @@ First create a file called `hello.ndc` add the code in Listing 1-1. print("Hello, world!"); ``` -In Andy C++ we don't need main function and semicolons are mandatory just like they are in Rust. If you already know Rust, Andy C++ will feel very familiar. +In Andy C++ we don't need a main function. Semicolons are mandatory, just like they are in Rust. If you already know Rust, Andy C++ will feel very familiar. ```bash $ ndc hello.ndc Hello, world! ``` - -Now if you're already familiar with Rust and Python, or just C-style languages in general you can checkout the [overview](./overview.md) to get an idea of what the language is like. 
diff --git a/manual/src/reference/for-loop.md b/manual/src/reference/for-loop.md index 70d51501..b7ebd7f4 100644 --- a/manual/src/reference/for-loop.md +++ b/manual/src/reference/for-loop.md @@ -1,6 +1,6 @@ # For loop -For loops is where Andy C++ get's a little spicier. First lets look at some basic examples: +For loops are where Andy C++ gets a little spicier. First let's look at some basic examples: ```ndc for n in 1..=100 { diff --git a/manual/src/reference/functions.md b/manual/src/reference/functions.md index de06aa74..817078fa 100644 --- a/manual/src/reference/functions.md +++ b/manual/src/reference/functions.md @@ -42,9 +42,9 @@ assert_eq(10, my_function(10)); assert_eq(15, my_function(5)); assert_eq([10, 5], x); ``` -## Function overload +## Function overloading -Functions can be overloaded, allowing multiple definitions with different parameter counts ~or types~. +Functions can be overloaded by declaring multiple `fn` definitions with the same name but different parameter counts. ```ndc fn add(n) { n + 1 }; @@ -54,10 +54,53 @@ assert_eq(10, add(9)); assert_eq(12, add(8, 4)); ``` +Declaring two `fn` definitions with the same name **and** the same number of parameters in the same scope is an error: + +```ndc +fn foo(a) { a + 1 } +fn foo(a) { a + 2 } // ERROR: redefinition of 'foo' with 1 parameter +``` + > **Note:** The engine also supports overloading functions based on their argument types, and the standard > library takes advantage of this feature in some cases. However, the current syntax of the language does > not allow users to specify argument types, so this capability is not yet available for custom functions. +## Function shadowing + +A `fn` declaration in a nested scope shadows only the overload with the same parameter count from outer scopes. 
Other overloads remain accessible: + +```ndc +fn foo(a) { "outer-one" } +fn foo(a, b) { "outer-two" } + +{ + fn foo(a) { "inner-one" } + foo("x"); // "inner-one" — inner 1-arg shadows outer 1-arg + foo("x", "y"); // "outer-two" — outer 2-arg still reachable +} + +foo("x"); // "outer-one" — shadow is gone after the block +``` + +A `let` binding with a function value completely replaces all previous bindings with the same name — it does not participate in function overloading: + +```ndc +fn foo(a) { "one" } +fn foo(a, b) { "two" } + +let foo = fn(a) => "let"; +foo("x"); // "let" — both fn overloads are shadowed +``` + +A non-function `let` binding shadows the name for value access, but function calls still resolve to the underlying function: + +```ndc +let len = 300; +len; // 300 — the value +len("test"); // 4 — calls the stdlib function, skipping the non-function binding +"test".len; // 4 — method call also resolves to the function +``` + ## Anonymous functions Anonymous functions allow you to define inline functionality and use them as values. 
This feature is key to unlocking diff --git a/manual/src/reference/types/boolean.md b/manual/src/reference/types/boolean.md index 514654ac..9ed1f9f2 100644 --- a/manual/src/reference/types/boolean.md +++ b/manual/src/reference/types/boolean.md @@ -15,7 +15,7 @@ Operators defined for booleans: | `!` | not | | `or` | lazy logical or | | `and` | lazy logical and | -| `not` | logical not like `!` but lower presedence | +| `not` | logical not like `!` but lower precedence | ## Lazy evaluation of `and` and `or` diff --git a/manual/src/reference/types/function.md b/manual/src/reference/types/function.md index 002186a5..d73715c3 100644 --- a/manual/src/reference/types/function.md +++ b/manual/src/reference/types/function.md @@ -24,7 +24,7 @@ let my_function = fn(a, b) => a + b; // Also works for named functions fn foo() => "whatever"; -// Note that comma's have a very low presedence in this example x is a tuple (function, 3) +// Note that commas have a very low precedence in this example x is a tuple (function, 3) let x = fn(y) => y, 3; // If you want to return a tuple from a function written in this way you must use parentheses diff --git a/manual/src/reference/types/map-and-set.md b/manual/src/reference/types/map-and-set.md index 542d172b..068a6585 100644 --- a/manual/src/reference/types/map-and-set.md +++ b/manual/src/reference/types/map-and-set.md @@ -39,7 +39,7 @@ defaultdict[33] += 7; // adds 7 to 0 and associates it to key 33 ## Default functions You may also specify the default value as a function, in this case the function is evaluated every time a new value is produced. -This is usefull when you want the default value of the dictionary to be a new list instead of a pointer to the same list. +This is useful when you want the default value of the dictionary to be a new list instead of a pointer to the same list. 
```ndc let dd = %{:fn() => []}; @@ -58,9 +58,24 @@ let dd = %{:fn() => fn(x) => x * x}; print(dd["test"](5)); // 25 ``` +## Iteration + +You can iterate over a map with a `for` loop. Each element is a `(key, value)` tuple: + +```ndc +let m = %{"a": 1, "b": 2}; +for (k, v) in m { + print(k, v); +} +``` + +**Iteration order is unspecified** — maps are hash-based, so keys may appear in any order. + +**Keys are snapshotted at the start of the loop.** Mutations to the map during iteration (adding or removing keys) are not reflected in the current loop — the set of keys visited is fixed when the `for` loop begins. Values read during iteration do reflect any changes made to existing keys. + ## Operators -| Operator | Function | Support agumented assignment [[1]](../../features/augmented-assignment.md) | Augmentable with `not` | +| Operator | Function | Support augmented assignment [[1]](../../features/augmented-assignment.md) | Augmentable with `not` | | :-: | --- | --- | --- | | `\|` | Union | `true` | `false` | | `&` | Intersection | `true` | `false` | diff --git a/manual/src/reference/types/min-max-heap.md b/manual/src/reference/types/min-max-heap.md index efe794ad..6b8ff46a 100644 --- a/manual/src/reference/types/min-max-heap.md +++ b/manual/src/reference/types/min-max-heap.md @@ -1,5 +1,5 @@ # MinHeap & MaxHeap -A datastructure backed by rust's [BinaryHeap](https://doc.rust-lang.org/std/collections/struct.BinaryHeap.html) that keeps elements in sorted order. This is very usefull when implementing algorithms like Dijkstra and A*. +A data structure backed by Rust's [BinaryHeap](https://doc.rust-lang.org/std/collections/struct.BinaryHeap.html) that keeps elements in sorted order. This is very useful when implementing algorithms like Dijkstra and A*. > **Note:** comparisons between types like `Int` and `String` are undefined and the Heap will treat them as equal. If you like well-defined behavior DO NOT MIX THEM. 
\ No newline at end of file diff --git a/manual/src/reference/types/number.md b/manual/src/reference/types/number.md index 282342dd..8ef556ac 100644 --- a/manual/src/reference/types/number.md +++ b/manual/src/reference/types/number.md @@ -9,13 +9,14 @@ Andy C++ has four number types that you should keep in mind when programming. ## Operators -| Operator | Function | Support agumented assignment [[1]](../../features/augmented-assignment.md) | Augmentable with `not` | +| Operator | Function | Support augmented assignment [[1]](../../features/augmented-assignment.md) | Augmentable with `not` | | :-: | --- | --- | --- | | `+` | Addition | `true` | `false` | | `-` | Subtraction | `true` | `false` | | unary `-` | Negation | `true` | `false` | | `*` | Multiplication | `true` | `false` | -| `/` | Division | `true` | `false` | +| `/` | Division (returns rational for integers) | `true` | `false` | +| `\` | Floor division (integer result, rounds toward negative infinity) | `true` | `false` | | `^` | Exponentiation | `true` | `false` | | `%` | C-style modulo (can be negative) | `true` | `false` | | `%%` | Remainder of [euclidean division](https://en.wikipedia.org/wiki/Euclidean_division) | `true` | `false` | @@ -31,7 +32,7 @@ Andy C++ has four number types that you should keep in mind when programming. 
Additionally for **integers** the following operations are available: -| Operator | Function | Support agumented assignment [[1]](../../features/augmented-assignment.md) | Augmentable with `not` | +| Operator | Function | Support augmented assignment [[1]](../../features/augmented-assignment.md) | Augmentable with `not` | | :-: | --- | --- | --- | | `\|` | Bitwise OR | `true` | `false` | | `&` | Bitwise AND | `true` | `false` | diff --git a/manual/src/reference/types/option.md b/manual/src/reference/types/option.md index da0f2422..f106661f 100644 --- a/manual/src/reference/types/option.md +++ b/manual/src/reference/types/option.md @@ -21,7 +21,7 @@ let fst = empty.first(); // ERROR: list is empty let fst = empty.first?(); // None let my_list = [1,2,3]; -let fst = empty.first?; // Some(1) +let fst = my_list.first?; // Some(1) ``` > **Note:** unfortunately the language doesn't support pattern matching on options diff --git a/manual/src/reference/types/string.md b/manual/src/reference/types/string.md index 890a1a57..91d191dd 100644 --- a/manual/src/reference/types/string.md +++ b/manual/src/reference/types/string.md @@ -4,7 +4,7 @@ In Andy C++ a String is a mutable list of characters. Characters don't have thei iterate over a string you get strings of length 1. Just like in Rust strings are guaranteed (and required) to be valid UTF-8. This means that you can't store arbitrary binary data in a String. -Indexing into a String is done using the characters in the string and not using byte offsets (this was probably a mistake). +Indexing into a String is done by Unicode code point (equivalent to Rust's `char`) rather than by byte offset. This means that indexing into a string is `O(n)` instead of `O(1)`. 
```ndc diff --git a/manual/src/reference/types/tuple.md b/manual/src/reference/types/tuple.md index 4d469af5..4e2d5f0d 100644 --- a/manual/src/reference/types/tuple.md +++ b/manual/src/reference/types/tuple.md @@ -20,7 +20,7 @@ for item in my_tuple { You can create a 1-length tuple by adding a usually optional trailing comma inside the expression: ```ndc -assert-eq((1,).len(), 1); +assert_eq((1,).len(), 1); ``` ## Copy-on-write diff --git a/manual/src/reference/variables-and-scopes.md b/manual/src/reference/variables-and-scopes.md index cc94f5a2..5b65e535 100644 --- a/manual/src/reference/variables-and-scopes.md +++ b/manual/src/reference/variables-and-scopes.md @@ -42,7 +42,7 @@ print(x); // 3 ## Destructuring -Destructuring is more similar to how it works in python and cares mostly about where comma's are and not so much about the delimiters (`[]`, `()`) used. +Destructuring is more similar to how it works in Python and cares mostly about where commas are and not so much about the delimiters (`[]`, `()`) used. The statements below are all equivalent: diff --git a/manual/src/troubleshooting/overload-dispatch-collections.md b/manual/src/troubleshooting/overload-dispatch-collections.md new file mode 100644 index 00000000..a06296d4 --- /dev/null +++ b/manual/src/troubleshooting/overload-dispatch-collections.md @@ -0,0 +1,61 @@ +# Overload dispatch with collections + +## Background + +When Andy C++ can determine at compile time which function overload to call, it does so — the +call is free of any type-checking overhead at runtime. When it cannot (because an argument was +inferred as `Any`), the VM performs **dynamic dispatch**: it tests each candidate overload at +runtime to find the best match. + +## O(1) dispatch guarantee + +For dynamic dispatch the VM checks whether a value *conforms to* the parameter type without +iterating the container contents. Specifically: + +| Parameter type | Check performed | +|---|---| +| `List` | Is the value a list? 
| +| `Map` | Is the value a map? | +| `Deque` | Is the value a deque? | +| `Sequence` | Is the value any sequence type? | +| `String`, `Int`, … | Exact kind check | + +This means that dispatch is **O(1)** regardless of how many elements are in the collection. + +## Limitation: element types are not checked at runtime + +Because the element-type check is skipped, the VM **cannot distinguish** overloads that differ +only in their container element types via dynamic dispatch. For example, two hypothetical overloads: + +``` +fn process(List) +fn process(List) +``` + +would both fail to match under dynamic dispatch if the list type cannot be resolved at compile +time, because verifying element types would require scanning the entire container. + +In practice this limitation is not currently visible: + +- User-defined functions cannot yet declare typed container parameters (the syntax is not + implemented), so user overloads always use `Any` and dispatch works correctly. +- All standard library overloads on container parameters use `` element types + (e.g. `List`, `Sequence`, `Map`), so they also hit the fast path. + Overloads that differ by container *kind* (e.g. `pop(List)` vs `pop(MinHeap)`) + are distinguished by the container kind check alone. 
+ +## Workaround + +If you notice that a function call unexpectedly fails to match an overload, move the call to a +location where Andy C++ can infer the argument types statically — for example, directly at the +call site rather than through an intermediate untyped function parameter: + +```ndc +// The type of `data` is Any here — dynamic dispatch used +fn handle(data) { + process(data) +} + +// Preferred: call process() directly where the type is known +process(my_list) +``` diff --git a/manual/src/troubleshooting/stackoverflow.md b/manual/src/troubleshooting/stackoverflow.md deleted file mode 100644 index 0495b88b..00000000 --- a/manual/src/troubleshooting/stackoverflow.md +++ /dev/null @@ -1,20 +0,0 @@ -# Stackoverflow - -Andy C++ is a straightforward tree-walk interpreter that, at present, does not support tail-call optimization. -As a result, recursive function calls can consume significant stack space, potentially leading to a stack -overflow for deeply recursive functions. - -While future versions of Andy C++ may address this limitation, you can work around it by increasing the -available stack space for the interpreter. On Linux, you can do this by running the following command: - -```bash -ulimit -s 65536 -``` - -This should allow most programs with reasonable recursion depths to execute successfully. 
- -|Stack size|Estimated recursion depth| -|---|---| -|8192 KiB|1108| -|65536 KiB|8883| -|262144 KiB|35542| diff --git a/ndc_analyser/Cargo.toml b/ndc_analyser/Cargo.toml new file mode 100644 index 00000000..48c56f64 --- /dev/null +++ b/ndc_analyser/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "ndc_analyser" +edition.workspace = true +version.workspace = true + +[dependencies] +itertools.workspace = true +ndc_core.workspace = true +ndc_lexer.workspace = true +ndc_parser.workspace = true +thiserror.workspace = true diff --git a/ndc_analyser/RETURN_TYPE_INFERENCE.md b/ndc_analyser/RETURN_TYPE_INFERENCE.md new file mode 100644 index 00000000..8969b813 --- /dev/null +++ b/ndc_analyser/RETURN_TYPE_INFERENCE.md @@ -0,0 +1,68 @@ +# Return type inference problem and solution + +## The problem + +The analyser infers function return types from the body expression's type. The body is usually a `Block`, and a block's type is the type of its last expression. + +The issue: when a function uses an explicit `return` statement with a semicolon (which is common), the parser wraps it as `Statement(Return { value })`. The `Statement` handler discards the inner type and returns `unit`: + +```rust +Expression::Statement(inner) => { + self.analyse(inner)?; + Ok(StaticType::unit()) // ← return type lost +} +``` + +This means a function like: + +```ndc +fn make_fn() { + fn inner() { 42 } + return inner; // ← semicolon makes this a Statement(Return) +} +``` + +...has its return type inferred as `Tuple([])` (unit) instead of `Function([] -> Int)`. + +### Why this matters now + +Previously this was masked because the function resolution fallback (`find_all_slots_by_name`) included ALL bindings regardless of type. A variable like `let f = make_fn()` with type `unit` would still be included in the dynamic overload set, and the VM would resolve it correctly at runtime. 
+ +When we changed function resolution to skip known non-callable types (so that `let foo = 300; foo(42)` produces a compile error instead of a confusing runtime error), the mis-typed `unit` bindings got filtered out, breaking legitimate code. + +## The solution: `return_type_stack` + +Added a `return_type_stack: Vec<Option<StaticType>>` to the `Analyser` struct. It tracks explicit `return` types for each nested function scope: + +1. **On function entry**: push `None` onto the stack +2. **On `return expr`**: analyse `expr`, then `fold_lub` its type into the top of the stack +3. **On function exit**: pop the stack, combine with the block's implicit return type: + +```rust +let implicit_return = self.analyse(body)?; +let explicit_return = self.return_type_stack.pop().unwrap(); + +let return_type = match explicit_return { + Some(ret) => ret.lub(&implicit_return), + None => implicit_return, +}; +``` + +This correctly handles: +- Functions with only implicit returns (no `return` keyword) → `explicit_return` is `None`, uses block type +- Functions with only explicit returns → combines explicit type with block's unit type via `lub` +- Functions with both early returns and an implicit final value → `lub` of all paths +- Nested functions → stack isolates each function's return types + +## Alternatives considered + +1. **Change `Statement(Return)` to preserve type**: Could special-case returns in the Statement handler, but this breaks the semantics of semicolons (which always discard) and doesn't handle early returns before the last statement. + +2. **Use a `Never`/`NoReturn` type for return statements**: The `return` expression could return a bottom type, and the block would then use `lub(Never, unit) = unit` for the block type, with the actual return type tracked separately. This would be more principled but requires introducing a bottom type throughout the type system. + +3. **Track returns at the Block level**: Instead of a stack, each block could propagate return types upward. 
More complex and doesn't clearly improve on the stack approach. + +## Limitations + +- `break` and `continue` have a similar issue (they discard the block's implicit type) but are less impactful since they don't produce callable values +- The `lub` combination means `fn foo() { if cond { return 42; } "hello" }` has return type `Any` (lub of Int and String), which is correct but imprecise — a union type would be more accurate diff --git a/ndc_analyser/src/analyser.rs b/ndc_analyser/src/analyser.rs new file mode 100644 index 00000000..77b9a063 --- /dev/null +++ b/ndc_analyser/src/analyser.rs @@ -0,0 +1,622 @@ +use crate::scope::ScopeTree; +use itertools::Itertools; +use ndc_core::{StaticType, TypeSignature}; +use ndc_lexer::Span; +use ndc_parser::{Binding, Expression, ExpressionLocation, ForBody, ForIteration, Lvalue}; +use std::fmt::Debug; + +#[derive(Debug)] +pub struct Analyser { + scope_tree: ScopeTree, + /// Stack of explicit `return` types for each enclosing function scope. + /// Pushed on function entry, popped on exit. The value accumulates the + /// lub of all `return <expr>` types seen so far. 
+ return_type_stack: Vec>, +} + +impl Analyser { + pub fn from_scope_tree(scope_tree: ScopeTree) -> Self { + Self { + scope_tree, + return_type_stack: Vec::new(), + } + } + + pub fn checkpoint(&self) -> ScopeTree { + self.scope_tree.clone() + } + + pub fn restore(&mut self, checkpoint: ScopeTree) { + self.scope_tree = checkpoint; + } + + pub fn analyse( + &mut self, + ExpressionLocation { expression, span }: &mut ExpressionLocation, + ) -> Result { + match expression { + Expression::BoolLiteral(_) => Ok(StaticType::Bool), + Expression::StringLiteral(_) => Ok(StaticType::String), + Expression::Int64Literal(_) | Expression::BigIntLiteral(_) => Ok(StaticType::Int), + Expression::Float64Literal(_) => Ok(StaticType::Float), + Expression::ComplexLiteral(_) => Ok(StaticType::Complex), + Expression::Continue | Expression::Break => Ok(StaticType::unit()), + Expression::Identifier { + name: ident, + resolved, + } => { + if ident == "None" { + return Ok(StaticType::Option(Box::new(StaticType::Any))); + } + let binding = self.scope_tree.get_binding_any(ident).ok_or_else(|| { + AnalysisError::identifier_not_previously_declared(ident, *span) + })?; + + *resolved = Binding::Resolved(binding); + + Ok(self.scope_tree.get_type(binding).clone()) + } + Expression::Statement(inner) => { + self.analyse(inner)?; + Ok(StaticType::unit()) + } + Expression::Logical { left, right, .. 
} => { + self.analyse(left)?; + self.analyse(right)?; + Ok(StaticType::Bool) + } + Expression::Grouping(expr) => self.analyse(expr), + Expression::VariableDeclaration { l_value, value } => { + let typ = self.analyse(value)?; + self.resolve_lvalue_declarative(l_value, typ, *span)?; + Ok(StaticType::unit()) + } + Expression::Assignment { l_value, r_value } => { + self.resolve_lvalue(l_value, *span)?; + self.analyse(r_value)?; + Ok(StaticType::unit()) + } + Expression::OpAssignment { + l_value, + r_value, + operation, + resolved_assign_operation, + resolved_operation, + } => { + let left_type = self.resolve_single_lvalue(l_value, *span)?; + let right_type = self.analyse(r_value)?; + let arg_types = vec![left_type, right_type]; + + *resolved_assign_operation = self + .scope_tree + .resolve_function_binding(&format!("{operation}="), &arg_types); + *resolved_operation = self + .scope_tree + .resolve_function_binding(operation, &arg_types); + + if let Binding::None = resolved_operation { + return Err(AnalysisError::function_not_found( + operation, &arg_types, *span, + )); + } + + Ok(StaticType::unit()) + } + Expression::FunctionDeclaration { + name, + resolved_name, + type_signature, + body, + return_type: return_type_slot, + captures, + .. + } => { + // Pre-register the function before analysing its body so recursive calls can + // resolve the name. The return type is unknown at this point so we use Any. 
+ let pre_slot = if let Some(name) = name { + let arity = type_signature.types().map(|t| t.len()); + if self.scope_tree.has_function_in_current_scope(name, arity) { + return Err(AnalysisError::function_redefinition(name, arity, *span)); + } + + let placeholder = StaticType::Function { + parameters: type_signature.types(), + return_type: Box::new(StaticType::Any), + }; + Some( + self.scope_tree + .create_local_binding(name.clone(), placeholder), + ) + } else { + None + }; + + self.scope_tree.new_function_scope(); + self.return_type_stack.push(None); + let param_types = self.resolve_parameters_declarative(type_signature, *span)?; + + let implicit_return = self.analyse(body)?; + let explicit_return = self.return_type_stack.pop().unwrap(); + *captures = self.scope_tree.current_scope_captures(); + self.scope_tree.destroy_scope(); + + // Combine explicit `return` types with the block's implicit return type. + let return_type = match explicit_return { + Some(ret) => ret.lub(&implicit_return), + None => implicit_return, + }; + *return_type_slot = Some(return_type); + + let function_type = StaticType::Function { + parameters: Some(param_types.clone()), + return_type: Box::new( + return_type_slot + .clone() + .expect("must have a value at this point"), + ), + }; + + if let Some(slot) = pre_slot { + self.scope_tree + .update_binding_type(slot, function_type.clone()); + *resolved_name = Some(slot); + } + + Ok(function_type) + } + Expression::Block { statements } => { + self.scope_tree.new_block_scope(); + let mut last = None; + for s in statements { + last = Some(self.analyse(s)?); + } + self.scope_tree.destroy_scope(); + + Ok(last.unwrap_or_else(StaticType::unit)) + } + Expression::If { + condition, + on_true, + on_false, + } => { + self.analyse(condition)?; + let true_type = self.analyse(on_true)?; + let false_type = if let Some(on_false) = on_false { + self.analyse(on_false)? 
+ } else { + StaticType::unit() + }; + + Ok(true_type.lub(&false_type)) + } + Expression::While { + expression, + loop_body, + } => { + self.analyse(expression)?; + self.analyse(loop_body)?; + Ok(StaticType::unit()) + } + Expression::For { iterations, body } => { + let return_type = self.resolve_for_iterations(iterations, body, *span)?; + Ok(return_type) + } + Expression::Call { + function, + arguments, + } => { + let mut type_sig = Vec::with_capacity(arguments.len()); + for a in arguments { + type_sig.push(self.analyse(a)?); + } + + let callee_type = + self.resolve_function_with_argument_types(function, &type_sig, *span)?; + + let StaticType::Function { return_type, .. } = callee_type else { + if callee_type == StaticType::Any { + return Ok(StaticType::Any); + } + return Err(AnalysisError::not_callable(&callee_type, *span)); + }; + + Ok(*return_type) + } + Expression::Tuple { values } => { + let mut types = Vec::with_capacity(values.len()); + for v in values { + types.push(self.analyse(v)?); + } + + Ok(StaticType::Tuple(types)) + } + Expression::List { values } => { + let element_type = self.analyse_multiple_expression_with_same_type(values)?; + + Ok(StaticType::List(Box::new( + element_type.unwrap_or(StaticType::Any), + ))) + } + Expression::Map { values, default } => { + let mut key_type: Option = None; + let mut value_type: Option = None; + for (key, value) in values { + Self::fold_lub(&mut key_type, self.analyse(key)?); + if let Some(value) = value { + Self::fold_lub(&mut value_type, self.analyse(value)?); + } + } + + if let Some(default) = default { + self.analyse(default)?; + } + + Ok(StaticType::Map { + key: Box::new(key_type.unwrap_or(StaticType::Any)), + value: Box::new(value_type.unwrap_or_else(StaticType::unit)), + }) + } + Expression::Return { value } => { + let typ = self.analyse(value)?; + if let Some(slot) = self.return_type_stack.last_mut() { + Self::fold_lub(slot, typ.clone()); + } + Ok(typ) + } + Expression::RangeInclusive { start, end } + | 
Expression::RangeExclusive { start, end } => { + if let Some(start) = start { + self.analyse(start)?; + } + if let Some(end) = end { + self.analyse(end)?; + } + + Ok(StaticType::Iterator(Box::new(StaticType::Int))) + } + } + } + + fn resolve_function_with_argument_types( + &mut self, + ident: &mut ExpressionLocation, + argument_types: &[StaticType], + span: Span, + ) -> Result { + let ExpressionLocation { + expression: Expression::Identifier { name, resolved }, + .. + } = ident + else { + // It's possible that we're not trying to invoke an identifier `foo()` but instead we're + // invoking a value like `get_function()()` so in this case we just continue like normal? + return self.analyse(ident); + }; + + let binding = self + .scope_tree + .resolve_function_binding(name, argument_types); + + let out_type = match &binding { + Binding::None => { + return Err(AnalysisError::function_not_found( + name, + argument_types, + span, + )); + } + Binding::Resolved(res) => self.scope_tree.get_type(*res).clone(), + + Binding::Dynamic(_) => StaticType::Function { + parameters: None, + return_type: Box::new(StaticType::Any), + }, + }; + + *resolved = binding; + + Ok(out_type) + } + + fn resolve_for_iterations( + &mut self, + iterations: &mut [ForIteration], + body: &mut ForBody, + span: Span, + ) -> Result { + let Some((iteration, tail)) = iterations.split_first_mut() else { + unreachable!("because this function is never called with an empty slice"); + }; + + let mut do_destroy = false; + match iteration { + ForIteration::Iteration { l_value, sequence } => { + let sequence_type = self.analyse(sequence)?; + + self.scope_tree.new_iteration_scope(); + + self.resolve_lvalue_declarative( + l_value, + sequence_type + .sequence_element_type() + .unwrap_or(StaticType::Any), + span, + )?; + do_destroy = true; + } + ForIteration::Guard(expr) => { + self.analyse(expr)?; + } + } + + let out_type = if !tail.is_empty() { + self.resolve_for_iterations(tail, body, span)? 
+ } else { + match body { + ForBody::Block(block) => { + self.analyse(block)?; + StaticType::unit() + } + ForBody::List { + expr, + accumulator_slot, + .. + } => { + // Reserve the accumulator slot BEFORE analysing the body so + // that nested for-comprehensions receive strictly higher slot + // numbers and cannot collide with this accumulator. + *accumulator_slot = Some(self.scope_tree.reserve_anonymous_slot()); + StaticType::List(Box::new(self.analyse(expr)?)) + } + ForBody::Map { + key, + value, + default, + accumulator_slot, + .. + } => { + *accumulator_slot = Some(self.scope_tree.reserve_anonymous_slot()); + let key_type = self.analyse(key)?; + let value_type = if let Some(value) = value { + self.analyse(value)? + } else { + StaticType::unit() + }; + + if let Some(default) = default { + self.analyse(default)?; + } + + StaticType::Map { + key: Box::new(key_type), + value: Box::new(value_type), + } + } + } + }; + + if do_destroy { + self.scope_tree.destroy_scope(); + } + + Ok(out_type) + } + + fn resolve_single_lvalue( + &mut self, + lvalue: &mut Lvalue, + span: Span, + ) -> Result { + if matches!(lvalue, Lvalue::Sequence(_)) { + return Err(AnalysisError::lvalue_required_to_be_single_identifier(span)); + } + self.resolve_lvalue(lvalue, span) + } + + fn resolve_lvalue( + &mut self, + lvalue: &mut Lvalue, + span: Span, + ) -> Result { + match lvalue { + Lvalue::Identifier { + identifier, + resolved, + .. 
+ } => { + let Some(target) = self.scope_tree.get_binding_any(identifier) else { + return Err(AnalysisError::identifier_not_previously_declared( + identifier, span, + )); + }; + + *resolved = Some(target); + Ok(self.scope_tree.get_type(target).clone()) + } + Lvalue::Index { + index, + value, + resolved_set, + resolved_get, + } => { + let index_type = self.analyse(index)?; + let type_of_index_target = self.analyse(value)?; + + let get_args = [type_of_index_target.clone(), index_type.clone()]; + let set_args = [type_of_index_target.clone(), index_type, StaticType::Any]; + + *resolved_get = Some(self.scope_tree.resolve_function_binding("[]", &get_args)); + *resolved_set = Some(self.scope_tree.resolve_function_binding("[]=", &set_args)); + + type_of_index_target + .index_element_type() + .ok_or_else(|| AnalysisError::unable_to_index_into(&type_of_index_target, span)) + } + Lvalue::Sequence(seq) => { + for sub_lvalue in seq { + self.resolve_lvalue(sub_lvalue, span)?; + } + Ok(StaticType::unit()) + } + } + } + + /// Resolve expressions as arguments to a function and return the function arity + fn resolve_parameters_declarative( + &mut self, + type_signature: &TypeSignature, + span: Span, + ) -> Result, AnalysisError> { + let TypeSignature::Exact(parameters) = type_signature else { + return Ok(vec![]); + }; + + let mut types: Vec = Vec::new(); + let mut seen_names: Vec<&str> = Vec::new(); + + for param in parameters { + types.push(StaticType::Any); + if seen_names.contains(¶m.name.as_str()) { + return Err(AnalysisError::parameter_redefined(¶m.name, span)); + } + seen_names.push(¶m.name); + + self.scope_tree + .create_local_binding(param.name.clone(), StaticType::Any); + } + + Ok(types) + } + fn resolve_lvalue_declarative( + &mut self, + lvalue: &mut Lvalue, + typ: StaticType, + span: Span, + ) -> Result<(), AnalysisError> { + match lvalue { + Lvalue::Identifier { + identifier, + resolved, + inferred_type, + .. 
+ } => { + *resolved = Some( + self.scope_tree + .create_local_binding(identifier.clone(), typ.clone()), + ); + *inferred_type = Some(typ); + } + Lvalue::Index { index, value, .. } => { + self.analyse(index)?; + self.analyse(value)?; + } + Lvalue::Sequence(seq) => { + let sub_types = typ + .unpack() + .ok_or_else(|| AnalysisError::unable_to_unpack_type(&typ, span))?; + + for (sub_lvalue, sub_lvalue_type) in seq.iter_mut().zip(sub_types) { + self.resolve_lvalue_declarative( + sub_lvalue, + sub_lvalue_type.clone(), + /* todo: figure out how to narrow this span */ span, + )? + } + } + } + + Ok(()) + } + fn analyse_multiple_expression_with_same_type( + &mut self, + expressions: &mut Vec, + ) -> Result, AnalysisError> { + let mut element_type: Option = None; + for expression in expressions { + Self::fold_lub(&mut element_type, self.analyse(expression)?); + } + Ok(element_type) + } + + /// Fold a new type into an accumulator via least-upper-bound. + fn fold_lub(acc: &mut Option, new_type: StaticType) { + match acc { + Some(prev) => *prev = prev.lub(&new_type), + None => *acc = Some(new_type), + } + } +} + +#[derive(thiserror::Error, Debug)] +#[error("{text}")] +pub struct AnalysisError { + text: String, + span: Span, +} + +impl AnalysisError { + pub fn span(&self) -> Span { + self.span + } + fn function_redefinition(name: &str, arity: Option, span: Span) -> Self { + let arity_desc = match arity { + Some(n) => format!("{n} parameter{}", if n == 1 { "" } else { "s" }), + None => "variadic parameters".to_string(), + }; + Self { + text: format!( + "Illegal redefinition of function '{name}' with {arity_desc} in the same scope" + ), + span, + } + } + + fn parameter_redefined(param: &str, span: Span) -> Self { + Self { + text: format!("Illegal redefinition of parameter {param}"), + span, + } + } + fn unable_to_index_into(typ: &StaticType, span: Span) -> Self { + Self { + text: format!("Unable to index into {typ}"), + span, + } + } + fn unable_to_unpack_type(typ: &StaticType, 
span: Span) -> Self { + Self { + text: format!("Invalid unpacking of {typ}"), + span, + } + } + fn lvalue_required_to_be_single_identifier(span: Span) -> Self { + Self { + text: "This lvalue is required to be a single identifier".to_string(), + span, + } + } + + fn function_not_found(ident: &str, types: &[StaticType], span: Span) -> Self { + Self { + text: format!( + "No function called '{ident}' found that matches the arguments '{}'", + types.iter().join(", ") + ), + span, + } + } + + fn not_callable(typ: &StaticType, span: Span) -> Self { + Self { + text: format!("Unable to invoke {typ} as a function."), + span, + } + } + + fn identifier_not_previously_declared(ident: &str, span: Span) -> Self { + Self { + text: format!("Identifier {ident} has not previously been declared"), + span, + } + } +} diff --git a/ndc_analyser/src/lib.rs b/ndc_analyser/src/lib.rs new file mode 100644 index 00000000..925f99eb --- /dev/null +++ b/ndc_analyser/src/lib.rs @@ -0,0 +1,5 @@ +mod analyser; +mod scope; + +pub use analyser::{Analyser, AnalysisError}; +pub use scope::ScopeTree; diff --git a/ndc_analyser/src/scope.rs b/ndc_analyser/src/scope.rs new file mode 100644 index 00000000..69107bad --- /dev/null +++ b/ndc_analyser/src/scope.rs @@ -0,0 +1,829 @@ +use ndc_core::StaticType; +use ndc_parser::{Binding, CaptureSource, ResolvedVar}; +use std::fmt::{Debug, Formatter}; + +#[derive(Debug, Clone)] +pub(crate) struct Scope { + parent_idx: Option, + creates_environment: bool, // Only true for function scopes and for-loop iterations + base_offset: usize, + function_scope_idx: usize, + identifiers: Vec<(String, StaticType)>, + upvalues: Vec<(String, CaptureSource)>, +} + +impl Scope { + pub(crate) fn offset(&self) -> usize { + self.base_offset + self.identifiers.len() + } + + pub(crate) fn new_function_scope(parent_idx: Option, function_scope_idx: usize) -> Self { + Self { + parent_idx, + creates_environment: true, + base_offset: 0, + function_scope_idx, + identifiers: Vec::default(), + 
upvalues: Vec::default(), + } + } + + pub(crate) fn new_block_scope( + parent_idx: Option, + base_offset: usize, + function_scope_idx: usize, + ) -> Self { + Self { + parent_idx, + creates_environment: false, + base_offset, + function_scope_idx, + identifiers: Vec::default(), + upvalues: Vec::default(), + } + } + + /// Identical to `new_block_scope` today — kept as a separate constructor so that + /// iteration-specific behaviour (e.g. break/continue scoping) can be added later. + pub(crate) fn new_iteration_scope( + parent_idx: Option, + base_offset: usize, + function_scope_idx: usize, + ) -> Self { + Self::new_block_scope(parent_idx, base_offset, function_scope_idx) + } + + pub(crate) fn find_slot_by_name(&self, find_ident: &str) -> Option { + self.identifiers + .iter() + .rposition(|(ident, _)| ident == find_ident) + .map(|idx| idx + self.base_offset) + } + + /// Returns slots for all bindings with the given name whose type could be + /// callable at runtime (`Function` or `Any`). Bindings with concrete + /// non-function types (e.g. `Int`, `String`) are excluded so they never + /// pollute the dynamic overload set used for function resolution. + fn find_all_callable_slots_by_name(&self, find_ident: &str) -> Vec { + self.identifiers + .iter() + .enumerate() + .filter_map(|(slot, (ident, typ))| { + if ident == find_ident && typ.could_be_callable() { + Some(slot + self.base_offset) + } else { + None + } + }) + .collect() + } + + fn find_function_candidates(&self, find_ident: &str, find_types: &[StaticType]) -> Vec { + self.identifiers.iter() + .enumerate() + .rev() + .filter_map(|(slot, (ident, typ))| { + if ident != find_ident { + return None; + } + + // If the thing is not a function we're not interested + let StaticType::Function { parameters, .. 
} = typ else { + return None; + }; + + let Some(param_types) = parameters else { + unreachable!("find_function_candidates should never be called when there are variadic matches"); + }; + + let is_good = param_types.len() == find_types.len() + && param_types.iter().zip(find_types.iter()).all(|(typ_1, typ_2)| !typ_1.is_incompatible_with(typ_2)); + + is_good.then_some(slot) + }) + .map(|idx| idx + self.base_offset) + .collect() + } + fn find_function(&self, find_ident: &str, find_types: &[StaticType]) -> Option { + self.identifiers + .iter() + .rposition(|(ident, typ)| ident == find_ident && typ.is_fn_and_matches(find_types)) + .map(|idx| idx + self.base_offset) + } + + /// Check if this scope already contains a function with the given name and arity. + fn has_function_with_arity(&self, name: &str, arity: Option) -> bool { + self.identifiers.iter().any(|(ident, typ)| { + if ident != name { + return false; + } + match typ { + StaticType::Function { + parameters: Some(params), + .. + } => match arity { + Some(a) => params.len() == a, + None => false, + }, + StaticType::Function { + parameters: None, .. + } => arity.is_none(), + _ => false, + } + }) + } + + fn allocate(&mut self, name: String, typ: StaticType) -> usize { + self.identifiers.push((name, typ)); + // Slot is just the length of the list minus one + self.base_offset + self.identifiers.len() - 1 + } + + fn add_upvalue(&mut self, name: &str, source: CaptureSource) -> usize { + // Deduplicate by name AND source so that multiple overloads of the same + // function name can each have their own upvalue entry. 
+ if let Some(idx) = self + .upvalues + .iter() + .position(|(n, s)| n == name && *s == source) + { + return idx; + } + + self.upvalues.push((name.to_string(), source)); + self.upvalues.len() - 1 + } + + fn find_upvalue(&self, name: &str) -> Option { + self.upvalues.iter().position(|(n, _)| n == name) + } + + fn find_upvalues_by_name(&self, name: &str) -> Vec { + self.upvalues + .iter() + .enumerate() + .filter_map(|(idx, (n, _))| if n == name { Some(idx) } else { None }) + .collect() + } +} + +#[derive(Clone)] +pub struct ScopeTree { + current_scope_idx: usize, + global_scope: Scope, + scopes: Vec, +} + +impl ScopeTree { + /// Build a `ScopeTree` seeded with pre-registered global bindings (native functions etc.). + /// + /// Two root scopes exist by design: `global_scope` holds native/built-in bindings that are + /// always accessible, while `scopes[0]` is the user's top-level function scope where + /// user-defined declarations land. This separation keeps native bindings out of the + /// mutable scope chain so they can be searched as a fallback without interfering with + /// user-level shadowing. 
+ pub fn from_global_scope(global_scope_map: Vec<(String, StaticType)>) -> Self { + let mut global_scope = Scope::new_function_scope(None, 0); + global_scope.identifiers = global_scope_map; + + Self { + current_scope_idx: 0, + global_scope, + scopes: vec![Scope::new_function_scope(None, 0)], + } + } + + pub(crate) fn get_type(&self, res: ResolvedVar) -> &StaticType { + match res { + ResolvedVar::Local { slot } => self.find_type_by_slot(self.current_scope_idx, slot), + ResolvedVar::Upvalue { slot } => { + let mut scope_idx = self.scopes[self.current_scope_idx].function_scope_idx; + let mut check_slot = slot; + + loop { + let (_, source) = &self.scopes[scope_idx].upvalues[check_slot]; + + match source { + CaptureSource::Local(slot) => { + let parent = self.scopes[scope_idx] + .parent_idx + .expect("expected parent scope"); + return self.find_type_by_slot(parent, *slot); + } + CaptureSource::Upvalue(slot) => { + scope_idx = self.get_parent_function_scope_idx(scope_idx); + check_slot = *slot; + } + } + } + } + ResolvedVar::Global { slot } => &self.global_scope.identifiers[slot].1, + } + } + + fn get_parent_function_scope_idx(&self, scope_idx: usize) -> usize { + self.scopes[self.scopes[scope_idx] + .parent_idx + .expect("expected parent scope to exist")] + .function_scope_idx + } + + pub(crate) fn find_type_by_slot(&self, start_scope: usize, slot: usize) -> &StaticType { + let mut scope_idx = start_scope; + loop { + let scope = &self.scopes[scope_idx]; + if slot >= scope.base_offset && slot < scope.base_offset + scope.identifiers.len() { + return &scope.identifiers[slot - scope.base_offset].1; + } + scope_idx = scope + .parent_idx + .expect("slot not found in any scope within function"); + } + } + + pub(crate) fn new_block_scope(&mut self) -> &Scope { + let old_scope_idx = self.current_scope_idx; + self.current_scope_idx = self.scopes.len(); + let new_scope = Scope::new_block_scope( + Some(old_scope_idx), + self.scopes[old_scope_idx].offset(), + 
self.scopes[old_scope_idx].function_scope_idx, + ); + self.scopes.push(new_scope); + &self.scopes[self.current_scope_idx] + } + + pub(crate) fn new_function_scope(&mut self) -> &Scope { + let old_scope_idx = self.current_scope_idx; + self.current_scope_idx = self.scopes.len(); + let new_scope = Scope::new_function_scope(Some(old_scope_idx), self.scopes.len()); + self.scopes.push(new_scope); + &self.scopes[self.current_scope_idx] + } + + pub(crate) fn new_iteration_scope(&mut self) -> &Scope { + let old_scope_idx = self.current_scope_idx; + self.current_scope_idx = self.scopes.len(); + let new_scope = Scope::new_iteration_scope( + Some(old_scope_idx), + self.scopes[old_scope_idx].offset(), + self.scopes[old_scope_idx].function_scope_idx, + ); + self.scopes.push(new_scope); + &self.scopes[self.current_scope_idx] + } + + pub(crate) fn destroy_scope(&mut self) { + let next = self.scopes[self.current_scope_idx] + .parent_idx + .expect("tried to destroy scope while there were none"); + self.current_scope_idx = next; + } + + pub(crate) fn current_scope_captures(&self) -> Vec { + self.scopes[self.current_scope_idx] + .upvalues + .iter() + .map(|(_, source)| source.clone()) + .collect() + } + + // When the Analyser encounters an identifier as the rhs of an expression during resolution it + // will use this method to lookup if that identifier has already been seen. 
+ pub(crate) fn get_binding_any(&mut self, ident: &str) -> Option { + let mut scope_ptr = self.current_scope_idx; + let mut env_scopes: Vec = Vec::default(); + + loop { + if let Some(slot) = self.scopes[scope_ptr].find_slot_by_name(ident) { + return Some(self.resolve_found_local(ident, slot, &env_scopes)); + } + if let Some(slot) = self.scopes[scope_ptr].find_upvalue(ident) { + return Some(self.resolve_found_upvalue(ident, slot, &env_scopes)); + } + + if let Some(parent_idx) = self.scopes[scope_ptr].parent_idx { + if self.scopes[scope_ptr].creates_environment { + env_scopes.push(scope_ptr); + } + scope_ptr = parent_idx; + } else { + return Some(ResolvedVar::Global { + slot: self.global_scope.find_slot_by_name(ident)?, + }); + } + } + } + + /// Resolve a function call binding in a single scope-chain walk. + /// + /// At each scope the priorities are: + /// 1. Exact type match on a local → return `Binding::Resolved` immediately + /// 2. Upvalues with matching name → added to candidates (not early-returned, + /// because a different overload may be an exact match in an outer scope) + /// 3. Compatible-type candidates → remember first set found (for `Binding::Dynamic`) + /// 4. All same-named bindings → accumulate as last-resort fallback + pub(crate) fn resolve_function_binding(&mut self, ident: &str, sig: &[StaticType]) -> Binding { + let mut scope_ptr = self.current_scope_idx; + let mut env_scopes: Vec = Vec::default(); + let mut loose_candidates: Option> = None; + let mut all_by_name: Vec = Vec::new(); + + loop { + // 1. Exact match on a local → return immediately + if let Some(slot) = self.scopes[scope_ptr].find_function(ident, sig) { + return Binding::Resolved(self.resolve_found_local(ident, slot, &env_scopes)); + } + + // 2. Upvalues with matching name — collect as candidates but continue + // walking, because the upvalue may be a different overload (e.g. + // different arity) and the exact match could be in a parent scope. 
+ for uv_slot in self.scopes[scope_ptr].find_upvalues_by_name(ident) { + all_by_name.push(self.resolve_found_upvalue(ident, uv_slot, &env_scopes)); + } + + // 3. Compatible candidates (keep only the first scope's matches — shadowing) + if loose_candidates.is_none() { + let candidates = self.scopes[scope_ptr].find_function_candidates(ident, sig); + if !candidates.is_empty() { + loose_candidates = Some( + candidates + .into_iter() + .map(|slot| self.resolve_found_local(ident, slot, &env_scopes)) + .collect(), + ); + } + } + + // 4. All same-named bindings (accumulate across all scopes) + let slots = self.scopes[scope_ptr].find_all_callable_slots_by_name(ident); + all_by_name.extend( + slots + .into_iter() + .map(|slot| self.resolve_found_local(ident, slot, &env_scopes)), + ); + + // Advance to parent scope + if let Some(parent_idx) = self.scopes[scope_ptr].parent_idx { + if self.scopes[scope_ptr].creates_environment { + env_scopes.push(scope_ptr); + } + scope_ptr = parent_idx; + } else { + // Fall through to globals + if let Some(slot) = self.global_scope.find_function(ident, sig) { + return Binding::Resolved(ResolvedVar::Global { slot }); + } + + if loose_candidates.is_none() { + let candidates = self.global_scope.find_function_candidates(ident, sig); + if !candidates.is_empty() { + loose_candidates = Some( + candidates + .into_iter() + .map(|slot| ResolvedVar::Global { slot }) + .collect(), + ); + } + } + + all_by_name.extend( + self.global_scope + .find_all_callable_slots_by_name(ident) + .into_iter() + .map(|slot| ResolvedVar::Global { slot }), + ); + + break; + } + } + + if let Some(candidates) = loose_candidates { + return Binding::Dynamic(candidates); + } + if !all_by_name.is_empty() { + return Binding::Dynamic(all_by_name); + } + Binding::None + } + + pub(crate) fn create_local_binding(&mut self, ident: String, typ: StaticType) -> ResolvedVar { + ResolvedVar::Local { + slot: self.scopes[self.current_scope_idx].allocate(ident, typ), + } + } + + /// Check 
whether the current scope already has a `fn` declaration with + /// the given name and arity. Used to detect illegal same-scope redefinitions. + pub(crate) fn has_function_in_current_scope(&self, name: &str, arity: Option) -> bool { + self.scopes[self.current_scope_idx].has_function_with_arity(name, arity) + } + + /// Reserve a slot in the current scope without creating a named binding. + /// Used to allocate the list/map accumulator before analysing the body of a + /// for-comprehension, so that any nested comprehensions receive strictly + /// higher slot numbers and cannot collide with this accumulator. + /// + /// Uses `"\x00"` as a sentinel name that can never collide with user identifiers + /// since the lexer never produces null bytes. + pub(crate) fn reserve_anonymous_slot(&mut self) -> usize { + self.scopes[self.current_scope_idx].allocate("\x00".to_string(), StaticType::Any) + } + + pub(crate) fn update_binding_type(&mut self, var: ResolvedVar, new_type: StaticType) { + match var { + ResolvedVar::Local { slot } => { + let scope_idx = self.find_scope_owning_slot(self.current_scope_idx, slot); + let base = self.scopes[scope_idx].base_offset; + self.scopes[scope_idx].identifiers[slot - base].1 = new_type; + } + ResolvedVar::Upvalue { slot } => { + let mut scope_idx = self.scopes[self.current_scope_idx].function_scope_idx; + let mut check_slot = slot; + + loop { + let (_, source) = self.scopes[scope_idx].upvalues[check_slot].clone(); + + match source { + CaptureSource::Local(local_slot) => { + let parent = self.scopes[scope_idx] + .parent_idx + .expect("expected parent scope"); + let owning = self.find_scope_owning_slot(parent, local_slot); + let base = self.scopes[owning].base_offset; + self.scopes[owning].identifiers[local_slot - base].1 = new_type; + return; + } + CaptureSource::Upvalue(uv_slot) => { + scope_idx = self.get_parent_function_scope_idx(scope_idx); + check_slot = uv_slot; + } + } + } + } + ResolvedVar::Global { .. 
} => { + panic!("update_binding_type called with a global binding") + } + } + } + + pub(crate) fn find_scope_owning_slot(&self, start_scope: usize, slot: usize) -> usize { + let mut scope_idx = start_scope; + loop { + let scope = &self.scopes[scope_idx]; + if slot >= scope.base_offset && slot < scope.base_offset + scope.identifiers.len() { + return scope_idx; + } + scope_idx = scope + .parent_idx + .expect("slot not found in any scope within function"); + } + } + + /// Given a local slot found during a scope walk, return the appropriate `ResolvedVar`. + /// If `env_scopes` is empty the slot is in the current function scope and can be + /// referenced directly as a `Local`. Otherwise it must be hoisted through intervening + /// function scopes as an upvalue chain. + fn resolve_found_local( + &mut self, + ident: &str, + slot: usize, + env_scopes: &[usize], + ) -> ResolvedVar { + if env_scopes.is_empty() { + ResolvedVar::Local { slot } + } else { + let slot = self.hoist_upvalue(ident, slot, env_scopes); + ResolvedVar::Upvalue { slot } + } + } + + /// Given an upvalue slot found during a scope walk, return the appropriate `ResolvedVar`. + /// If `env_scopes` is empty the upvalue belongs to the current function scope. Otherwise + /// it must be hoisted further through intervening function scopes. + fn resolve_found_upvalue( + &mut self, + ident: &str, + slot: usize, + env_scopes: &[usize], + ) -> ResolvedVar { + if env_scopes.is_empty() { + ResolvedVar::Upvalue { slot } + } else { + let slot = self.hoist_from_upvalue(ident, slot, env_scopes); + ResolvedVar::Upvalue { slot } + } + } + + // In a situation where the analyser is recursing through the scope tree and finds an identifier + // in the current local scope, if we were searching from a nested scope we now have to 'hoist' + // this local value as an upvalue in all the nested scopes. This function is responsible for adding + // this value as an upvalue to all the nested scopes. 
+ // + // `env_scopes`: is a list of scopes that the analyser has already searched that will need to get this upvalue. + pub(crate) fn hoist_upvalue( + &mut self, + name: &str, + local_slot: usize, + env_scopes: &[usize], + ) -> usize { + let mut capture_idx = local_slot; + let mut is_local = true; + + for &scope_idx in env_scopes.iter().rev() { + // The very first scope we encounter when we iterate this list in reverse is the scope directly inside + // the scope that captures the identifier as a local scope. In this case we want to capture the variable on the stack instead. + let source = if is_local { + CaptureSource::Local(capture_idx) + } else { + CaptureSource::Upvalue(capture_idx) + }; + capture_idx = self.scopes[scope_idx].add_upvalue(name, source); + is_local = false; // only the first iteration is local + } + + capture_idx + } + + pub(crate) fn hoist_from_upvalue( + &mut self, + name: &str, + upvalue_slot: usize, + env_scopes: &[usize], + ) -> usize { + let mut capture_idx = upvalue_slot; + for &scope_idx in env_scopes.iter().rev() { + capture_idx = + self.scopes[scope_idx].add_upvalue(name, CaptureSource::Upvalue(capture_idx)); + } + + capture_idx + } +} + +impl Debug for ScopeTree { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + writeln!(f)?; + for (id, scope) in self.scopes.iter().enumerate() { + writeln!(f, "{id}: {scope:?}")?; + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use ndc_parser::ResolvedVar; + + fn empty_scope_tree() -> ScopeTree { + ScopeTree::from_global_scope(vec![]) + } + + #[test] + fn single_local_in_function_scope() { + let mut tree = empty_scope_tree(); + let var = tree.create_local_binding("x".into(), StaticType::Int); + assert_eq!(var, ResolvedVar::Local { slot: 0 }); + assert_eq!( + tree.get_binding_any("x"), + Some(ResolvedVar::Local { slot: 0 }) + ); + } + + #[test] + fn multiple_locals_get_ascending_slots() { + let mut tree = empty_scope_tree(); + let x = 
// Entering a function scope restarts local slot numbering at 0, and a name declared
// outside the function resolves to an upvalue instead of a local.
#[test]
fn function_scope_resets_slots_and_captures_as_upvalue() {
    let mut scopes = empty_scope_tree();
    scopes.create_local_binding("x".into(), StaticType::Int);
    scopes.new_function_scope();

    let inner_local = scopes.create_local_binding("y".into(), StaticType::Int);
    assert_eq!(inner_local, ResolvedVar::Local { slot: 0 });

    let outer_lookup = scopes.get_binding_any("x");
    assert_eq!(outer_lookup, Some(ResolvedVar::Upvalue { slot: 0 }));
}
// A name that exists only in the pre-registered global scope resolves to a `Global` slot.
#[test]
fn global_lookup() {
    let mut tree = ScopeTree::from_global_scope(vec![(
        "print".into(),
        StaticType::Function {
            parameters: None,
            return_type: Box::new(StaticType::Any),
        },
    )]);

    assert_eq!(
        tree.get_binding_any("print"),
        Some(ResolvedVar::Global { slot: 0 })
    );
}
+ #[test] + fn upvalue_hoisting_across_two_function_scopes() { + let mut tree = empty_scope_tree(); + tree.create_local_binding("x".into(), StaticType::Int); + + tree.new_function_scope(); // outer + tree.new_function_scope(); // inner + + let resolved = tree.get_binding_any("x"); + assert_eq!(resolved, Some(ResolvedVar::Upvalue { slot: 0 })); + + // Verify the capture chain was built correctly. + // inner's upvalue 0 should point to outer's upvalue via Upvalue(0). + // outer's upvalue 0 should capture the top-level local via Local(0). + let inner_scope_idx = tree.current_scope_idx; + let inner_capture = &tree.scopes[inner_scope_idx].upvalues[0]; + assert_eq!(inner_capture.1, CaptureSource::Upvalue(0)); + + let outer_scope_idx = tree.scopes[inner_scope_idx] + .parent_idx + .expect("inner must have parent"); + let outer_capture = &tree.scopes[outer_scope_idx].upvalues[0]; + assert_eq!(outer_capture.1, CaptureSource::Local(0)); + } + + // Simulates: let a = 1; let b = 2; fn f() { a; b } + // Both a and b are captured. They should get distinct upvalue indices. + #[test] + fn multiple_upvalues_get_distinct_indices() { + let mut tree = empty_scope_tree(); + tree.create_local_binding("a".into(), StaticType::Int); + tree.create_local_binding("b".into(), StaticType::String); + + tree.new_function_scope(); + + let a = tree.get_binding_any("a"); + let b = tree.get_binding_any("b"); + assert_eq!(a, Some(ResolvedVar::Upvalue { slot: 0 })); + assert_eq!(b, Some(ResolvedVar::Upvalue { slot: 1 })); + } + + // Resolving the same upvalue twice should return the same index, + // not create a duplicate entry. 
+ #[test] + fn duplicate_upvalue_resolution_reuses_index() { + let mut tree = empty_scope_tree(); + tree.create_local_binding("x".into(), StaticType::Int); + + tree.new_function_scope(); + + let first = tree.get_binding_any("x"); + let second = tree.get_binding_any("x"); + assert_eq!(first, second); + assert_eq!(first, Some(ResolvedVar::Upvalue { slot: 0 })); + + let fn_scope = &tree.scopes[tree.current_scope_idx]; + assert_eq!(fn_scope.upvalues.len(), 1); + } + + // Simulates: let x = 1; fn outer() { fn inner() { x } } + // After resolving x in inner (which hoists through outer), we should + // be able to look up x's type via get_type on the upvalue. + #[test] + fn get_type_follows_upvalue_chain() { + let mut tree = empty_scope_tree(); + tree.create_local_binding("x".into(), StaticType::Int); + + tree.new_function_scope(); // outer + tree.new_function_scope(); // inner + + let resolved = tree.get_binding_any("x").unwrap(); + assert_eq!(tree.get_type(resolved), &StaticType::Int); + } + + // Simulates: fn outer() { let x = 1; fn middle() { fn inner() { x } } } + // When inner's upvalue for x is resolved, a sibling function of inner + // should be able to find x already registered as an upvalue on middle + // (via find_upvalue), rather than re-walking all the way to outer. 
+ #[test] + fn sibling_closure_finds_existing_upvalue() { + let mut tree = empty_scope_tree(); + tree.create_local_binding("x".into(), StaticType::Int); + + tree.new_function_scope(); // middle + + // First closure resolves x, which registers it on middle's upvalue list + tree.new_function_scope(); // inner1 + let r1 = tree.get_binding_any("x"); + assert_eq!(r1, Some(ResolvedVar::Upvalue { slot: 0 })); + tree.destroy_scope(); // back to middle + + // Second closure resolves x — middle already has it as an upvalue, + // so it should be found via find_upvalue without re-hoisting + tree.new_function_scope(); // inner2 + let r2 = tree.get_binding_any("x"); + assert_eq!(r2, Some(ResolvedVar::Upvalue { slot: 0 })); + tree.destroy_scope(); // back to middle + + // middle should still have exactly one upvalue entry + let middle_idx = tree.current_scope_idx; + assert_eq!(tree.scopes[middle_idx].upvalues.len(), 1); + } +} diff --git a/ndc_bin/Cargo.toml b/ndc_bin/Cargo.toml index 2cd5c4f4..4398dc77 100644 --- a/ndc_bin/Cargo.toml +++ b/ndc_bin/Cargo.toml @@ -17,9 +17,10 @@ miette = { version = "7.6.0", features = ["fancy"] } ndc_lexer.workspace = true ndc_interpreter.workspace = true ndc_stdlib.workspace = true +ndc_core.workspace = true ndc_lsp.workspace = true owo-colors.workspace = true +yansi.workspace = true rustyline.workspace = true -tap.workspace = true termimad = "0.34.1" tokio.workspace = true diff --git a/ndc_bin/src/diagnostic.rs b/ndc_bin/src/diagnostic.rs index add518b0..89375a4b 100644 --- a/ndc_bin/src/diagnostic.rs +++ b/ndc_bin/src/diagnostic.rs @@ -1,6 +1,6 @@ use miette::{Diagnostic, LabeledSpan, SourceSpan}; -use ndc_lexer::Span; use ndc_interpreter::InterpreterError; +use ndc_lexer::Span; use std::fmt; fn span_to_source_span(span: Span) -> SourceSpan { @@ -46,28 +46,34 @@ impl From for NdcReport { fn from(err: InterpreterError) -> Self { match err { InterpreterError::Lexer { cause } => Self { - message: cause.to_string(), + message: format!("LexerError: 
{}", cause), span: Some(span_to_source_span(cause.span())), label: "here", help: cause.help_text().map(str::to_owned), }, InterpreterError::Parser { cause } => Self { - message: cause.to_string(), + message: format!("ParserError: {}", cause), span: Some(span_to_source_span(cause.span())), label: "here", help: cause.help_text().map(str::to_owned), }, InterpreterError::Resolver { cause } => Self { - message: cause.to_string(), + message: format!("ResolverError: {}", cause), span: Some(span_to_source_span(cause.span())), label: "related to this", help: None, }, - InterpreterError::Evaluation(cause) => Self { - message: cause.to_string(), + InterpreterError::Compiler { cause } => Self { + message: format!("CompilerError: {}", cause), span: Some(span_to_source_span(cause.span())), label: "related to this", - help: cause.help_text().map(str::to_owned), + help: None, + }, + InterpreterError::Vm(err) => Self { + message: format!("VMError: {}", err.message), + span: err.span.map(span_to_source_span), + label: "related to this", + help: None, }, } } diff --git a/ndc_bin/src/docs.rs b/ndc_bin/src/docs.rs index 5101e83f..878c588d 100644 --- a/ndc_bin/src/docs.rs +++ b/ndc_bin/src/docs.rs @@ -1,89 +1,141 @@ +use ndc_core::{Parameter, StaticType, TypeSignature}; use ndc_interpreter::Interpreter; -use ndc_interpreter::function::{Parameter, TypeSignature}; -use ndc_stdlib::WithStdlib; use std::cmp::Ordering; use std::fmt::Write; use strsim::normalized_damerau_levenshtein; -use tap::Tap; -use termimad::crossterm::style::Stylize; -use termimad::{Alignment, MadSkin}; +use yansi::Paint; -/// Returns `true` if `needle` is a substring of `haystack` or if they are at least 80% similar fn string_match(needle: &str, haystack: &str) -> bool { haystack.contains(needle) || normalized_damerau_levenshtein(needle, haystack) > 0.8 } -pub fn docs(query: Option<&str>) -> anyhow::Result<()> { - let interpreter = Interpreter::new(Vec::new()) // Discard the output - .with_stdlib(); +fn 
terminal_width() -> usize { + termimad::crossterm::terminal::size() + .map(|(w, _)| w as usize) + .unwrap_or(80) + .min(120) +} + +/// Wraps `text` to `max_width` columns, indenting every line with `indent`. +/// Blank lines in the input are treated as paragraph breaks and preserved. +fn wrap_text(text: &str, max_width: usize, indent: &str) -> String { + let available = max_width.saturating_sub(indent.len()); + let mut result = String::new(); + + for (i, paragraph) in text.split("\n\n").enumerate() { + if i > 0 { + result.push('\n'); + result.push_str(indent); + result.push('\n'); + } + result.push_str(indent); + let mut col = 0usize; + for word in paragraph.split_whitespace() { + if col > 0 && col + 1 + word.len() > available { + result.push('\n'); + result.push_str(indent); + col = 0; + } else if col > 0 { + result.push(' '); + col += 1; + } + result.push_str(word); + col += word.len(); + } + } + + result +} + +pub fn docs(query: Option<&str>, no_color: bool) -> anyhow::Result<()> { + if no_color { + yansi::disable(); + } else { + yansi::whenever(yansi::Condition::TTY_AND_COLOR); + } - let functions = interpreter.environment().borrow().get_all_functions(); + let mut interpreter = Interpreter::capturing(); + interpreter.configure(ndc_stdlib::register); - let matched_functions = functions - .into_iter() + let mut functions: Vec<_> = interpreter + .functions() .filter(|func| { if let Some(query) = query { - string_match(query, func.name()) + string_match(query, &func.name) } else { true } }) - .collect::>() - .tap_mut(|list| { - list.sort_by(|l, r| { - if let Some(query) = query { - normalized_damerau_levenshtein(l.name(), query) - .partial_cmp(&normalized_damerau_levenshtein(r.name(), query)) - .unwrap_or(Ordering::Equal) - .reverse() - } else { - l.name().cmp(r.name()) - } - }) - }); + .collect(); + + functions.sort_by(|l, r| { + if let Some(query) = query { + normalized_damerau_levenshtein(&l.name, query) + .partial_cmp(&normalized_damerau_levenshtein(&r.name, 
query)) + .unwrap_or(Ordering::Equal) + .reverse() + } else { + l.name.cmp(&r.name) + } + }); - let mut skin = MadSkin::default(); + let width = terminal_width(); + + for (i, function) in functions.iter().enumerate() { + if i > 0 { + println!(); + } - skin.headers[0].align = Alignment::Left; - skin.headers[1].align = Alignment::Left; - skin.headers[2].align = Alignment::Left; + let (type_sig, return_type) = match &function.static_type { + StaticType::Function { + parameters, + return_type, + } => { + let sig = match parameters { + None => TypeSignature::Variadic, + Some(types) => TypeSignature::Exact( + types + .iter() + .enumerate() + .map(|(i, t)| Parameter::new(format!("arg{i}"), t.clone())) + .collect(), + ), + }; + (sig, return_type.as_ref()) + } + other => (TypeSignature::Variadic, other), + }; - for function in matched_functions { - let mut signature = String::new(); - let type_sig = function.type_signature(); - match type_sig { + let mut line = String::new(); + write!(line, "{}", function.name.bold().yellow())?; + match &type_sig { TypeSignature::Variadic => { - write!(signature, "(*args**)")?; + write!(line, "{}", "(...)".dim())?; } TypeSignature::Exact(params) => { - write!(signature, "(")?; - let mut param_iter = params.iter().peekable(); - while let Some(Parameter { name, type_name }) = param_iter.next() { - write!( - signature, - "*{name}*: **{}**", - format!("{}", type_name).green() - )?; - - if param_iter.peek().is_some() { - write!(signature, ", ")?; + write!(line, "(")?; + for (i, Parameter { name, type_name }) in params.iter().enumerate() { + if i > 0 { + write!(line, "{}", ", ".dim())?; } + write!(line, "{}", name.italic())?; + write!(line, "{}", ":".dim())?; + write!(line, " {}", type_name.to_string().cyan())?; } - - write!(signature, ")")?; + write!(line, ")")?; } } - let name = function.name(); - let documentation = function.documentation().trim(); - let return_type = function.return_type(); - let markdown = format!( - "---\n\n## 
**{}**{signature} -> {}\n\n{documentation}{}", - name.green(), - format!("{}", return_type).green().bold(), - if documentation.is_empty() { "" } else { "\n\n" } - ); + write!(line, " {} ", "->".dim())?; + write!(line, "{}", return_type.to_string().bold().cyan())?; + println!("{line}"); - skin.print_text(&markdown); + if let Some(docs) = &function.documentation { + let docs = docs.trim(); + if !docs.is_empty() { + let wrapped = wrap_text(docs, width, " "); + println!("{}", wrapped.dim()); + } + } } Ok(()) diff --git a/ndc_bin/src/main.rs b/ndc_bin/src/main.rs index e7da5005..0313bb7e 100644 --- a/ndc_bin/src/main.rs +++ b/ndc_bin/src/main.rs @@ -6,7 +6,6 @@ use clap::{Parser, Subcommand}; use highlighter::{AndycppHighlighter, AndycppHighlighterState}; use miette::{NamedSource, highlighters::HighlighterState}; use ndc_interpreter::{Interpreter, InterpreterError}; -use ndc_stdlib::WithStdlib; use std::path::PathBuf; use std::process; use std::{fs, io::Write}; @@ -20,7 +19,7 @@ mod highlighter; #[derive(Parser)] #[command(name = "Andy C++")] #[command(author = "Tim Fennis ")] -#[command(version = "0.2.0")] +#[command(version)] #[command(about = "An interpreter for the Andy C++ language")] struct Cli { #[arg(short = 'C', long, default_value_t = 1)] @@ -43,8 +42,16 @@ enum Command { stdio: bool, }, + /// Print the disassembled bytecode for an .ndc file + Disassemble { file: PathBuf }, + /// Output the documentation optionally searched using a query string - Docs { query: Option }, + Docs { + query: Option, + /// Disable color output + #[arg(long)] + no_color: bool, + }, // This is a fallback case #[command(external_subcommand)] @@ -59,10 +66,16 @@ impl Default for Command { enum Action { RunLsp, - RunFile(PathBuf), + RunFile { + path: PathBuf, + }, + DisassembleFile(PathBuf), HighlightFile(PathBuf), StartRepl, - Docs(Option), + Docs { + query: Option, + no_color: bool, + }, } impl TryFrom for Action { @@ -70,18 +83,21 @@ impl TryFrom for Action { fn try_from(value: 
Command) -> Result { let action = match value { - Command::Run { file: Some(file) } => Self::RunFile(file), + Command::Run { file: Some(file) } => Self::RunFile { path: file }, Command::Run { file: None } => Self::StartRepl, Command::Lsp { stdio: _ } => Self::RunLsp, + Command::Disassemble { file } => Self::DisassembleFile(file), Command::Highlight { file } => Self::HighlightFile(file), - Command::Docs { query } => Self::Docs(query), + Command::Docs { query, no_color } => Self::Docs { query, no_color }, Command::Unknown(args) => { match args.len() { 0 => { // This case should have defaulted to `Command::Run { file: None }` unreachable!("fallback case reached with 0 arguments (should never happen)") } - 1 => Self::RunFile(args[0].parse::().context("invalid path")?), + 1 => Self::RunFile { + path: args[0].parse::().context("invalid path")?, + }, n => return Err(anyhow!("invalid number of arguments: {n}")), } } @@ -110,7 +126,7 @@ fn main() -> anyhow::Result<()> { let action: Action = cli.command.unwrap_or_default().try_into()?; match action { - Action::RunFile(path) => { + Action::RunFile { path } => { let filename = path .file_name() .and_then(|name| name.to_str()) @@ -118,9 +134,9 @@ fn main() -> anyhow::Result<()> { let string = fs::read_to_string(path)?; - let stdout = std::io::stdout(); - let mut interpreter = Interpreter::new(stdout).with_stdlib(); - match into_miette_result(interpreter.run_str(&string)) { + let mut interpreter = Interpreter::new(); + interpreter.configure(ndc_stdlib::register); + match into_miette_result(interpreter.eval(&string)) { // we can just ignore successful runs because we have print statements Ok(_final_value) => {} Err(report) => { @@ -133,6 +149,18 @@ fn main() -> anyhow::Result<()> { } } } + Action::DisassembleFile(path) => { + let string = fs::read_to_string(path)?; + let mut interpreter = Interpreter::new(); + interpreter.configure(ndc_stdlib::register); + match interpreter.disassemble_str(&string) { + Ok(output) => 
print!("{output}"), + Err(e) => { + eprintln!("{:?}", miette::Report::new(diagnostic::NdcReport::from(e))); + process::exit(1); + } + } + } Action::HighlightFile(path) => { let string = fs::read_to_string(path)?; @@ -143,7 +171,7 @@ fn main() -> anyhow::Result<()> { } std::io::stdout().flush()?; } - Action::Docs(query) => return docs(query.as_deref()), + Action::Docs { query, no_color } => return docs(query.as_deref(), no_color), Action::StartRepl => { repl::run()?; } @@ -164,7 +192,7 @@ fn start_lsp() { .enable_all() .build() .expect("Failed building the Runtime") - .block_on(async { ndc_lsp::start_lsp().await }); + .block_on(async { ndc_lsp::start_lsp(ndc_stdlib::register).await }); } } diff --git a/ndc_bin/src/repl.rs b/ndc_bin/src/repl.rs index d6e1fc4e..7f8cd98c 100644 --- a/ndc_bin/src/repl.rs +++ b/ndc_bin/src/repl.rs @@ -2,7 +2,6 @@ use itertools::Itertools; use miette::highlighters::HighlighterState; use ndc_interpreter::Interpreter; -use ndc_stdlib::WithStdlib; use rustyline::Helper; use rustyline::config::Configurer; use rustyline::error::ReadlineError; @@ -36,8 +35,8 @@ pub fn run() -> anyhow::Result<()> { rl.set_color_mode(ColorMode::Enabled); rl.set_helper(Some(h)); - let stdout = std::io::stdout(); - let mut interpreter = Interpreter::new(stdout).with_stdlib(); + let mut interpreter = Interpreter::new(); + interpreter.configure(ndc_stdlib::register); loop { match rl.readline("λ ") { Ok(line) => { @@ -45,8 +44,9 @@ pub fn run() -> anyhow::Result<()> { let _ = rl.add_history_entry(line.as_str()); // Run the line we just read through the interpreter - match into_miette_result(interpreter.run_str(line.as_str())) { - Ok(output) => { + match into_miette_result(interpreter.eval(line.as_str())) { + Ok(value) => { + let output = value.to_string(); if !output.is_empty() { println!("{output}") } diff --git a/ndc_core/Cargo.toml b/ndc_core/Cargo.toml index 0de49715..61c174c3 100644 --- a/ndc_core/Cargo.toml +++ b/ndc_core/Cargo.toml @@ -5,7 +5,7 @@ 
version.workspace = true [dependencies] ahash.workspace = true -ndc_parser.workspace = true +itertools.workspace = true num.workspace = true ordered-float.workspace = true ryu.workspace = true diff --git a/ndc_core/src/lib.rs b/ndc_core/src/lib.rs index e99a7fd0..0394894d 100644 --- a/ndc_core/src/lib.rs +++ b/ndc_core/src/lib.rs @@ -2,3 +2,31 @@ pub mod compare; pub mod hash_map; pub mod int; pub mod num; +pub mod static_type; + +pub use static_type::{Parameter, StaticType, TypeSignature}; +use std::slice::Iter; + +pub struct FunctionRegistry { + functions: Vec, +} + +impl Default for FunctionRegistry { + fn default() -> Self { + Self { functions: vec![] } + } +} + +impl FunctionRegistry { + pub fn declare_global_fn(&mut self, f: T) { + self.functions.push(f) + } + + pub fn iter(&'_ self) -> Iter<'_, T> { + self.functions.iter() + } + + pub fn take(&mut self) -> Vec { + std::mem::take(&mut self.functions) + } +} diff --git a/ndc_core/src/num.rs b/ndc_core/src/num.rs index 355881f6..5dfc32c6 100644 --- a/ndc_core/src/num.rs +++ b/ndc_core/src/num.rs @@ -4,8 +4,8 @@ use std::hash::{Hash, Hasher}; use std::num::TryFromIntError; use std::ops::{Add, Div, Mul, Neg, Not, Rem, Sub}; +use crate::StaticType; use crate::int::Int; -use ndc_parser::{BinaryOperator, StaticType}; use num::bigint::TryFromBigIntError; use num::complex::{Complex64, ComplexFloat}; use num::{BigInt, BigRational, Complex, FromPrimitive, Signed, ToPrimitive, Zero}; @@ -178,7 +178,6 @@ impl Not for Number { fn not(self) -> Self::Output { match self { Self::Int(int) => int.not().into(), - // TODO: bitwise negation of all non integer numbers in Noulith result in NAN, is that what we want for our language too? 
_ => f64::NAN.into(), } } @@ -219,11 +218,7 @@ impl BinaryOperatorError { Self(message) } - pub fn undefined_operation( - operator: BinaryOperator, - left: &StaticType, - right: &StaticType, - ) -> Self { + pub fn undefined_operation(operator: &str, left: &StaticType, right: &StaticType) -> Self { Self(format!( "operator {operator} is not defined for {left} and {right}" )) @@ -324,7 +319,6 @@ impl_binary_operator_all!(Rem, rem, Rem::rem, Rem::rem, Rem::rem, Rem::rem); impl Div<&Number> for &Number { type Output = Number; - /// TODO: always converting operands to rational numbers is needlessly slow in some cases fn div(self, rhs: &Number) -> Self::Output { match (self.to_rational(), rhs.to_rational()) { (Some(left), Some(right)) if !right.is_zero() => Number::rational(left / right), @@ -392,7 +386,7 @@ impl Number { (Self::Float(p1), Self::Float(p2)) => Ok(Self::Float(p1.rem_euclid(p2))), (left, right) => Err(BinaryOperatorError::undefined_operation( - BinaryOperator::EuclideanModulo, + "%%", &left.static_type(), &right.static_type(), )), @@ -479,9 +473,7 @@ impl Number { if let Some(bi) = BigInt::from_f64(*f) { Self::Int(Int::BigInt(bi).simplified()) } else { - return Err(NumberConversionError(format!( - "cannot convert {f} to int" - ))); + return Err(NumberConversionError(format!("cannot convert {f} to int"))); } } Self::Rational(r) => Self::Int(Int::BigInt(r.to_integer()).simplified()), @@ -587,7 +579,6 @@ macro_rules! 
implement_rounding { Number::Float(f) } } - // TODO: fix bigint -> int Number::Rational(r) => Number::Int(Int::BigInt(r.$method().to_integer())), Number::Complex(c) => Complex::new(c.re.$method(), c.im.$method()).into(), } diff --git a/ndc_parser/src/static_type.rs b/ndc_core/src/static_type.rs similarity index 95% rename from ndc_parser/src/static_type.rs rename to ndc_core/src/static_type.rs index 732b0202..a7567f8b 100644 --- a/ndc_parser/src/static_type.rs +++ b/ndc_core/src/static_type.rs @@ -7,6 +7,12 @@ pub enum TypeSignature { Exact(Vec), } +impl Default for TypeSignature { + fn default() -> Self { + Self::Exact(vec![]) + } +} + impl TypeSignature { /// Matches a list of `ValueTypes` to a type signature. It can return `None` if there is no match or /// `Some(num)` where num is the sum of the distances of the types. The type `Int`, is distance 1 @@ -42,6 +48,13 @@ impl TypeSignature { Self::Exact(args) => Some(args.len()), } } + + pub fn types(&self) -> Option> { + match self { + Self::Variadic => None, + Self::Exact(v) => Some(v.iter().map(|p| p.type_name.clone()).collect()), + } + } } #[derive(Debug, Clone, Eq, PartialEq, Hash)] @@ -59,15 +72,16 @@ impl Parameter { } } -#[derive(Debug, Clone, Eq, PartialEq, Hash)] +#[derive(Debug, Clone, Eq, PartialEq, Hash, Default)] pub enum StaticType { + #[default] Any, Bool, Function { - parameters: Option>, - return_type: Box, + parameters: Option>, + return_type: Box, }, - Option(Box), + Option(Box), // Numbers Number, @@ -77,18 +91,18 @@ pub enum StaticType { Complex, // Sequences List -> List - Sequence(Box), - List(Box), + Sequence(Box), + List(Box), String, - Tuple(Vec), + Tuple(Vec), Map { - key: Box, - value: Box, + key: Box, + value: Box, }, - Iterator(Box), - MinHeap(Box), - MaxHeap(Box), - Deque(Box), + Iterator(Box), + MinHeap(Box), + MaxHeap(Box), + Deque(Box), } impl StaticType { @@ -448,6 +462,13 @@ impl StaticType { self.sequence_element_type() } + /// Returns `true` if this type could be a callable 
function at runtime. + /// This is true for `Function` types and for `Any` (which might be a + /// function at runtime). Concrete non-function types return `false`. + pub fn could_be_callable(&self) -> bool { + matches!(self, Self::Function { .. } | Self::Any) + } + pub fn is_fn_and_matches(&self, types: &[Self]) -> bool { // If the thing is not a function we're not interested let Self::Function { parameters, .. } = self else { diff --git a/ndc_interpreter/Cargo.toml b/ndc_interpreter/Cargo.toml index 1420330e..b23d6247 100644 --- a/ndc_interpreter/Cargo.toml +++ b/ndc_interpreter/Cargo.toml @@ -4,15 +4,13 @@ name = "ndc_interpreter" edition.workspace = true version.workspace = true +[features] +vm-trace = ["ndc_vm/vm-trace"] + [dependencies] -anyhow.workspace = true -derive_more.workspace = true -derive_builder.workspace = true -itertools.workspace = true +ndc_analyser.workspace = true ndc_core.workspace = true +ndc_vm = { workspace = true } ndc_lexer.workspace = true ndc_parser.workspace = true -num.workspace = true -self_cell.workspace = true thiserror.workspace = true - diff --git a/ndc_interpreter/src/environment.rs b/ndc_interpreter/src/environment.rs deleted file mode 100644 index 73f1271c..00000000 --- a/ndc_interpreter/src/environment.rs +++ /dev/null @@ -1,198 +0,0 @@ -use crate::function::{Function, StaticType}; - -use crate::value::Value; -use ndc_parser::ResolvedVar; -use std::cell::RefCell; -use std::fmt; -use std::fmt::Formatter; -use std::io::{Stdout, Write, stdout}; -use std::rc::Rc; - -pub struct RootEnvironment { - pub output: Box, - // These are global values - global_functions: Vec, -} - -pub struct Environment { - root: Rc>, - parent: Option>>, - values: Vec, -} - -impl fmt::Debug for Environment { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!( - f, - "Environment[has_parent: {:?}, values.len(): {}]", - self.parent.is_some(), - self.values.len(), - ) - } -} - -impl Environment { - pub fn with_output(&mut self, f: F) -> 
Result<(), std::io::Error> - where - F: FnOnce(&mut Box) -> Result<(), std::io::Error>, - { - let mut root = self.root.borrow_mut(); - let output = &mut root.output; - f(output) - } - - #[must_use] - pub fn get_output(&self) -> Option> { - let root = self.root.clone(); - let root = root.borrow(); - let output = root.output.get_output(); - output.cloned() - } - - #[must_use] - pub fn new(writer: Box) -> Self { - let root = RootEnvironment { - output: writer, - global_functions: Default::default(), - }; - - Self { - root: Rc::new(RefCell::new(root)), - parent: None, - values: Default::default(), - } - } - - pub fn get_global_identifiers(&self) -> Vec<(String, StaticType)> { - self.root - .borrow() - .global_functions - .iter() - .map(|function| (function.name().to_string(), function.static_type())) - .collect::>() - } - - #[must_use] - pub fn get_all_functions(&self) -> Vec { - self.root.borrow().global_functions.clone() - } - - pub fn set(&mut self, var: ResolvedVar, value: Value) { - match var { - ResolvedVar::Captured { depth: 0, slot } => { - if self.values.len() > slot { - self.values[slot] = value - } else { - debug_assert!(slot == self.values.len()); - self.values.push(value); - } - } - - // Recursively insert - ResolvedVar::Captured { depth, slot } => { - self.parent - .clone() - .expect("tried to get parent but failed") - .borrow_mut() - .set( - ResolvedVar::Captured { - depth: depth - 1, - slot, - }, - value, - ); - } - ResolvedVar::Global { .. } => { - unreachable!("cannot assign value to global") - } - } - } - - /// Declare a function globally using its self-exposed name, if there already exists a function - /// with the same name it's simply overloaded. 
- pub fn declare_global_fn(&mut self, function: impl Into) { - let new_function = function.into(); - - let root: &mut RootEnvironment = &mut self.root.borrow_mut(); - - root.global_functions.push(new_function.clone()); - } - - fn get_copy_from_slot(&self, depth: usize, slot: usize) -> Value { - if depth == 0 { - assert!( - self.values.len() > slot, - "failed to take item out of slot {slot} because it was empty" - ); - self.values[slot].clone() - } else { - self.parent - .clone() - .expect("expected parent env did not exist") - .borrow() - .get_copy_from_slot(depth - 1, slot) - } - } - - #[must_use] - pub fn get(&self, var: ResolvedVar) -> Value { - match var { - ResolvedVar::Captured { depth, slot } => self.get_copy_from_slot(depth, slot), - ResolvedVar::Global { slot } => { - Value::function(self.root.borrow().global_functions[slot].clone()) - } - } - } - - /// Takes the named variable from memory and leaves `Value::unit()` in its place - #[must_use] - pub fn take(&mut self, var: ResolvedVar) -> Option { - match var { - ResolvedVar::Captured { depth: 0, slot } => Some(std::mem::replace( - self.values.get_mut(slot).expect("slot can't be empty"), - Value::unit(), - )), - ResolvedVar::Captured { depth, slot } => self - .parent - .clone() - .expect("expected parent env did not exist") - .borrow_mut() - .take(ResolvedVar::Captured { - depth: depth - 1, - slot, - }), - ResolvedVar::Global { .. 
} => panic!("cannot take global variable from environment"), - } - } - - pub fn new_scope(parent: &Rc>) -> Self { - let root_ref = Rc::clone(&parent.borrow().root); - Self { - parent: Some(parent.clone()), - root: root_ref, - values: Default::default(), - } - } -} - -impl Default for Environment { - fn default() -> Self { - Self::new(Box::new(stdout())) - } -} - -pub trait InterpreterOutput: Write { - fn get_output(&self) -> Option<&Vec>; -} - -impl InterpreterOutput for Vec { - fn get_output(&self) -> Option<&Vec> { - Some(self) - } -} - -impl InterpreterOutput for Stdout { - fn get_output(&self) -> Option<&Vec> { - None - } -} diff --git a/ndc_interpreter/src/evaluate/index.rs b/ndc_interpreter/src/evaluate/index.rs deleted file mode 100644 index 0a6dc9b5..00000000 --- a/ndc_interpreter/src/evaluate/index.rs +++ /dev/null @@ -1,359 +0,0 @@ -//! # Indexing -//! -//! Visual explanation of how indexing works -//! -//! +----------------+-----+----+----+----+----+----+----+----+----+----+ -//! | List values | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -//! +----------------+-----+----+----+----+----+----+----+----+----+----+ -//! | Forward index | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -//! | Backward index | -10 | -9 | -8 | -7 | -6 | -5 | -4 | -3 | -2 | -1 | -//! +----------------+-----+----+----+----+----+----+----+----+----+----+ - -use super::{EvaluationError, EvaluationResult, IntoEvaluationResult, evaluate_expression}; -use crate::environment::Environment; -use crate::{function::FunctionCarrier, sequence::Sequence, value::Value}; -use itertools::Itertools; -use ndc_lexer::Span; -use ndc_parser::{Expression, ExpressionLocation}; -use std::cell::RefCell; -use std::cmp::min; -use std::ops::IndexMut; -use std::rc::Rc; - -#[derive(Clone)] -pub enum EvaluatedIndex { - Index(Value), - Slice { - from: Option, - to: Option, - inclusive: bool, - }, -} - -impl EvaluatedIndex { - // TODO: improve error contract so we don't need EvaluationError (maybe??) 
- pub fn try_into_offset(self, size: usize, span: Span) -> Result { - Ok(match self { - Self::Index(idx) => { - Offset::Element(value_to_bounded_forward_index(idx, size, false, span)?) - } - Self::Slice { - from, - to, - inclusive, - } => { - let from_idx = if let Some(from) = from { - value_to_bounded_forward_index(from, size, true, span)? - } else { - 0 - }; - - let to_idx = if let Some(to) = to { - value_to_bounded_forward_index(to, size, true, span)? - } else { - size - }; - - Offset::Range(from_idx, to_idx + usize::from(inclusive)) - } - }) - } -} - -pub(crate) fn evaluate_as_index( - expression_location: &ExpressionLocation, - environment: &Rc>, -) -> Result { - let (range_start, range_end, inclusive) = match expression_location.expression { - Expression::RangeExclusive { - start: ref range_start, - end: ref range_end, - } => (range_start, range_end, false), - Expression::RangeInclusive { - start: ref range_start, - end: ref range_end, - } => (range_start, range_end, true), - _ => { - let result = evaluate_expression(expression_location, environment)?; - return Ok(EvaluatedIndex::Index(result)); - } - }; - - if inclusive && range_end.is_none() { - return Err(EvaluationError::new( - "inclusive ranges must have an end".to_string(), - expression_location.span, - ) - .into()); - } - - let start = if let Some(range_start) = range_start { - Some(evaluate_expression(range_start, environment)?) - } else { - None - }; - - let end = if let Some(range_end) = range_end { - Some(evaluate_expression(range_end, environment)?) - } else { - None - }; - - Ok(EvaluatedIndex::Slice { - from: start, - to: end, - inclusive, - }) -} - -fn invalid_index_err(span: Span) -> impl Fn(T) -> EvaluationError { - move |_: T| { - EvaluationError::with_help( - "Invalid list index".to_string(), - span, - "The value used as a list index is not valid. List indices must be convertible to a signed 64-bit integer. 
Ensure the index is a valid integer within the range of -2^63 to 2^63-1".to_string(), - ) - } -} - -/// This function converts a native Andy C++ `Value` (hopefully a number) into a valid usize index -/// into a vector of size `size`. The `allow_oob` argument allows the argument to be out of bounds -/// which is needed when evaluating range expressions. -fn value_to_bounded_forward_index( - value: Value, - size: usize, - for_slice: bool, - span: Span, -) -> Result { - let index = i64::try_from(value).map_err(invalid_index_err(span))?; - - if index.is_negative() { - let index = usize::try_from(index.abs()) - .map_err(|_err| EvaluationError::new("invalid index: too large".to_string(), span))?; - - if for_slice { - Ok(size.saturating_sub(index)) - } else { - size.checked_sub(index) - .ok_or_else(|| EvaluationError::new("index out of bounds".to_string(), span)) - } - } else { - let index = usize::try_from(index).map_err(invalid_index_err(span))?; - if for_slice { - return Ok(min(index, size)); - } - - if index >= size { - return Err(EvaluationError::new( - "index out of bounds".to_string(), - span, - )); - } - Ok(index) - } -} - -#[derive(Clone, Copy, Eq, PartialEq)] -pub enum Offset { - Element(usize), - Range(usize, usize), -} - -impl Offset { - pub fn into_tuple(self) -> (usize, usize) { - match self { - Self::Element(idx) => (idx, idx + 1), - Self::Range(from, to) => (from, to), - } - } -} - -pub fn get_at_index( - lhs: &Value, - index: EvaluatedIndex, - span: Span, - environment: &Rc>, -) -> Result { - let Some(size) = lhs.sequence_length() else { - return Err(EvaluationError::new( - "cannot index into this type because it doesn't have a length".to_string(), - span, - ) - .into()); - }; - - match lhs { - Value::Sequence(Sequence::List(list)) => { - let list = list.borrow(); - let index = index.try_into_offset(size, span)?; - - match index { - Offset::Element(index_usize) => Ok(list[index_usize].clone()), - Offset::Range(from_usize, to_usize) => 
Ok(Value::list(&list[from_usize..to_usize])), - } - } - Value::Sequence(Sequence::String(insertion_target)) => { - let index = index.try_into_offset(size, span)?; - Ok(Value::string(match index { - Offset::Element(e) => insertion_target - .borrow() - .chars() - .nth(e) - .map(String::from) - .expect("Safe because bounds were already checked"), - Offset::Range(s, e) => insertion_target - .borrow() - .chars() - .dropping(s) - .take(e) - .collect::(), - })) - } - Value::Sequence(Sequence::Map(map, default)) => { - let key = match index { - EvaluatedIndex::Index(idx) => idx, - EvaluatedIndex::Slice { .. } => { - return Err(EvaluationError::syntax_error( - "cannot use range expression as index in map".to_string(), - span, - ) - .into()); - } - }; - - let value = map - .try_borrow() - .into_evaluation_result(span)? - .get(&key) - .cloned(); - - if let Some(value) = value { - Ok(value) - } else if let Some(default) = default { - let default_value = produce_default_value(default, environment, span)?; - map.try_borrow_mut() - .into_evaluation_result(span)? 
- .insert(key, default_value.clone()); - Ok(default_value) - } else { - Err(EvaluationError::key_not_found(&key, span).into()) - } - } - _ => Err(EvaluationError::syntax_error( - format!("cannot insert into {} at index", lhs.static_type()), - span, - ) - .into()), - } -} - -pub(super) fn produce_default_value( - default: &Value, - environment: &Rc>, - span: Span, -) -> EvaluationResult { - match default { - Value::Function(function) => match function.call_checked(&mut [], environment) { - Err(FunctionCarrier::FunctionTypeMismatch) => { - Err(FunctionCarrier::EvaluationError(EvaluationError::new( - "default function is not callable without arguments".to_string(), - span, - ))) - } - a => a, - }, - value => Ok(value.clone()), - } -} -pub fn set_at_index( - lhs: &mut Value, - rhs: Value, - index: EvaluatedIndex, - span: Span, -) -> Result<(), FunctionCarrier> { - let Some(size) = lhs.sequence_length() else { - return Err(EvaluationError::new( - "cannot index into this type because it doesn't have a length".to_string(), - span, - ) - .into()); - }; - - match lhs { - Value::Sequence(Sequence::List(list)) => { - let mut list = list.try_borrow_mut().map_err(|_err| { - EvaluationError::mutation_error( - "you cannot mutate a value in a list while you're iterating over this list", - span, - ) - })?; - - let index = index.try_into_offset(size, span)?; - - match index { - Offset::Element(index_usize) => { - let x = list.index_mut(index_usize); - *x = rhs; - } - Offset::Range(from_usize, to_usize) => { - let tail = list.drain(from_usize..).collect::>(); - - list.extend( - rhs.try_into_vec() - .expect("this must succeed, but not sure why") - .into_iter(), - ); - - list.extend_from_slice(&tail[(to_usize - from_usize)..]); - } - } - } - Value::Sequence(Sequence::String(insertion_target)) => { - if let Value::Sequence(Sequence::String(target_string)) = rhs { - let target_string = target_string.borrow(); - - let mut insertion_target = insertion_target.borrow_mut(); - - let index = 
index.try_into_offset(size, span)?; - - match index { - Offset::Element(index) => { - insertion_target.replace_range(index..=index, target_string.as_str()); - } - Offset::Range(from, to) => { - insertion_target.replace_range(from..to, target_string.as_str()); - } - } - } else { - return Err(EvaluationError::syntax_error( - format!("cannot insert {} into a string", rhs.static_type()), - span, - ) - .into()); - } - } - Value::Sequence(Sequence::Map(map, _)) => { - let mut map = map.try_borrow_mut().into_evaluation_result(span)?; - - let key = match index { - EvaluatedIndex::Index(idx) => idx, - EvaluatedIndex::Slice { .. } => { - return Err(EvaluationError::syntax_error( - "cannot use range expression as index".to_string(), - span, - ) - .into()); - } - }; - map.insert(key, rhs); - } - _ => { - return Err(EvaluationError::syntax_error( - format!("cannot insert into {} at index", lhs.static_type()), - span, - ) - .into()); - } - }; - Ok(()) -} diff --git a/ndc_interpreter/src/evaluate/mod.rs b/ndc_interpreter/src/evaluate/mod.rs deleted file mode 100644 index 60436285..00000000 --- a/ndc_interpreter/src/evaluate/mod.rs +++ /dev/null @@ -1,1089 +0,0 @@ -use crate::hash_map::HashMap; -use crate::environment::Environment; -use crate::function::{Function, FunctionBody, FunctionCarrier, StaticType}; -use crate::int::Int; -use crate::iterator::mut_value_to_iterator; -use crate::num::Number; -use crate::sequence::Sequence; -use crate::value::Value; -use index::{Offset, evaluate_as_index, get_at_index, set_at_index}; -use itertools::Itertools; -use ndc_lexer::Span; -use ndc_parser::{ - Binding, Expression, ExpressionLocation, ForBody, ForIteration, LogicalOperator, Lvalue, -}; -use std::cell::RefCell; -use std::fmt; -use std::rc::Rc; - -pub type EvaluationResult = Result; - -mod index; - -#[allow(clippy::too_many_lines)] -pub(crate) fn evaluate_expression( - expression_location: &ExpressionLocation, - environment: &Rc>, -) -> EvaluationResult { - let span = 
expression_location.span; - let literal: Value = match &expression_location.expression { - Expression::BoolLiteral(b) => Value::Bool(*b), - Expression::StringLiteral(s) => Value::string(s), - Expression::Int64Literal(n) => Value::Number(Number::Int(Int::Int64(*n))), - Expression::BigIntLiteral(n) => Value::Number(Number::Int(Int::BigInt(n.clone()))), - Expression::Float64Literal(n) => Value::Number(Number::Float(*n)), - Expression::ComplexLiteral(n) => Value::Number(Number::Complex(*n)), - Expression::Grouping(expr) => evaluate_expression(expr, environment)?, - Expression::Identifier { name, resolved } => { - if name == "None" { - return Ok(Value::none()); - } - - match resolved { - Binding::None => panic!("binding not resolved at runtime"), - Binding::Resolved(resolved) => environment.borrow().get(*resolved), - Binding::Dynamic(_) => panic!("attempted to evaluate dynamic binding"), - } - } - Expression::VariableDeclaration { l_value, value } => { - let value = evaluate_expression(value, environment)?; - declare_or_assign_variable(l_value, value, environment, span)?; - Value::unit() - } - Expression::Assignment { - l_value, - r_value: value, - } => match l_value { - l_value @ (Lvalue::Identifier { .. } | Lvalue::Sequence(_)) => { - let value = evaluate_expression(value, environment)?; - declare_or_assign_variable(l_value, value, environment, span)? 
- } - Lvalue::Index { - value: lhs_expression, - index: index_expression, - } => { - let mut lhs = evaluate_expression(lhs_expression, environment)?; - - // the computation of this value may need the list that we assign to, - // therefore the value needs to be computed before we mutably borrow the list - // see: `bug0001_in_place_map.ndct` - let rhs = evaluate_expression(value, environment)?; - - let index = evaluate_as_index(index_expression, environment)?; - - set_at_index(&mut lhs, rhs, index, span)?; - - Value::unit() - } - }, - Expression::OpAssignment { - l_value, - r_value, - resolved_assign_operation, - resolved_operation, - .. - } => { - match l_value { - Lvalue::Identifier { - identifier, - resolved: resolved_l_value, - .. - } => { - let resolved_l_value = resolved_l_value.expect("lvalue must be resolved"); - let rhs = evaluate_expression(r_value, environment)?; - - // TODO: this statement does damage which isn't reverted when for instance we can't find the function - let Some(lhs) = environment.borrow_mut().take(resolved_l_value) else { - return Err(EvaluationError::undefined_variable(identifier, span).into()); - }; - - let types = [lhs.static_type(), rhs.static_type()]; - let mut arguments = [lhs, rhs]; - - let mut operations_to_try = [ - ( - resolve_dynamic_binding(resolved_assign_operation, &types, environment), - true, - ), - ( - resolve_dynamic_binding(resolved_operation, &types, environment), - false, - ), - ] - .into_iter() - .filter_map(|(value, in_place)| value.map(|value| (value, in_place))) - .peekable(); - - while let Some((operation, modified_in_place)) = operations_to_try.next() { - let Value::Function(func) = operation else { - unreachable!( - "the resolver pass should have guaranteed that the operation points to a function" - ); - }; - // (&func, &mut arguments, environment, span) - let result = match func.call_checked(&mut arguments, environment) { - Err(FunctionCarrier::FunctionTypeMismatch) - if operations_to_try.peek().is_none() => - 
{ - let argument_string = - arguments.iter().map(Value::static_type).join(", "); - - return Err(FunctionCarrier::EvaluationError( - EvaluationError::new( - format!( - "no function called 'TODO FIGURE OUT NAME' found matches the arguments: ({argument_string})" - ), - span, - ), - )); - } - Err(FunctionCarrier::FunctionTypeMismatch) => continue, - Err(carrier @ FunctionCarrier::IntoEvaluationError(_)) => { - return Err(carrier.lift_if(span)); - } - eval_result => eval_result?, - }; - - if modified_in_place { - environment.borrow_mut().set( - resolved_l_value, - std::mem::replace(&mut arguments[0], Value::unit()), - ); - } else { - environment.borrow_mut().set(resolved_l_value, result); - } - - break; // LMAO!?! - } - - Value::unit() - } - // NOTE THIS IS AN ABSOLUTE MESS BUT WE'LL FIX IT LATER - Lvalue::Index { - value: lhs_expression, - index: index_expression, - } => { - let mut lhs_value = evaluate_expression(lhs_expression, environment)?; - let index = evaluate_as_index(index_expression, environment)?; - let value_at_index = - get_at_index(&lhs_value, index.clone(), span, environment)?; - - let right_value = evaluate_expression(r_value, environment)?; - - let types = [value_at_index.static_type(), right_value.static_type()]; - let mut operations_to_try = [ - ( - resolve_dynamic_binding(resolved_assign_operation, &types, environment), - true, - ), - ( - resolve_dynamic_binding(resolved_operation, &types, environment), - false, - ), - ] - .into_iter() - .filter_map(|(value, in_place)| value.map(|value| (value, in_place))) - .peekable(); - - while let Some((operation_val, modified_in_place)) = operations_to_try.next() { - let Value::Function(func) = operation_val else { - unreachable!( - "the resolver pass should have guaranteed that the operation points to a function" - ); - }; - - let result = match func.call_checked( - &mut [value_at_index.clone(), right_value.clone()], - environment, - ) { - Err(FunctionCarrier::FunctionTypeMismatch) - if 
operations_to_try.peek().is_none() => - { - return Err(FunctionCarrier::EvaluationError( - EvaluationError::new( - format!( - "no function called 'TODO FIGURE OUT NAME' found matches the arguments: ({}, {})", - value_at_index.static_type(), - right_value.static_type() - ), - span, - ), - )); - } - Err(FunctionCarrier::FunctionTypeMismatch) => continue, - Err(carrier @ FunctionCarrier::IntoEvaluationError(_)) => { - return Err(carrier.lift_if(span)); - } - eval_result => eval_result?, - }; - - if !modified_in_place { - set_at_index(&mut lhs_value, result, index.clone(), span)?; - } - - break; - } - - Value::unit() - } - Lvalue::Sequence(_) => { - return Err(EvaluationError::syntax_error( - "cannot use augmented assignment in combination with destructuring" - .to_string(), - span, - ) - .into()); - } - } - } - Expression::Block { statements } => { - let local_scope = Rc::new(RefCell::new(Environment::new_scope(environment))); - - let mut value = Value::unit(); - for stm in statements { - value = evaluate_expression(stm, &local_scope)?; - } - - drop(local_scope); - value - } - Expression::If { - condition, - on_true, - on_false, - } => { - let result = evaluate_expression(condition, environment)?; - - match (result, on_false) { - (Value::Bool(true), _) => evaluate_expression(on_true, environment)?, - (Value::Bool(false), Some(block)) => evaluate_expression(block, environment)?, - (Value::Bool(false), None) => Value::unit(), - (value, _) => { - return Err(EvaluationError::new( - format!( - "mismatched types: expected {}, found {}", - StaticType::Bool, - value.static_type() - ), - span, - ) - .into()); - } - } - } - Expression::Statement(expression) => { - evaluate_expression(expression, environment)?; - Value::unit() - } - Expression::Logical { - operator, - left, - right, - } => { - let left = evaluate_expression(left, environment)?; - match (operator, left) { - (LogicalOperator::And, Value::Bool(true)) - | (LogicalOperator::Or, Value::Bool(false)) => { - 
evaluate_expression(right, environment)? - } - (LogicalOperator::And, Value::Bool(false)) => Value::Bool(false), - (LogicalOperator::Or, Value::Bool(true)) => Value::Bool(true), - (LogicalOperator::And | LogicalOperator::Or, value) => { - return Err(EvaluationError::new( - format!( - "Cannot apply logical operator to non bool value {}", - value.static_type() - ), - span, - ) - .into()); - } - } - } - Expression::While { - expression, - loop_body, - } => { - loop { - let lit = evaluate_expression(expression, environment)?; - if lit == Value::Bool(true) { - let result = evaluate_expression(loop_body, environment); - match result { - Err(FunctionCarrier::Break(value)) => return Ok(value), - Err(FunctionCarrier::Continue) | Ok(_) => {} - Err(err) => return Err(err), - } - } else if lit == Value::Bool(false) { - break; - } else { - return Err(EvaluationError::new( - "Expression in a while structure must return a bool".to_string(), - span, - ) - .into()); - } - } - Value::unit() - } - Expression::Call { - function, - arguments, - } => { - let mut evaluated_args = Vec::new(); - - for argument in arguments { - let arg = evaluate_expression(argument, environment)?; - evaluated_args.push(arg); - } - - resolve_and_call(function, evaluated_args, environment, span)? - } - Expression::FunctionDeclaration { - parameters, - body, - resolved_name, - return_type, - pure, - .. - } => { - let mut user_function = FunctionBody::Closure { - parameter_names: parameters - .as_parameters() - .into_iter() - .map(|x| x.to_string()) - .collect(), - body: *body.clone(), - return_type: return_type.clone().unwrap_or_else(StaticType::unit), - environment: environment.clone(), - }; - - if *pure { - user_function = FunctionBody::Memoized { - cache: RefCell::default(), - function: Box::new(user_function), - } - } - - if let Some(resolved_name) = *resolved_name { - environment.borrow_mut().set( - resolved_name, - // TODO: put name in declaration? 
- Value::function(Function::from_body(user_function)), - ); - - Value::unit() - } else { - Value::function(Function::from_body(user_function)) - } - } - - Expression::Tuple { values } => { - let mut out_values = Vec::with_capacity(values.len()); - for value in values { - out_values.push(evaluate_expression(value, environment)?); - } - - Value::Sequence(Sequence::Tuple(Rc::new(out_values))) - } - Expression::List { values } => { - let mut values_out = Vec::with_capacity(values.len()); - for expression in values { - let v = evaluate_expression(expression, environment)?; - values_out.push(v); - } - Value::Sequence(Sequence::List(Rc::new(RefCell::new(values_out)))) - } - Expression::Map { values, default } => { - let mut hashmap = HashMap::with_capacity(values.len()); - for (key, value) in values { - let key = evaluate_expression(key, environment)?; - let value = if let Some(value) = value { - evaluate_expression(value, environment)? - } else { - Value::unit() - }; - - hashmap.insert(key, value); - } - - let default = if let Some(default) = default { - Some(Box::new(evaluate_expression(default, environment)?)) - } else { - None - }; - - Value::Sequence(Sequence::Map(Rc::new(RefCell::new(hashmap)), default)) - } - Expression::For { iterations, body } => { - let mut out_values = Vec::new(); - let result = - execute_for_iterations(iterations, body, &mut out_values, environment, span); - - match result { - Err(FunctionCarrier::Break(break_value)) => return Ok(break_value), - Err(FunctionCarrier::Continue) => unreachable!(), - Err(err) => return Err(err), - Ok(_) => {} - } - - match &**body { - ForBody::Block(_) => Value::unit(), - ForBody::List(_) => Value::list(out_values), - ForBody::Map { - key: _, - value: _, - default, - } => Value::Sequence(Sequence::Map( - Rc::new(RefCell::new( - out_values - .into_iter() - .map(TryInto::<(Value, Value)>::try_into) - .collect::, _>>() - .into_evaluation_result(span)?, - )), - default - .as_ref() - .map(|default| 
evaluate_expression(default, environment).map(Box::new)) - .transpose()?, - )), - } - } - Expression::Return { value } => { - return Err(FunctionCarrier::Return(evaluate_expression( - value, - environment, - )?)); - } - Expression::Break => return Err(FunctionCarrier::Break(Value::unit())), - Expression::Continue => return Err(FunctionCarrier::Continue), - Expression::Index { - value: lhs_expr, - index: index_expr, - } => { - let lhs_value = evaluate_expression(lhs_expr, environment)?; - - match lhs_value { - Value::Sequence(Sequence::String(string)) => { - let string = string.borrow(); - - let index = evaluate_as_index(index_expr, environment)? - .try_into_offset(string.chars().count(), index_expr.span)?; - - let (start, end) = index.into_tuple(); - let new = string - .chars() - .dropping(start) - .take(end - start) - .collect::(); - - new.into() - } - Value::Sequence(Sequence::List(list)) => { - let list_length = list.borrow().len(); - - let index = evaluate_as_index(index_expr, environment)? - .try_into_offset(list_length, index_expr.span)?; - - match index { - Offset::Element(usize_index) => { - let list = list.borrow(); - let Some(value) = list.get(usize_index) else { - return Err( - EvaluationError::out_of_bounds(index, index_expr.span).into() - ); - }; - value.clone() - } - Offset::Range(from_usize, to_usize) => { - let list = list.borrow(); - let Some(values) = list.get(from_usize..to_usize) else { - return Err( - EvaluationError::out_of_bounds(index, index_expr.span).into() - ); - }; - - Value::list(values) - } - } - } - Value::Sequence(Sequence::Tuple(tuple)) => { - let index = evaluate_as_index(index_expr, environment)? 
- .try_into_offset(tuple.len(), index_expr.span)?; - - match index { - Offset::Element(index_usize) => { - let Some(value) = tuple.get(index_usize) else { - return Err( - EvaluationError::out_of_bounds(index, index_expr.span).into() - ); - }; - - value.clone() - } - - Offset::Range(from_usize, to_usize) => { - let Some(values) = tuple.get(from_usize..to_usize) else { - return Err( - EvaluationError::out_of_bounds(index, index_expr.span).into() - ); - }; - - Value::Sequence(Sequence::Tuple(Rc::new(values.to_vec()))) - } - } - } - Value::Sequence(Sequence::Deque(deque)) => { - let list_length = deque.borrow().len(); - - let index = evaluate_as_index(index_expr, environment)? - .try_into_offset(list_length, index_expr.span)?; - - match index { - Offset::Element(usize_index) => { - let list = deque.borrow(); - let Some(value) = list.get(usize_index) else { - return Err( - EvaluationError::out_of_bounds(index, index_expr.span).into() - ); - }; - value.clone() - } - Offset::Range(from_usize, to_usize) => { - let list = deque.borrow(); - let out = list - .iter() - .dropping(from_usize) - .take(to_usize - from_usize) - .cloned() - .collect::>(); - - Value::list(out) - } - } - } - Value::Sequence(Sequence::Map(dict, default)) => { - let key = evaluate_expression(index_expr, environment)?; - // let dict = dict.borrow(); - - let value = { dict.borrow().get(&key).cloned() }; - - return if let Some(value) = value { - Ok(value) - } else if let Some(default) = default { - let default_value = index::produce_default_value( - &default, - environment, - // NOTE: this span points at the entire expression instead of the - // function that cannot be executed because we don't have that span here - // maybe we can check the function signature earlier when we do have the span - lhs_expr.span.merge(index_expr.span), - )?; - - // TODO: This borrow_mut can fail, handle it better!! - // NOTE: WHEN DOES IT FAIL!? 
- dict.borrow_mut().insert(key, default_value.clone()); - - Ok(default_value) - } else { - Err(EvaluationError::key_not_found(&key, index_expr.span).into()) - }; - } - value => { - return Err(EvaluationError::new( - format!("cannot index into {}", value.static_type()), - lhs_expr.span, - ) - .into()); - } - } - } - Expression::RangeInclusive { - start: range_start, - end: range_end, - } => { - let range_start = if let Some(range_start) = range_start { - evaluate_expression(range_start, environment)? - } else { - return Err(EvaluationError::new( - "ranges without a lower bound cannot be evaluated into a value".to_string(), - span, - ) - .into()); - }; - - let range_start = i64::try_from(range_start).into_evaluation_result(span)?; - - if let Some(range_end) = range_end { - let range_end = evaluate_expression(range_end, environment)?; - let range_end = i64::try_from(range_end).into_evaluation_result(span)?; - - Value::from(range_start..=range_end) - } else { - Value::from(range_start..) - } - } - Expression::RangeExclusive { - start: range_start, - end: range_end, - } => { - let range_start = if let Some(range_start) = range_start { - evaluate_expression(range_start, environment)? - } else { - return Err(EvaluationError::new( - "ranges without a lower bound cannot be evaluated into a value".to_string(), - span, - ) - .into()); - }; - - let range_start = i64::try_from(range_start).into_evaluation_result(span)?; - - if let Some(range_end) = range_end { - let range_end = evaluate_expression(range_end, environment)?; - let range_end = i64::try_from(range_end).into_evaluation_result(span)?; - - Value::from(range_start..range_end) - } else { - Value::from(range_start..) - } - } - }; - - Ok(literal) -} - -fn declare_or_assign_variable( - l_value: &Lvalue, - value: Value, - environment: &Rc>, - span: Span, -) -> EvaluationResult { - match l_value { - Lvalue::Identifier { resolved, .. 
} => { - environment - .borrow_mut() - .set(resolved.expect("must be resolved"), value.clone()); - } - Lvalue::Sequence(l_values) => { - let r_values = value.try_into_vec().ok_or_else(|| { - FunctionCarrier::EvaluationError(EvaluationError::syntax_error( - "failed to unpack non iterable value into pattern".to_string(), - span, - )) - })?; - - if l_values.len() != r_values.len() { - return Err(EvaluationError::syntax_error( - "failed to unpack value into pattern because the lengths do not match" - .to_string(), - span, - ) - .into()); - } - let mut iter = l_values.iter().zip(r_values); - - for (l_value, value) in iter.by_ref() { - declare_or_assign_variable(l_value, value, environment, span)?; - } - } - Lvalue::Index { - value: lhs_expr, - index, - } => { - let mut lhs = evaluate_expression(lhs_expr, environment)?; - - let index = evaluate_as_index(index, environment)?; - - set_at_index(&mut lhs, value, index, span)?; - } - }; - - Ok(Value::unit()) -} - -#[derive(thiserror::Error, Debug)] -#[error("{text}")] -pub struct EvaluationError { - text: String, - span: Span, - help_text: Option, -} - -impl EvaluationError { - #[must_use] - pub fn with_help(text: String, span: Span, help_text: String) -> Self { - Self { - text, - span, - help_text: Some(help_text), - } - } - - #[must_use] - pub fn undefined_variable(identifier: &str, span: Span) -> Self { - Self { - text: format!("Undefined variable '{identifier}'"), - span, - help_text: None, - } - } - - #[must_use] - pub fn new(message: String, span: Span) -> Self { - Self { - text: message, - span, - help_text: None, - } - } - - #[must_use] - pub fn mutation_error(message: &str, span: Span) -> Self { - Self { - text: format!("Mutation error: {message}"), - span, - help_text: None, - } - } - #[must_use] - pub fn type_error(message: String, span: Span) -> Self { - Self { - text: message, - span, - help_text: None, - } - } - #[must_use] - pub fn syntax_error(message: String, span: Span) -> Self { - Self { - text: message, - 
span, - help_text: None, - } - } - - #[must_use] - pub fn io_error(err: &std::io::Error, span: Span) -> Self { - Self { - text: format!("IO error: {err}"), - span, - help_text: None, - } - } - - #[must_use] - pub fn out_of_bounds(index: Offset, span: Span) -> Self { - match index { - Offset::Element(index) => Self { - text: format!("Index {index} out of bounds"), - span, - help_text: None, - }, - Offset::Range(from, to) => Self { - text: format!("Index {from}..{to} out of bounds"), - span, - help_text: None, - }, - } - } - - #[must_use] - pub fn key_not_found(key: &Value, span: Span) -> Self { - Self { - text: format!("Key not found in map: {key}"), - span, - help_text: None, - } - } - - #[must_use] - pub fn argument_error(message: &str, span: Span) -> Self { - Self { - text: message.to_string(), - span, - help_text: None, - } - } - - pub fn span(&self) -> Span { - self.span - } - - pub fn help_text(&self) -> Option<&str> { - self.help_text.as_deref() - } -} - -pub trait ErrorConverter: fmt::Debug + fmt::Display { - fn as_evaluation_error(&self, span: Span) -> EvaluationError; -} - -impl ErrorConverter for E -where - E: fmt::Debug + fmt::Display, -{ - fn as_evaluation_error(&self, span: Span) -> EvaluationError { - EvaluationError { - text: format!("{self}"), - span, - help_text: None, - } - } -} - -pub trait LiftEvaluationResult { - fn add_span(self, span: Span) -> Result; -} - -impl LiftEvaluationResult for Result { - fn add_span(self, span: Span) -> Self { - self.map_err(|err| err.lift_if(span)) - } -} - -// NOTE: this is called `IntoEvaluationResult` but it actually only takes care of the error part of an evaluation result. -// `EvaluationResult` always wants the `Ok` type to be `Value` but this converter doesn't care. 
-pub trait IntoEvaluationResult { - fn into_evaluation_result(self, span: Span) -> Result; -} - -impl IntoEvaluationResult for Result -where - E: ErrorConverter, -{ - fn into_evaluation_result(self, span: Span) -> Result { - self.map_err(|err| FunctionCarrier::EvaluationError(err.as_evaluation_error(span))) - } -} - -fn execute_for_body( - body: &ForBody, - environment: &Rc>, - result: &mut Vec, -) -> EvaluationResult { - match body { - ForBody::Block(expr) => { - evaluate_expression(expr, environment)?; - } - ForBody::List(expr) => { - let value = evaluate_expression(expr, environment)?; - result.push(value); - } - ForBody::Map { key, value, .. } => { - result.push(Value::tuple(vec![ - evaluate_expression(key, environment)?, - value - .as_ref() - .map(|value| evaluate_expression(value, environment)) - .transpose()? - .unwrap_or(Value::unit()), - ])); - } - } - Ok(Value::unit()) -} - -/// Execute a `ForBody` for a slice of `ForIteration`s. -/// # Panics -/// If the slice of `ForIterations` is empty which is something the parser should take care of for us -#[allow(clippy::too_many_lines)] -fn execute_for_iterations( - iterations: &[ForIteration], - body: &ForBody, - out_values: &mut Vec, - environment: &Rc>, - span: Span, -) -> Result { - let Some((cur, tail)) = iterations.split_first() else { - unreachable!("slice of for-iterations was empty") - }; - - match cur { - ForIteration::Iteration { l_value, sequence } => { - let mut sequence = evaluate_expression(sequence, environment)?; - let iter = mut_value_to_iterator(&mut sequence).into_evaluation_result(span)?; - - for r_value in iter { - // In a previous version this scope was lifted outside the loop and reset for every iteration inside the loop - // in the following code sample this matters (a lot): - // ```ndc - // [fn(x) { x + i } for i in 0...10] - // ``` - // With the current implementation with a new scope declared for every iteration this produces 10 functions - // each with their own scope and their own 
version of `i`, this might potentially be a bit slower though - let scope = Rc::new(RefCell::new(Environment::new_scope(environment))); - declare_or_assign_variable(l_value, r_value, &scope, span)?; - - if tail.is_empty() { - match execute_for_body(body, &scope, out_values) { - Err(FunctionCarrier::Continue) => {} - Err(error) => return Err(error), - Ok(_value) => {} - } - } else { - execute_for_iterations(tail, body, out_values, &scope, span)?; - } - } - } - ForIteration::Guard(guard) => match evaluate_expression(guard, environment)? { - Value::Bool(true) if tail.is_empty() => { - execute_for_body(body, environment, out_values)?; - } - Value::Bool(true) => { - execute_for_iterations(tail, body, out_values, environment, span)?; - } - Value::Bool(false) => {} - value => { - return Err(EvaluationError::type_error( - format!( - "mismatched types: expected {}, found {}", - StaticType::Bool, - value.static_type(), - ), - span, - ) - .into()); - } - }, - } - - Ok(Value::unit()) -} - -// fn evaluate_as_function( -// function_expression: &ExpressionLocation, -// arg_types: &[StaticType], -// environment: &Rc>, -// ) -> EvaluationResult { -// let ExpressionLocation { expression, .. } = function_expression; -// -// if let Expression::Identifier { resolved, .. } = expression { -// resolve_dynamic_binding(resolved, arg_types, environment).ok_or_else(|| { -// FunctionCarrier::EvaluationError(EvaluationError::new( -// format!( -// "Failed to find a function that can handle the arguments ({}) at runtime", -// arg_types.iter().join(", ") -// ), -// function_expression.span, -// )) -// }) -// } else { -// evaluate_expression(function_expression, environment) -// } -// } - -fn resolve_and_call( - function_expression: &ExpressionLocation, - mut args: Vec, - environment: &Rc>, - span: Span, -) -> EvaluationResult { - let ExpressionLocation { expression, .. } = function_expression; - - let function_as_value = if let Expression::Identifier { name, resolved, .. 
} = expression { - let arg_types = args.iter().map(|arg| arg.static_type()).collect::>(); - - let opt = match resolved { - Binding::None => None, - Binding::Resolved(var) => Some(environment.borrow().get(*var)), - Binding::Dynamic(dynamic_binding) => dynamic_binding.iter().find_map(|binding| { - let value = environment.borrow().get(*binding); - - let Value::Function(fun) = &value else { - panic!("dynamic binding resolved to non-function type at runtime"); - }; - - if fun.static_type().is_fn_and_matches(&arg_types) { - return Some(value); - } - - None - }), - }; - - if opt.is_none() { - if let Binding::Dynamic(dynamic_binding) = resolved { - if let [left_type, right_type] = arg_types.as_slice() { - if left_type.supports_vectorization_with(right_type) { - let elem_types = vectorized_element_types(left_type, right_type); - let inner_fn = dynamic_binding.iter().find_map(|binding| { - let value = environment.borrow().get(*binding); - let Value::Function(fun) = &value else { - panic!("dynamic binding resolved to non-function type at runtime"); - }; - if fun.static_type().is_fn_and_matches(&elem_types) { - Some(Rc::clone(&fun)) - } else { - None - } - }); - if let Some(inner_fn) = inner_fn { - return inner_fn - .call_vectorized(&mut args, environment) - .add_span(span); - } - } - } - } - } - - opt.ok_or_else(|| { - FunctionCarrier::EvaluationError(EvaluationError::new( - format!( - "no function called '{name}' found matches the arguments: ({})", - arg_types.iter().join(", ") - ), - function_expression.span, - )) - })? - } else { - evaluate_expression(function_expression, environment)? 
- }; - - if let Value::Function(function) = function_as_value { - function.call(&mut args, environment).add_span(span) - } else { - Err(FunctionCarrier::EvaluationError(EvaluationError::new( - format!( - "Unable to invoke {} as a function.", - function_as_value.static_type() - ), - span, - ))) - } -} - -fn vectorized_element_types(left: &StaticType, right: &StaticType) -> [StaticType; 2] { - let left_elem = left.sequence_element_type().unwrap_or_else(|| left.clone()); - let right_elem = right - .sequence_element_type() - .unwrap_or_else(|| right.clone()); - [left_elem, right_elem] -} - -fn resolve_dynamic_binding( - binding: &Binding, - arg_types: &[StaticType], - environment: &Rc>, -) -> Option { - match binding { - Binding::None => None, - Binding::Resolved(var) => Some(environment.borrow().get(*var)), - Binding::Dynamic(dynamic_binding) => dynamic_binding - .iter() // TODO: should we consider the binding order? - .find_map(|binding| { - let value = environment.borrow().get(*binding); - - let Value::Function(fun) = &value else { - panic!("dynamic binding resolved to non-function type at runtime"); - }; - - // Find the first function that matches - if fun.static_type().is_fn_and_matches(arg_types) { - return Some(value); - } - - None - }), - } -} diff --git a/ndc_interpreter/src/function.rs b/ndc_interpreter/src/function.rs deleted file mode 100644 index 3aa65743..00000000 --- a/ndc_interpreter/src/function.rs +++ /dev/null @@ -1,393 +0,0 @@ -use crate::hash_map::{DefaultHasher, HashMap}; -use crate::environment::Environment; -use crate::evaluate::{ - ErrorConverter, EvaluationError, EvaluationResult, evaluate_expression, -}; -use crate::num::{BinaryOperatorError, Number}; -use crate::sequence::Sequence; -use crate::value::Value; -use derive_builder::Builder; -use ndc_lexer::Span; -use ndc_parser::{ExpressionLocation, ResolvedVar}; -pub use ndc_parser::{Parameter, StaticType, TypeSignature}; -use std::cell::{BorrowError, BorrowMutError, RefCell}; -use std::fmt; 
-use std::hash::{Hash, Hasher}; -use std::rc::Rc; - -/// Callable is a wrapper around a `OverloadedFunction` pointer and the environment to make it -/// easy to have an executable function as a method signature in the standard library -pub struct Callable<'a> { - pub function: Rc, - pub environment: &'a Rc>, -} - -impl Callable<'_> { - pub fn call(&self, args: &mut [Value]) -> EvaluationResult { - self.function.call(args, self.environment) - } -} - -#[derive(Clone, Builder)] -pub struct Function { - #[builder(default, setter(strip_option))] - name: Option, - #[builder(default, setter(strip_option))] - documentation: Option, - body: FunctionBody, -} - -impl fmt::Debug for Function { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "Function(name={}, sig={})", - self.name(), - self.type_signature() - ) - } -} - -impl Function { - pub fn arity(&self) -> Option { - self.body.arity() - } - pub fn name(&self) -> &str { - self.name.as_deref().unwrap_or_default() - } - - pub fn documentation(&self) -> &str { - self.documentation.as_deref().unwrap_or_default() - } - - pub fn short_documentation(&self) -> &str { - self.documentation() - .trim() - .lines() - .next() - .unwrap_or_default() - } - - pub fn from_body(body: FunctionBody) -> Self { - Self { - name: None, - documentation: None, - body, - } - } - - pub fn body(&self) -> &FunctionBody { - &self.body - } - - pub fn type_signature(&self) -> TypeSignature { - self.body.type_signature() - } - pub fn return_type(&self) -> &StaticType { - self.body.return_type() - } - - pub fn static_type(&self) -> StaticType { - StaticType::Function { - parameters: match self.body.type_signature() { - TypeSignature::Variadic => None, - TypeSignature::Exact(types) => { - Some(types.iter().map(|x| x.type_name.clone()).collect()) - } - }, - return_type: Box::new(self.body.return_type().clone()), - } - } - - pub fn call(&self, args: &mut [Value], env: &Rc>) -> EvaluationResult { - let result = self.body.call(args, 
env); - - match result { - Err(FunctionCarrier::Return(value)) | Ok(value) => Ok(value), - e => e, - } - } - - pub fn call_checked( - &self, - args: &mut [Value], - env: &Rc>, - ) -> EvaluationResult { - let arg_types = args.iter().map(|arg| arg.static_type()).collect::>(); - - if self.static_type().is_fn_and_matches(&arg_types) { - self.call(args, env) - } else { - Err(FunctionCarrier::FunctionTypeMismatch) - } - } - - pub fn call_vectorized( - &self, - args: &mut [Value], - env: &Rc>, - ) -> EvaluationResult { - let [left, right] = args else { - panic!("incorrect argument count for vectorization should have been handled by caller"); - }; - - let result = match (left, right) { - ( - Value::Sequence(Sequence::Tuple(left_rc)), - Value::Sequence(Sequence::Tuple(right_rc)), - ) => left_rc - .iter() - .zip(right_rc.iter()) - .map(|(l, r)| self.call(&mut [l.clone(), r.clone()], env)) - .collect::, _>>()?, - (left @ Value::Number(_), Value::Sequence(Sequence::Tuple(right_rc))) => right_rc - .iter() - .map(|r| self.call(&mut [left.clone(), r.clone()], env)) - .collect::, _>>()?, - (Value::Sequence(Sequence::Tuple(left_rc)), right @ Value::Number(_)) => left_rc - .iter() - .map(|l| self.call(&mut [l.clone(), right.clone()], env)) - .collect::, _>>()?, - _ => panic!("caller should handle all checks before vectorizing"), - }; - - Ok(Value::Sequence(Sequence::Tuple(Rc::new(result)))) - } -} - -#[derive(Clone)] -pub enum FunctionBody { - Closure { - parameter_names: Vec, - body: ExpressionLocation, - return_type: StaticType, - environment: Rc>, - }, - NumericUnaryOp { - body: fn(number: Number) -> Number, - }, - NumericBinaryOp { - body: fn(left: Number, right: Number) -> Result, - }, - GenericFunction { - type_signature: TypeSignature, - return_type: StaticType, - function: fn(&mut [Value], &Rc>) -> EvaluationResult, - }, - Memoized { - cache: RefCell>, - function: Box, - }, -} - -impl FunctionBody { - pub fn arity(&self) -> Option { - match self { - Self::Closure { - 
parameter_names, .. - } => Some(parameter_names.len()), - Self::NumericUnaryOp { .. } => Some(1), - Self::NumericBinaryOp { .. } => Some(2), - Self::GenericFunction { type_signature, .. } => type_signature.arity(), - Self::Memoized { function, .. } => function.arity(), - } - } - - pub fn generic( - type_signature: TypeSignature, - return_type: StaticType, - function: fn(&mut [Value], &Rc>) -> EvaluationResult, - ) -> Self { - Self::GenericFunction { - type_signature, - return_type, - function, - } - } - - fn type_signature(&self) -> TypeSignature { - match self { - Self::Closure { - parameter_names, .. - } => TypeSignature::Exact( - parameter_names - .iter() - .map(|name| Parameter::new(name, StaticType::Any)) - .collect(), - ), - Self::Memoized { cache: _, function } => function.type_signature(), - Self::NumericUnaryOp { .. } => { - TypeSignature::Exact(vec![Parameter::new("num", StaticType::Number)]) - } - Self::NumericBinaryOp { .. } => TypeSignature::Exact(vec![ - Parameter::new("left", StaticType::Number), - Parameter::new("right", StaticType::Number), - ]), - Self::GenericFunction { type_signature, .. } => type_signature.clone(), - } - } - - pub fn return_type(&self) -> &StaticType { - match self { - Self::Closure { return_type, .. } | Self::GenericFunction { return_type, .. } => { - return_type - } - Self::NumericUnaryOp { .. } | Self::NumericBinaryOp { .. } => &StaticType::Number, - Self::Memoized { function, .. } => function.return_type(), - } - } - pub fn call(&self, args: &mut [Value], env: &Rc>) -> EvaluationResult { - match self { - Self::Closure { - body, environment, .. - } => { - let mut local_scope = Environment::new_scope(environment); - - { - for (position, value) in args.iter().enumerate() { - // NOTE: stores a copy of the value in the environment (which is fine?) - // NOTE: we just assume here that the arguments are slotted in order starting at 0 - // because why not? Is this a call convention? 
- local_scope.set( - ResolvedVar::Captured { - depth: 0, - slot: position, - }, - value.clone(), - ) - } - } - - let local_scope = Rc::new(RefCell::new(local_scope)); - match evaluate_expression(body, &local_scope) { - Err(FunctionCarrier::Return(v)) => Ok(v), - r => r, - } - } - Self::NumericUnaryOp { body } => match args { - [Value::Number(num)] => Ok(Value::Number(body(num.clone()))), - [v] => Err(FunctionCallError::ArgumentTypeError { - expected: StaticType::Number, - actual: v.static_type(), - } - .into()), - args => Err(FunctionCallError::ArgumentCountError { - expected: 1, - actual: args.len(), - } - .into()), - }, - Self::NumericBinaryOp { body } => match args { - [Value::Number(left), Value::Number(right)] => Ok(Value::Number( - body(left.clone(), right.clone()) - .map_err(|err| FunctionCarrier::IntoEvaluationError(Box::new(err)))?, - )), - [Value::Number(_), right] => Err(FunctionCallError::ArgumentTypeError { - expected: StaticType::Number, - actual: right.static_type(), - } - .into()), - [left, _] => Err(FunctionCallError::ArgumentTypeError { - expected: StaticType::Number, - actual: left.static_type(), - } - .into()), - args => Err(FunctionCallError::ArgumentCountError { - expected: 2, - actual: args.len(), - } - .into()), - }, - Self::GenericFunction { function, .. 
} => function(args, env), - Self::Memoized { cache, function } => { - let mut hasher = DefaultHasher::default(); - for arg in &*args { - arg.hash(&mut hasher); - } - - let key = hasher.finish(); - - if !cache.borrow().contains_key(&key) { - let result = function.call(args, env)?; - cache.borrow_mut().insert(key, result); - } - - Ok(cache - .borrow() - .get(&key) - .expect("guaranteed to work") - .clone()) - } - } - } -} - -#[derive(thiserror::Error, Debug)] -pub enum FunctionCallError { - #[error("invalid argument, expected {expected} got {actual}")] - ArgumentTypeError { - expected: StaticType, - actual: StaticType, - }, - - #[error("invalid argument count, expected {expected} arguments got {actual}")] - ArgumentCountError { expected: usize, actual: usize }, - - #[error("cannot convert argument to native type: {0}")] - ConvertToNativeTypeError(String), -} - -impl From for FunctionCarrier { - fn from(value: FunctionCallError) -> Self { - Self::IntoEvaluationError(Box::new(value)) - } -} - -// Named after the Carrier trait -#[derive(thiserror::Error, Debug)] -pub enum FunctionCarrier { - #[error("not an error")] - Return(Value), - #[error("not an error")] - Break(Value), - #[error("not an error")] - Continue, - #[error("evaluation error {0}")] - EvaluationError(#[from] EvaluationError), - #[error("function does not exist")] - FunctionTypeMismatch, // This error has specific handling behavior and needs its own variant - #[error("unconverted evaluation error")] - IntoEvaluationError(Box), -} - -impl FunctionCarrier { - #[must_use] - pub fn lift_if(self, span: Span) -> Self { - match self { - Self::IntoEvaluationError(into) => { - Self::EvaluationError(into.as_evaluation_error(span)) - } - e => e, - } - } -} - -impl From for FunctionCarrier { - fn from(value: anyhow::Error) -> Self { - Self::IntoEvaluationError(Box::new(value)) - } -} - -impl From for FunctionCarrier { - fn from(value: BorrowMutError) -> Self { - // TODO: maybe this needs a better message - 
Self::IntoEvaluationError(Box::new(value)) - } -} - -impl From for FunctionCarrier { - fn from(value: BorrowError) -> Self { - // TODO: maybe this needs a better message - Self::IntoEvaluationError(Box::new(value)) - } -} diff --git a/ndc_interpreter/src/heap.rs b/ndc_interpreter/src/heap.rs deleted file mode 100644 index c8c96c82..00000000 --- a/ndc_interpreter/src/heap.rs +++ /dev/null @@ -1,106 +0,0 @@ -use crate::value::Value; -use derive_more::{Deref, DerefMut}; -use std::cmp::{Ordering, Reverse}; -use std::collections::BinaryHeap; - -pub type MinHeap = ManagedHeap>; -pub type MaxHeap = ManagedHeap; - -#[derive(Deref, DerefMut)] -pub struct ManagedHeap { - #[deref] - #[deref_mut] - heap: BinaryHeap, -} - -impl Default for ManagedHeap -where - T: Ord, -{ - fn default() -> Self { - Self::new() - } -} - -impl ManagedHeap -where - T: Ord, -{ - pub fn new() -> Self { - Self { - heap: BinaryHeap::default(), - } - } - - pub fn from_heap(heap: BinaryHeap) -> Self { - Self { heap } - } - - pub fn into_inner(self) -> BinaryHeap { - self.heap - } -} - -impl MinHeap { - pub fn push(&mut self, value: Value) { - self.heap.push(Reverse(HeapValue(value))); - } - - pub fn pop(&mut self) -> Option { - self.heap.pop().map(|v| v.0.0) - } -} - -impl MaxHeap { - pub fn push(&mut self, value: Value) { - self.heap.push(HeapValue(value)); - } - - pub fn pop(&mut self) -> Option { - self.heap.pop().map(|v| v.0) - } -} - -impl FromIterator for MinHeap { - fn from_iter>(iter: T) -> Self { - Self::from_heap( - iter.into_iter() - .map(|v| Reverse(HeapValue(v))) - .collect::>(), - ) - } -} - -impl FromIterator for MaxHeap { - fn from_iter>(iter: T) -> Self { - Self::from_heap(iter.into_iter().map(HeapValue).collect::>()) - } -} - -#[derive(Clone, Deref, DerefMut)] -pub struct HeapValue(pub Value); - -impl From for Value { - fn from(value: HeapValue) -> Self { - value.0 - } -} - -impl PartialEq for HeapValue { - fn eq(&self, other: &Self) -> bool { - self.0 == other.0 - } -} - -impl Eq for 
HeapValue {} -impl PartialOrd for HeapValue { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for HeapValue { - fn cmp(&self, other: &Self) -> Ordering { - self.0.partial_cmp(&other.0).unwrap_or(Ordering::Equal) - } -} diff --git a/ndc_interpreter/src/iterator.rs b/ndc_interpreter/src/iterator.rs deleted file mode 100644 index 7f351fc5..00000000 --- a/ndc_interpreter/src/iterator.rs +++ /dev/null @@ -1,462 +0,0 @@ -#![allow(clippy::mem_forget)] - -//! The implementation of the various iterators in this module were heavily inspired by the ones in -//! noulith which can be found [here](https://github.com/betaveros/noulith/blob/441d52ea433527b7ada5bc6cabd952f9ae8fb791/src/streams.rs) -//! -use super::function::{FunctionCarrier, StaticType}; -use super::int::Int::Int64; -use super::num::Number; -use crate::hash_map::HashMap; -use crate::heap::{MaxHeap, MinHeap}; -use crate::sequence::Sequence; -use crate::value::Value; -use self_cell::self_cell; -use std::cell::{Ref, RefCell}; -use std::collections::VecDeque; -use std::rc::Rc; - -#[derive(Clone)] -pub enum ValueIterator { - ValueRange(ValueRange), - ValueRangeFrom(ValueRangeFrom), - ValueRangeInclusive(ValueRangeInclusive), - Repeat(Repeat), -} - -impl Iterator for ValueIterator { - type Item = Value; - - fn next(&mut self) -> Option { - match self { - Self::ValueRange(inner) => inner.next(), - Self::ValueRangeFrom(inner) => inner.next(), - Self::ValueRangeInclusive(inner) => inner.next(), - Self::Repeat(inner) => inner.next(), - } - } -} - -pub enum MutableValueIntoIterator<'a> { - Tuple(RcVecIterator<'a, Value>), - List(SharedVecIterator<'a, Value>), - String(SharedStringIterator), - Map(SharedHashMapIterator<'a>), - Iterator(Rc>), - MinHeap(MinHeapIterator), - MaxHeap(MaxHeapIterator), - Deque(SharedDequeIterator), -} - -impl Iterator for MutableValueIntoIterator<'_> { - type Item = Value; - - fn next(&mut self) -> Option { - match self { - 
MutableValueIntoIterator::Tuple(iter) => iter.next(), - MutableValueIntoIterator::List(iter) => iter.next(), - MutableValueIntoIterator::String(iter) => iter.next(), - MutableValueIntoIterator::Map(iter) => iter.next(), - MutableValueIntoIterator::Iterator(iter) => iter.borrow_mut().next(), - MutableValueIntoIterator::MinHeap(iter) => iter.next(), - MutableValueIntoIterator::MaxHeap(iter) => iter.next(), - MutableValueIntoIterator::Deque(iter) => iter.next(), - } - } -} - -#[derive(thiserror::Error, Debug)] -#[error("{} is not iterable", .value_type)] -pub struct NotIterableError { - value_type: StaticType, -} - -impl From for FunctionCarrier { - fn from(value: NotIterableError) -> Self { - Self::IntoEvaluationError(Box::new(value)) - } -} - -pub fn mut_value_to_iterator( - value: &mut Value, -) -> Result, NotIterableError> { - match value { - Value::Sequence(sequence) => Ok(mut_seq_to_iterator(sequence)), - value => Err(NotIterableError { - value_type: value.static_type(), - }), - } -} - -pub fn mut_seq_to_iterator(sequence: &mut Sequence) -> MutableValueIntoIterator<'_> { - match sequence { - Sequence::String(string) => { - MutableValueIntoIterator::String(SharedStringIterator::new(string)) - } - Sequence::List(list) => { - MutableValueIntoIterator::List(SharedVecIterator::from_shared_vec(list)) - } - Sequence::MinHeap(list) => { - MutableValueIntoIterator::MinHeap(MinHeapIterator::new(Rc::clone(list))) - } - Sequence::MaxHeap(list) => { - MutableValueIntoIterator::MaxHeap(MaxHeapIterator::new(Rc::clone(list))) - } - Sequence::Deque(deque) => { - MutableValueIntoIterator::Deque(SharedDequeIterator::new(Rc::clone(deque))) - } - Sequence::Tuple(tup) => MutableValueIntoIterator::Tuple(RcVecIterator::from_rc_vec(tup)), - Sequence::Map(map, _) => { - MutableValueIntoIterator::Map(SharedHashMapIterator::from_ref(map)) - } - Sequence::Iterator(iter) => MutableValueIntoIterator::Iterator(Rc::clone(iter)), - } -} -pub enum RcVecIterator<'a, T> { - 
Draining(std::vec::Drain<'a, T>), - Cloning(std::slice::Iter<'a, T>), -} - -impl RcVecIterator<'_, T> { - pub fn from_rc_vec(value: &mut Rc>) -> RcVecIterator<'_, T> { - if Rc::get_mut(value).is_some() { - let vec = - Rc::get_mut(value).expect("guaranteed to be some by previous call to is_some"); - RcVecIterator::Draining(vec.drain(..)) - } else { - RcVecIterator::Cloning(value.iter()) - } - } -} - -impl Iterator for RcVecIterator<'_, T> -where - T: Clone, -{ - type Item = T; - - fn next(&mut self) -> Option { - match self { - RcVecIterator::Draining(i) => i.next(), - RcVecIterator::Cloning(i) => i.next().cloned(), - } - } -} - -pub enum SharedVecIterator<'a, T> { - IntoIter(std::vec::IntoIter), - RefCellIterator(RefCellIterator<'a, T>), -} - -impl SharedVecIterator<'_, T> { - pub fn from_shared_vec(value: &mut Rc>>) -> SharedVecIterator<'_, T> { - match Rc::get_mut(value) { - // This case covers code samples where a list literal is used in a loop like: - // ```ndc - // for x in [1,2,3,4] { - // print(x); - // } - // ``` - // In this case there is only one reference to the vector and we can take ownership - Some(vec) => SharedVecIterator::IntoIter(vec.take().into_iter()), - None => SharedVecIterator::RefCellIterator(RefCellIterator { - inner: Some(Ref::map(value.borrow(), |it| &it[..])), - }), - } - } -} - -impl Iterator for SharedVecIterator<'_, T> -where - T: Clone, -{ - type Item = T; - fn next(&mut self) -> Option { - match self { - SharedVecIterator::RefCellIterator(i) => i.next().map(|it| it.to_owned()), - SharedVecIterator::IntoIter(i) => i.next(), - } - } -} - -pub struct MaxHeapIterator { - heap: Rc>, - idx: usize, -} - -impl Iterator for MaxHeapIterator { - type Item = Value; - - fn next(&mut self) -> Option { - let heap = self.heap.borrow(); - if self.idx < heap.len() { - let x = &heap.as_slice()[self.idx]; - self.idx += 1; - Some(x.0.clone()) - } else { - None - } - } -} - -impl MaxHeapIterator { - pub fn new(heap: Rc>) -> Self { - Self { heap, idx: 0 } 
- } -} - -pub struct MinHeapIterator { - heap: Rc>, - idx: usize, -} - -impl MinHeapIterator { - pub fn new(heap: Rc>) -> Self { - Self { heap, idx: 0 } - } -} - -impl Iterator for MinHeapIterator { - type Item = Value; - - fn next(&mut self) -> Option { - let heap = self.heap.borrow(); - if self.idx < heap.len() { - let x = &heap.as_slice()[self.idx]; - self.idx += 1; - Some(x.0.0.clone()) - } else { - None - } - } -} - -pub struct SharedDequeIterator { - deque: Rc>>, - idx: usize, -} - -impl SharedDequeIterator { - pub fn new(deque: Rc>>) -> Self { - Self { deque, idx: 0 } - } -} - -impl Iterator for SharedDequeIterator { - type Item = Value; - - fn next(&mut self) -> Option { - let deque = self.deque.borrow(); - if self.idx < deque.len() { - let out = deque.get(self.idx).cloned(); - self.idx += 1; - out - } else { - None - } - } -} - -/// The mutable string iterator effectively takes a reference to the string and keeps track of the -/// current offset in order to implement character by character iteration (instead of iterating over -/// u8's) -pub struct SharedStringIterator { - inner: Rc>, - offset: usize, -} - -impl SharedStringIterator { - pub fn new(value: &Rc>) -> Self { - Self { - inner: Rc::clone(value), - offset: 0, - } - } -} - -impl Iterator for SharedStringIterator { - type Item = Value; - - fn next(&mut self) -> Option { - let current_char = self.inner.borrow()[self.offset..] 
- .chars() - .take(1) - .collect::(); - self.offset += current_char.len(); - if current_char.is_empty() { - None - } else { - Some(Value::from(current_char)) - } - } -} - -/// This `RefCellIterator` is adapted from this stack-overflow answer: -/// -/// It returns a `Ref` to a slice of the vector it's iterating over using `Ref::map_split` making it -/// so that we only need a `Ref` to the original list rather than cloning the `Rc` -pub struct RefCellIterator<'a, T> { - inner: Option>, -} - -impl<'a, T> Iterator for RefCellIterator<'a, T> -where - T: Clone, -{ - type Item = Ref<'a, T>; - - fn next(&mut self) -> Option { - match self.inner.take() { - Some(borrow) => { - if borrow.is_empty() { - None - } else { - let (head, tail) = Ref::map_split(borrow, |slice| (&slice[0], &slice[1..])); - self.inner.replace(tail); - Some(head) - } - } - None => None, - } - } -} - -struct HashMapIter<'a>(pub std::collections::hash_map::Iter<'a, Value, Value>); - -self_cell! { - pub struct SharedHashMapIterator<'a> { - owner: Ref<'a, HashMap>, - - #[covariant] - dependent: HashMapIter, - } -} - -impl SharedHashMapIterator<'_> { - pub fn from_ref(value: &Rc>>) -> SharedHashMapIterator<'_> { - let borrow = value.borrow(); - SharedHashMapIterator::new(borrow, |map| HashMapIter(map.iter())) - } -} - -impl Iterator for SharedHashMapIterator<'_> { - type Item = Value; - - fn next(&mut self) -> Option { - let cur = self.with_dependent_mut(|_map, iter| iter.next()); - // Creates copies of the values inside the map - cur.map(|cur| Value::tuple(vec![cur.0.clone(), cur.1.clone()])) - } -} - -impl<'a> Iterator for HashMapIter<'a> { - type Item = (&'a Value, &'a Value); - - fn next(&mut self) -> Option { - self.0.next() - } -} - -#[derive(Clone)] -pub struct Repeat { - pub value: Value, - pub cur: usize, - pub limit: Option, -} - -impl Iterator for Repeat { - type Item = Value; - - fn next(&mut self) -> Option { - if let Some(times) = self.limit { - if self.cur < times { - self.cur += 1; - 
Some(self.value.clone()) - } else { - None - } - } else { - Some(self.value.clone()) - } - } -} -/// Ranges are a whole thing -#[derive(Clone)] -pub struct ValueRange(pub std::ops::Range); - -impl ValueRange { - #[must_use] - pub fn contains(&self, v: &Value) -> bool { - match v { - Value::Number(Number::Int(Int64(v))) => self.0.contains(v), - _ => false, - } - } -} - -impl Iterator for ValueRange { - type Item = Value; - - fn next(&mut self) -> Option { - self.0.next().map(Value::from) - } -} - -#[derive(Clone)] -pub struct ValueRangeInclusive(pub std::ops::RangeInclusive); - -impl ValueRangeInclusive { - #[must_use] - pub fn contains(&self, v: &Value) -> bool { - match v { - Value::Number(Number::Int(Int64(v))) => self.0.contains(v), - _ => false, - } - } -} - -impl Iterator for ValueRangeInclusive { - type Item = Value; - - fn next(&mut self) -> Option { - self.0.next().map(Value::from) - } -} - -#[derive(Clone)] -pub struct ValueRangeFrom(pub std::ops::RangeFrom); -impl ValueRangeFrom { - #[must_use] - pub fn contains(&self, v: &Value) -> bool { - match v { - Value::Number(Number::Int(Int64(v))) => self.0.contains(v), - _ => false, - } - } -} - -impl Iterator for ValueRangeFrom { - type Item = Value; - - fn next(&mut self) -> Option { - self.0.next().map(Value::from) - } -} - -/// Cursed experiment -pub struct RcIter { - iter: Rc>, -} - -impl RcIter { - pub fn new(iter: Rc>) -> Self { - Self { iter } - } -} - -impl Iterator for RcIter { - type Item = Value; - - fn next(&mut self) -> Option { - self.iter.borrow_mut().next() - } -} diff --git a/ndc_interpreter/src/lib.rs b/ndc_interpreter/src/lib.rs index c13018ad..ac9f038d 100644 --- a/ndc_interpreter/src/lib.rs +++ b/ndc_interpreter/src/lib.rs @@ -1,57 +1,75 @@ -pub use ndc_core::{compare, hash_map, int, num}; - -pub mod environment; -pub mod evaluate; -pub mod function; -pub mod heap; -pub mod iterator; -pub mod semantic; -pub mod sequence; -pub mod value; - -use std::cell::RefCell; -use std::rc::Rc; - -use 
crate::environment::{Environment, InterpreterOutput}; -use crate::evaluate::{EvaluationError, evaluate_expression}; -use crate::function::FunctionCarrier; -use crate::semantic::analyser::{Analyser, ScopeTree}; -use crate::value::Value; +use ndc_analyser::{Analyser, ScopeTree}; +use ndc_core::FunctionRegistry; use ndc_lexer::{Lexer, TokenLocation}; use ndc_parser::ExpressionLocation; +use ndc_vm::compiler::Compiler; +use ndc_vm::value::CompiledFunction; +use ndc_vm::{OutputSink, Vm}; +use std::rc::Rc; + +pub use ndc_vm::{NativeFunction, Value}; pub struct Interpreter { - environment: Rc>, + registry: FunctionRegistry>, + capturing: bool, analyser: Analyser, + /// Persistent REPL VM and the compiler checkpoint from the last run. + /// `None` until the first `eval` call; kept alive afterwards so that + /// variables declared on one line are visible on subsequent lines. + repl_state: Option<(Vm, Compiler)>, } impl Interpreter { + /// Create an interpreter that writes output to stdout. #[must_use] - pub fn new(dest: T) -> Self - where - T: InterpreterOutput + 'static, - { - Self::from_env(Environment::new(Box::new(dest))) + pub fn new() -> Self { + Self::from_capturing(false) } + /// Create an interpreter that captures output into an internal buffer, + /// retrievable via [`get_output`]. 
#[must_use] - pub fn from_env(environment: Environment) -> Self { - let global_identifiers = environment.get_global_identifiers(); + pub fn capturing() -> Self { + Self::from_capturing(true) + } + + fn from_capturing(capturing: bool) -> Self { Self { - environment: Rc::new(RefCell::new(environment)), - analyser: Analyser::from_scope_tree(ScopeTree::from_global_scope(global_identifiers)), + registry: FunctionRegistry::default(), + capturing, + analyser: Analyser::from_scope_tree(ScopeTree::from_global_scope(vec![])), + repl_state: None, } } - pub fn configure(&mut self, f: F) { - f(&mut self.environment.borrow_mut()); - let global_identifiers = self.environment.borrow().get_global_identifiers(); - self.analyser = Analyser::from_scope_tree(ScopeTree::from_global_scope(global_identifiers)); + pub fn configure>)>(&mut self, f: F) { + f(&mut self.registry); + let functions = self + .registry + .iter() + .map(|fun| (fun.name.clone(), fun.static_type.clone())) + .collect(); + + self.analyser = Analyser::from_scope_tree(ScopeTree::from_global_scope(functions)); + } + + pub fn functions(&self) -> impl Iterator> { + self.registry.iter() } - #[must_use] - pub fn environment(self) -> Rc> { - self.environment + /// Returns the captured output, or `None` if this interpreter writes to stdout. + /// Returns `Some(vec![])` for a capturing interpreter that hasn't run yet. + pub fn get_output(&self) -> Option> { + if self.capturing { + Some( + self.repl_state + .as_ref() + .and_then(|(vm, _)| vm.get_output().map(<[u8]>::to_vec)) + .unwrap_or_default(), + ) + } else { + None + } } pub fn analyse_str( @@ -61,10 +79,24 @@ impl Interpreter { self.parse_and_analyse(input) } - pub fn run_str(&mut self, input: &str) -> Result { + pub fn compile_str(&mut self, input: &str) -> Result { + let expressions = self.parse_and_analyse(input)?; + Ok(Compiler::compile(expressions.into_iter())?) 
+ } + + pub fn disassemble_str(&mut self, input: &str) -> Result { + let compiled = self.compile_str(input)?; + let mut out = String::new(); + out.push_str(&ndc_vm::disassemble::disassemble(&compiled, Some(input))); + Ok(out) + } + + /// Execute source code and return the resulting [`Value`]. + /// + /// Statements (semicolon-terminated) produce [`Value::unit()`]. + pub fn eval(&mut self, input: &str) -> Result { let expressions = self.parse_and_analyse(input)?; - let final_value = self.interpret(expressions.into_iter())?; - Ok(format!("{final_value}")) + self.interpret_vm(input, expressions.into_iter()) } fn parse_and_analyse( @@ -85,41 +117,65 @@ impl Interpreter { Ok(expressions) } - fn interpret( + fn interpret_vm( &mut self, + #[cfg(feature = "vm-trace")] input: &str, + #[cfg(not(feature = "vm-trace"))] _input: &str, expressions: impl Iterator, ) -> Result { - let mut value = Value::unit(); - for expr in expressions { - match evaluate_expression(&expr, &self.environment) { - Ok(val) => value = val, - Err(FunctionCarrier::Return(_)) => { - Err(EvaluationError::syntax_error( - "unexpected return statement outside of function body".to_string(), - expr.span, - ))?; - } - Err(FunctionCarrier::Break(_)) => { - Err(EvaluationError::syntax_error( - "unexpected break statement outside of loop body".to_string(), - expr.span, - ))?; - } - Err(FunctionCarrier::Continue) => { - Err(EvaluationError::syntax_error( - "unexpected continue statement outside of loop body".to_string(), - expr.span, - ))?; + use ndc_vm::{Function as VmFunction, Object as VmObject, Value as VmValue}; + + let globals: Vec = self + .registry + .iter() + .map(|native| { + VmValue::Object(Rc::new(VmObject::Function(VmFunction::Native(Rc::clone( + native, + ))))) + }) + .collect(); + + let result = match self.repl_state.take() { + None => { + let output = if self.capturing { + OutputSink::Buffer(Vec::new()) + } else { + OutputSink::Stdout + }; + let (code, checkpoint) = 
Compiler::compile_resumable(expressions)?; + let mut vm = Vm::new(code, globals).with_output(output); + #[cfg(feature = "vm-trace")] + { + vm = vm.with_source(input); } - Err(FunctionCarrier::EvaluationError(e)) => return Err(InterpreterError::from(e)), - _ => { - panic!( - "internal error: unhandled function carrier variant returned from evaluate_expression" - ); + vm.run()?; + let result = vm.last_value(checkpoint.num_locals()); + self.repl_state = Some((vm, checkpoint)); + result + } + Some((mut vm, checkpoint)) => { + let resume_ip = checkpoint.halt_ip(); + let prev_num_locals = checkpoint.num_locals(); + let (code, new_checkpoint) = checkpoint.resume(expressions)?; + vm.resume_from_halt(code, globals, resume_ip, prev_num_locals); + #[cfg(feature = "vm-trace")] + { + vm.set_source(input); } + vm.run()?; + let result = vm.last_value(new_checkpoint.num_locals()); + self.repl_state = Some((vm, new_checkpoint)); + result } - } - Ok(value) + }; + + Ok(result) + } +} + +impl Default for Interpreter { + fn default() -> Self { + Self::new() } } @@ -138,8 +194,13 @@ pub enum InterpreterError { #[error("Error during static analysis")] Resolver { #[from] - cause: semantic::analyser::AnalysisError, + cause: ndc_analyser::AnalysisError, + }, + #[error("Compilation error")] + Compiler { + #[from] + cause: ndc_vm::CompileError, }, - #[error("Error while executing code")] - Evaluation(#[from] EvaluationError), + #[error("{0}")] + Vm(#[from] ndc_vm::VmError), } diff --git a/ndc_interpreter/src/semantic/analyser.rs b/ndc_interpreter/src/semantic/analyser.rs deleted file mode 100644 index 57b9b9b9..00000000 --- a/ndc_interpreter/src/semantic/analyser.rs +++ /dev/null @@ -1,909 +0,0 @@ -use crate::function::StaticType; -use itertools::Itertools; -use ndc_lexer::Span; -use ndc_parser::{ - Binding, Expression, ExpressionLocation, ForBody, ForIteration, Lvalue, ResolvedVar, -}; -use std::fmt::{Debug, Formatter}; - -pub struct Analyser { - scope_tree: ScopeTree, -} - -impl Analyser { 
- pub fn from_scope_tree(scope_tree: ScopeTree) -> Self { - Self { scope_tree } - } - - pub fn checkpoint(&self) -> ScopeTree { - self.scope_tree.clone() - } - - pub fn restore(&mut self, checkpoint: ScopeTree) { - self.scope_tree = checkpoint; - } - - pub fn analyse( - &mut self, - ExpressionLocation { expression, span }: &mut ExpressionLocation, - ) -> Result { - match expression { - Expression::BoolLiteral(_) => Ok(StaticType::Bool), - Expression::StringLiteral(_) => Ok(StaticType::String), - Expression::Int64Literal(_) | Expression::BigIntLiteral(_) => Ok(StaticType::Int), - Expression::Float64Literal(_) => Ok(StaticType::Float), - Expression::ComplexLiteral(_) => Ok(StaticType::Complex), - Expression::Continue | Expression::Break => Ok(StaticType::unit()), - Expression::Identifier { - name: ident, - resolved, - } => { - if ident == "None" { - // TODO: we're going to need something like HM to infer the type of option here, maybe force type annotations? - return Ok(StaticType::Option(Box::new(StaticType::Any))); - } - let binding = self.scope_tree.get_binding_any(ident).ok_or_else(|| { - AnalysisError::identifier_not_previously_declared(ident, *span) - })?; - - *resolved = Binding::Resolved(binding); - - Ok(self.scope_tree.get_type(binding).clone()) - } - Expression::Statement(inner) => { - self.analyse(inner)?; - Ok(StaticType::unit()) - } - Expression::Logical { left, right, .. } => { - self.analyse(left)?; // TODO: throw error if type does not match bool? - self.analyse(right)?; // TODO: throw error if type does not match bool? - Ok(StaticType::Bool) - } - Expression::Grouping(expr) => self.analyse(expr), - Expression::VariableDeclaration { l_value, value } => { - let typ = self.analyse(value)?; - self.resolve_lvalue_declarative(l_value, typ, *span)?; - Ok(StaticType::unit()) // TODO: never type here? 
- } - Expression::Assignment { l_value, r_value } => { - self.resolve_lvalue(l_value, *span)?; - self.analyse(r_value)?; - Ok(StaticType::unit()) - } - Expression::OpAssignment { - l_value, - r_value, - operation, - resolved_assign_operation, - resolved_operation, - } => { - let left_type = self.resolve_single_lvalue(l_value, *span)?; - let right_type = self.analyse(r_value)?; - let arg_types = vec![left_type, right_type]; - - *resolved_assign_operation = self - .scope_tree - .resolve_function2(&format!("{operation}="), &arg_types); - *resolved_operation = self.scope_tree.resolve_function2(operation, &arg_types); - - if let Binding::None = resolved_operation { - return Err(AnalysisError::function_not_found( - operation, &arg_types, *span, - )); - } - - Ok(StaticType::unit()) - } - Expression::FunctionDeclaration { - name, - resolved_name, - parameters, - body, - return_type: return_type_slot, - .. - } => { - // TODO: figuring out the type signature of function declarations is the rest of the owl - - // Pre-register the function before analysing its body so recursive calls can - // resolve the name. The return type is unknown at this point so we use Any. 
- let pre_slot = if let Some(name) = name { - let param_types: Vec = - std::iter::repeat_n(StaticType::Any, extract_argument_arity(parameters)) - .collect(); - - let placeholder = StaticType::Function { - parameters: Some(param_types), - return_type: Box::new(StaticType::Any), - }; - Some( - self.scope_tree - .create_local_binding(name.clone(), placeholder), - ) - } else { - None - }; - - self.scope_tree.new_scope(); - let param_types = self.resolve_parameters_declarative(parameters)?; - - let return_type = self.analyse(body)?; - self.scope_tree.destroy_scope(); - *return_type_slot = Some(return_type); - - let function_type = StaticType::Function { - parameters: Some(param_types.clone()), - return_type: Box::new( - return_type_slot - .clone() - .expect("must have a value at this point"), - ), - }; - - if let Some(slot) = pre_slot { - // TODO: is this correct, for now we just always create a new binding, we could - // also produce an error if we are generating a conflicting binding - self.scope_tree - .update_binding_type(slot, function_type.clone()); - *resolved_name = Some(slot); - } - - Ok(function_type) - } - Expression::Block { statements } => { - self.scope_tree.new_scope(); - let mut last = None; - for s in statements { - last = Some(self.analyse(s)?); - } - self.scope_tree.destroy_scope(); - - Ok(last.unwrap_or_else(StaticType::unit)) - } - Expression::If { - condition, - on_true, - on_false, - } => { - self.analyse(condition)?; - let true_type = self.analyse(on_true)?; - let false_type = if let Some(on_false) = on_false { - self.analyse(on_false)? - } else { - StaticType::unit() - }; - - if true_type != StaticType::unit() { - // TODO: Emit warning for not using a semicolon in this if - } - - if true_type != false_type { - // TODO maybe show warning? 
- } - - Ok(true_type.lub(&false_type)) - } - Expression::While { - expression, - loop_body, - } => { - self.analyse(expression)?; - self.analyse(loop_body)?; - Ok(StaticType::unit()) - } - Expression::For { iterations, body } => { - Ok(self.resolve_for_iterations(iterations, body, *span)?) - } - Expression::Call { - function, - arguments, - } => { - let mut type_sig = Vec::with_capacity(arguments.len()); - for a in arguments { - type_sig.push(self.analyse(a)?); - } - - let StaticType::Function { return_type, .. } = - self.resolve_function_with_argument_types(function, &type_sig, *span)? - else { - // If we couldn't resolve the identifier to a function we have to just assume that - // whatever identifier we did find is a function at runtime and will return Any - return Ok(StaticType::Any); - }; - - Ok(*return_type) - } - Expression::Index { index, value } => { - self.analyse(index)?; - let container_type = self.analyse(value)?; - - container_type - .index_element_type() - .ok_or_else(|| AnalysisError::unable_to_index_into(&container_type, *span)) - } - Expression::Tuple { values } => { - let mut types = Vec::with_capacity(values.len()); - for v in values { - types.push(self.analyse(v)?); - } - - Ok(StaticType::Tuple(types)) - } - Expression::List { values } => { - let element_type = self.analyse_multiple_expression_with_same_type(values)?; - - // TODO: for now if we encounter an empty list expression we say the list is generic over Any but this clearly is not a good solution - Ok(StaticType::List(Box::new( - element_type.unwrap_or(StaticType::Any), - ))) - } - Expression::Map { values, default } => { - let mut key_type: Option = None; - let mut value_type: Option = None; - for (key, value) in values { - // let map = %{ - // "key": 1, - // 10: 1, - // } - if let Some(key_type) = &mut key_type { - let next_type = self.analyse(key)?; - *key_type = key_type.lub(&next_type); - } else { - key_type = Some(self.analyse(key)?); - } - if let Some(value) = value { - if let 
Some(value_type) = &mut value_type { - let next_type = self.analyse(value)?; - if &next_type != value_type { - *value_type = value_type.lub(&next_type); - } - } else { - value_type = Some(self.analyse(value)?); - } - } - } - - if let Some(default) = default { - self.analyse(default)?; - } - - // TODO: defaulting to Any here is surely going to bite us later - Ok(StaticType::Map { - key: Box::new(key_type.unwrap_or(StaticType::Any)), - value: Box::new(value_type.unwrap_or_else(StaticType::unit)), - }) - } - // Return evaluates to the type of the expression it returns, which makes type checking easier! - // Actually it doesn't seem to make it any easier - Expression::Return { value } => self.analyse(value), - Expression::RangeInclusive { start, end } - | Expression::RangeExclusive { start, end } => { - if let Some(start) = start { - self.analyse(start)?; - } - if let Some(end) = end { - self.analyse(end)?; - } - - Ok(StaticType::Iterator(Box::new(StaticType::Int))) - } - } - } - fn resolve_function_with_argument_types( - &mut self, - ident: &mut ExpressionLocation, - argument_types: &[StaticType], - span: Span, - ) -> Result { - let ExpressionLocation { - expression: Expression::Identifier { name, resolved }, - .. - } = ident - else { - // It's possible that we're not trying to invoke an identifier `foo()` but instead we're - // invoking a value like `get_function()()` so in this case we just continue like normal? 
- return self.analyse(ident); - }; - - // println!("resolve fn {name} {}", argument_types.iter().join(", ")); - - let binding = self.scope_tree.resolve_function2(name, argument_types); - - let out_type = match &binding { - Binding::None => { - return Err(AnalysisError::function_not_found( - name, - argument_types, - span, - )); - } - Binding::Resolved(res) => self.scope_tree.get_type(*res).clone(), - - // TODO: are we just going to lie about the type or is this just how truthful we can be - Binding::Dynamic(_) => StaticType::Function { - parameters: None, - return_type: Box::new(StaticType::Any), - }, - }; - - *resolved = binding; - - Ok(out_type) - } - fn resolve_for_iterations( - &mut self, - iterations: &mut [ForIteration], - body: &mut ForBody, - span: Span, - ) -> Result { - let Some((iteration, tail)) = iterations.split_first_mut() else { - unreachable!("because this function is never called with an empty slice"); - }; - - let mut do_destroy = false; - match iteration { - ForIteration::Iteration { l_value, sequence } => { - let sequence_type = self.analyse(sequence)?; - - self.scope_tree.new_scope(); - - // TODO: when we give type parameters to all instances of sequence we can correctly infer StaticType::Any in this position - self.resolve_lvalue_declarative( - l_value, - sequence_type - .sequence_element_type() - .unwrap_or(StaticType::Any), - span, - )?; - do_destroy = true; // TODO: why is this correct - } - ForIteration::Guard(expr) => { - self.analyse(expr)?; - } - } - - let out_type = if !tail.is_empty() { - self.resolve_for_iterations(tail, body, span)? - } else { - match body { - ForBody::Block(block) => { - self.analyse(block)?; - StaticType::unit() - } - ForBody::List(list) => StaticType::List(Box::new(self.analyse(list)?)), - ForBody::Map { - key, - value, - default, - } => { - let key_type = self.analyse(key)?; - let value_type = if let Some(value) = value { - self.analyse(value)? 
- } else { - StaticType::unit() - }; - - if let Some(default) = default { - self.analyse(default)?; - } - - StaticType::Map { - key: Box::new(key_type), - value: Box::new(value_type), - } - } - } - }; - - if do_destroy { - self.scope_tree.destroy_scope(); - } - - Ok(out_type) - } - - fn resolve_single_lvalue( - &mut self, - lvalue: &mut Lvalue, - span: Span, - ) -> Result { - match lvalue { - Lvalue::Identifier { - identifier, - resolved, - .. - } => { - let Some(target) = self.scope_tree.get_binding_any(identifier) else { - return Err(AnalysisError::identifier_not_previously_declared( - identifier, span, - )); - }; - - *resolved = Some(target); - - Ok(self.scope_tree.get_type(target).clone()) - } - Lvalue::Index { index, value } => { - self.analyse(index)?; - let type_of_index_target = self.analyse(value)?; - - type_of_index_target - .index_element_type() - .ok_or_else(|| AnalysisError::unable_to_index_into(&type_of_index_target, span)) - } - Lvalue::Sequence(_) => { - Err(AnalysisError::lvalue_required_to_be_single_identifier(span)) - } - } - } - - fn resolve_lvalue(&mut self, lvalue: &mut Lvalue, span: Span) -> Result<(), AnalysisError> { - match lvalue { - Lvalue::Identifier { - identifier, - resolved, - .. - } => { - let Some(target) = self.scope_tree.get_binding_any(identifier) else { - return Err(AnalysisError::identifier_not_previously_declared( - identifier, span, - )); - }; - - *resolved = Some(target); - } - Lvalue::Index { index, value } => { - self.analyse(index)?; - self.analyse(value)?; - } - Lvalue::Sequence(seq) => { - for sub_lvalue in seq { - self.resolve_lvalue(sub_lvalue, span)? 
- } - } - } - - Ok(()) - } - - /// Resolve expressions as arguments to a function and return the function arity - fn resolve_parameters_declarative( - &mut self, - arguments: &mut ExpressionLocation, - ) -> Result, AnalysisError> { - let mut types: Vec = Vec::new(); - let mut names: Vec<&str> = Vec::new(); - - let ExpressionLocation { - expression: Expression::Tuple { values }, - .. - } = arguments - else { - panic!("expected arguments to be tuple"); - }; - - for arg in values { - let ExpressionLocation { - expression: Expression::Identifier { name, resolved }, - span, - } = arg - else { - panic!("expected tuple values to be ident"); - }; - - // TODO: big challenge how do we figure out the function parameter types? - // it seems like this is something we need an HM like system for!? - let resolved_type = StaticType::Any; - types.push(resolved_type.clone()); - if names.contains(&name.as_str()) { - return Err(AnalysisError::parameter_redefined(name, *span)); - } - names.push(name); - - *resolved = Binding::Resolved( - self.scope_tree - .create_local_binding((*name).clone(), resolved_type), - ); - } - - Ok(types) - } - fn resolve_lvalue_declarative( - &mut self, - lvalue: &mut Lvalue, - typ: StaticType, - span: Span, - ) -> Result<(), AnalysisError> { - match lvalue { - Lvalue::Identifier { - identifier, - resolved, - inferred_type, - .. - } => { - *resolved = Some( - self.scope_tree - .create_local_binding(identifier.clone(), typ.clone()), - ); - *inferred_type = Some(typ); - } - Lvalue::Index { index, value } => { - self.analyse(index)?; - self.analyse(value)?; - } - Lvalue::Sequence(seq) => { - let sub_types = typ - .unpack() - .ok_or_else(|| AnalysisError::unable_to_unpack_type(&typ, span))?; - - for (sub_lvalue, sub_lvalue_type) in seq.iter_mut().zip(sub_types) { - self.resolve_lvalue_declarative( - sub_lvalue, - sub_lvalue_type.clone(), - /* todo: figure out how to narrow this span */ span, - )? 
- } - } - } - - Ok(()) - } - fn analyse_multiple_expression_with_same_type( - &mut self, - expressions: &mut Vec, - ) -> Result, AnalysisError> { - let mut element_type: Option = None; - - for expression in expressions { - if let Some(element_type) = &mut element_type { - let following_type = self.analyse(expression)?; - - *element_type = element_type.lub(&following_type); - } else { - element_type = Some(self.analyse(expression)?); - } - } - - Ok(element_type) - } -} - -fn extract_argument_arity(arguments: &ExpressionLocation) -> usize { - let ExpressionLocation { - expression: Expression::Tuple { values }, - .. - } = arguments - else { - panic!("expected arguments to be tuple"); - }; - - values.len() -} - -#[derive(Debug, Clone)] -pub struct ScopeTree { - current_scope_idx: usize, - global_scope: Scope, - scopes: Vec, -} - -impl ScopeTree { - pub fn from_global_scope(global_scope_map: Vec<(String, StaticType)>) -> Self { - Self { - current_scope_idx: 0, - global_scope: Scope { - parent_idx: None, - identifiers: global_scope_map, - }, - scopes: vec![Scope::new(None)], - } - } - - fn get_type(&self, res: ResolvedVar) -> &StaticType { - match res { - ResolvedVar::Captured { slot, depth } => { - let mut scope_idx = self.current_scope_idx; - let mut depth = depth; - while depth > 0 { - depth -= 1; - scope_idx = self.scopes[scope_idx] - .parent_idx - .expect("parent_idx was None while traversing the scope tree"); - } - &self.scopes[scope_idx].identifiers[slot].1 - } - // for now all globals are functions - ResolvedVar::Global { slot } => &self.global_scope.identifiers[slot].1, - } - } - - fn new_scope(&mut self) -> &Scope { - let old_scope_idx = self.current_scope_idx; - self.current_scope_idx = self.scopes.len(); - let new_scope = Scope::new(Some(old_scope_idx)); - self.scopes.push(new_scope); - &self.scopes[self.current_scope_idx] - } - - fn destroy_scope(&mut self) { - let next = self.scopes[self.current_scope_idx] - .parent_idx - .expect("tried to destroy scope 
while there were none"); - self.current_scope_idx = next; - } - - fn get_binding_any(&mut self, ident: &str) -> Option { - let mut depth = 0; - let mut scope_ptr = self.current_scope_idx; - - loop { - if let Some(slot) = self.scopes[scope_ptr].find_slot_by_name(ident) { - return Some(ResolvedVar::Captured { slot, depth }); - } else if let Some(parent_idx) = self.scopes[scope_ptr].parent_idx { - depth += 1; - scope_ptr = parent_idx; - } else { - return Some(ResolvedVar::Global { - slot: self.global_scope.find_slot_by_name(ident)?, - }); - } - } - } - - fn resolve_function_dynamic(&mut self, ident: &str, sig: &[StaticType]) -> Vec { - let mut depth = 0; - let mut scope_ptr = self.current_scope_idx; - - loop { - let candidates = self.scopes[scope_ptr].find_function_candidates(ident, sig); - if !candidates.is_empty() { - return candidates - .into_iter() - .map(|slot| ResolvedVar::Captured { slot, depth }) - .collect(); - } else if let Some(parent_idx) = self.scopes[scope_ptr].parent_idx { - depth += 1; - scope_ptr = parent_idx; - } else { - return self - .global_scope - .find_function_candidates(ident, sig) - .into_iter() - .map(|slot| ResolvedVar::Global { slot }) - .collect(); - } - } - } - - fn resolve_function2(&mut self, ident: &str, sig: &[StaticType]) -> Binding { - self.resolve_function(ident, sig) - .map(Binding::Resolved) - .or_else(|| { - let loose_bindings = self.resolve_function_dynamic(ident, sig); - - if loose_bindings.is_empty() { - return None; - } - - Some(Binding::Dynamic(loose_bindings)) - }) - // If we can't find any function in scope that could match, fall back to all same-named - // bindings so runtime dynamic dispatch (including vectorization) can pick the right one. 
- .or_else(|| { - let all_bindings = self.get_all_bindings_by_name(ident); - if all_bindings.is_empty() { - return None; - } - Some(Binding::Dynamic(all_bindings)) - }) - .unwrap_or(Binding::None) - } - - fn get_all_bindings_by_name(&self, ident: &str) -> Vec { - let mut results = Vec::new(); - let mut depth = 0; - let mut scope_ptr = self.current_scope_idx; - - loop { - let slots = self.scopes[scope_ptr].find_all_slots_by_name(ident); - results.extend( - slots - .into_iter() - .map(|slot| ResolvedVar::Captured { slot, depth }), - ); - - if let Some(parent_idx) = self.scopes[scope_ptr].parent_idx { - depth += 1; - scope_ptr = parent_idx; - } else { - let global_slots = self.global_scope.find_all_slots_by_name(ident); - results.extend( - global_slots - .into_iter() - .map(|slot| ResolvedVar::Global { slot }), - ); - break; - } - } - - results - } - - fn resolve_function(&mut self, ident: &str, arg_types: &[StaticType]) -> Option { - let mut depth = 0; - let mut scope_ptr = self.current_scope_idx; - - loop { - if let Some(slot) = self.scopes[scope_ptr].find_function(ident, arg_types) { - return Some(ResolvedVar::Captured { slot, depth }); - } else if let Some(parent_idx) = self.scopes[scope_ptr].parent_idx { - depth += 1; - scope_ptr = parent_idx; - } else { - return Some(ResolvedVar::Global { - slot: self.global_scope.find_function(ident, arg_types)?, - }); - } - } - } - - fn create_local_binding(&mut self, ident: String, typ: StaticType) -> ResolvedVar { - ResolvedVar::Captured { - slot: self.scopes[self.current_scope_idx].allocate(ident, typ), - depth: 0, - } - } - - fn update_binding_type(&mut self, var: ResolvedVar, new_type: StaticType) { - let ResolvedVar::Captured { slot, depth } = var else { - panic!("update_binding_type called with a global binding"); - }; - let mut scope_idx = self.current_scope_idx; - let mut remaining = depth; - while remaining > 0 { - remaining -= 1; - scope_idx = self.scopes[scope_idx] - .parent_idx - .expect("parent_idx was None while 
traversing the scope tree"); - } - self.scopes[scope_idx].identifiers[slot].1 = new_type; - } -} - -#[derive(Debug, Clone)] -struct Scope { - parent_idx: Option, - identifiers: Vec<(String, StaticType)>, -} - -impl Scope { - fn new(parent_idx: Option) -> Self { - Self { - parent_idx, - identifiers: Default::default(), - } - } - - pub fn find_slot_by_name(&self, find_ident: &str) -> Option { - self.identifiers - .iter() - .rposition(|(ident, _)| ident == find_ident) - } - - fn find_all_slots_by_name(&self, find_ident: &str) -> Vec { - self.identifiers - .iter() - .enumerate() - .filter_map(|(slot, (ident, _))| { - if ident == find_ident { - Some(slot) - } else { - None - } - }) - .collect() - } - - fn find_function_candidates(&self, find_ident: &str, find_types: &[StaticType]) -> Vec { - self.identifiers.iter() - .enumerate() - .rev() - .filter_map(|(slot, (ident, typ))| { - if ident != find_ident { - return None; - } - - // If the thing is not a function we're not interested - let StaticType::Function { parameters, .. } = typ else { - return None; - }; - - let Some(param_types) = parameters else { - // If this branch happens then the function we're matching against is variadic meaning it's always a match - debug_assert!(false, "we should never be calling find_function_candidates if there were variadic matches"); - // TODO: Change to unreachable? 
- return Some(slot); - }; - - let is_good = param_types.len() == find_types.len() - && param_types.iter().zip(find_types.iter()).all(|(typ_1, typ_2)| !typ_1.is_incompatible_with(typ_2)); - - is_good.then_some(slot) - }) - .collect() - } - fn find_function(&self, find_ident: &str, find_types: &[StaticType]) -> Option { - self.identifiers - .iter() - .rposition(|(ident, typ)| ident == find_ident && typ.is_fn_and_matches(find_types)) - } - - fn allocate(&mut self, name: String, typ: StaticType) -> usize { - self.identifiers.push((name, typ)); - // Slot is just the length of the list minus one - self.identifiers.len() - 1 - } -} -#[derive(thiserror::Error, Debug)] -#[error("{text}")] -pub struct AnalysisError { - text: String, - span: Span, -} - -impl AnalysisError { - pub fn span(&self) -> Span { - self.span - } - fn parameter_redefined(param: &str, span: Span) -> Self { - Self { - text: format!("Illegal redefinition of parameter {param}"), - span, - } - } - fn unable_to_index_into(typ: &StaticType, span: Span) -> Self { - Self { - text: format!("Unable to index into {typ}"), - span, - } - } - fn unable_to_unpack_type(typ: &StaticType, span: Span) -> Self { - Self { - text: format!("Invalid unpacking of {typ}"), - span, - } - } - fn lvalue_required_to_be_single_identifier(span: Span) -> Self { - Self { - text: "This lvalue is required to be a single identifier".to_string(), - span, - } - } - - fn function_not_found(ident: &str, types: &[StaticType], span: Span) -> Self { - Self { - text: format!( - "No function called '{ident}' found that matches the arguments '{}'", - types.iter().join(", ") - ), - span, - } - } - - fn identifier_not_previously_declared(ident: &str, span: Span) -> Self { - Self { - text: format!("Identifier {ident} has not previously been declared"), - span, - } - } -} - -impl Debug for Analyser { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - writeln!(f)?; - for (id, scope) in self.scope_tree.scopes.iter().enumerate() { - writeln!(f, 
"{id}: {scope:?}")?; - } - - Ok(()) - } -} diff --git a/ndc_interpreter/src/semantic/mod.rs b/ndc_interpreter/src/semantic/mod.rs deleted file mode 100644 index 32c1c0de..00000000 --- a/ndc_interpreter/src/semantic/mod.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod analyser; diff --git a/ndc_interpreter/src/sequence.rs b/ndc_interpreter/src/sequence.rs deleted file mode 100644 index db265840..00000000 --- a/ndc_interpreter/src/sequence.rs +++ /dev/null @@ -1,278 +0,0 @@ -use crate::hash_map::HashMap; -use crate::function::StaticType; -use crate::heap::{MaxHeap, MinHeap}; -use crate::iterator::ValueIterator; -use crate::value::Value; -use std::cell::RefCell; -use std::cmp::Ordering; -use std::collections::VecDeque; -use std::fmt; -use std::rc::Rc; - -pub type DefaultMap<'a> = (&'a HashMap, Option>); -pub type DefaultMapMut<'a> = (&'a mut HashMap, Option>); -pub type ListRepr = Rc>>; -pub type TupleRepr = Rc>; -pub type MapRepr = Rc>>; -pub type StringRepr = Rc>; - -#[derive(Clone)] -pub enum Sequence { - String(Rc>), - List(ListRepr), - Tuple(TupleRepr), - Map(Rc>>, Option>), - Iterator(Rc>), - MaxHeap(Rc>), - MinHeap(Rc>), - Deque(Rc>>), -} - -impl Sequence { - #[must_use] - pub fn length(&self) -> Option { - match self { - Self::String(string) => Some(string.borrow().chars().count()), - Self::List(list) => Some(list.borrow().len()), - Self::Tuple(tup) => Some(tup.len()), - Self::Map(map, _) => Some(map.borrow().len()), - Self::Iterator(_iter) => None, - Self::MaxHeap(heap) => Some(heap.borrow().len()), - Self::MinHeap(heap) => Some(heap.borrow().len()), - Self::Deque(deque) => Some(deque.borrow().len()), - } - } - - pub fn static_type(&self) -> StaticType { - match self { - Self::String(_) => StaticType::String, - // I predict that defaulting to Any is going to make us very sad one day - Self::List(l) => StaticType::List(Box::new( - l.borrow() - .iter() - .next() - .map(|i| i.static_type()) - .unwrap_or(StaticType::Any), - )), - Self::Tuple(t) => 
StaticType::Tuple(t.iter().map(Value::static_type).collect()), - Self::Map(map, _) => StaticType::Map { - key: Box::new( - map.borrow() - .keys() - .next() - .map(Value::static_type) - .unwrap_or(StaticType::Any), - ), - value: Box::new( - map.borrow() - .values() - .next() - .map(Value::static_type) - .unwrap_or(StaticType::Any), - ), - }, - // TODO: we can't infer the type of iterators at runtime, unless we implement peek (CAN WE?) - Self::Iterator(_) => StaticType::Iterator(Box::new(StaticType::Any)), - - Self::MaxHeap(heap) => StaticType::MaxHeap(Box::new( - heap.borrow() - .iter() - .max() - .map(|elem| elem.0.static_type()) - .unwrap_or(StaticType::Any), - )), - Self::MinHeap(heap) => StaticType::MinHeap(Box::new( - heap.borrow() - .iter() - .min() - .map(|elem| elem.0.0.static_type()) - .unwrap_or(StaticType::Any), - )), - Self::Deque(d) => StaticType::Deque(Box::new( - d.borrow() - .iter() - .next() - .map(Value::static_type) - .unwrap_or(StaticType::Any), - )), - } - } - #[must_use] - pub fn deepcopy(&self) -> Self { - match self { - Self::List(l) => Self::List(Rc::new(RefCell::new( - l.borrow() - .iter() - .map(Value::deepcopy) - .collect::>(), - ))), - Self::Map(m, def) => Self::Map( - Rc::new(RefCell::new( - m.borrow() - .iter() - .map(|(key, value)| (key.deepcopy(), value.deepcopy())) - .collect::>(), - )), - def.as_deref().map(|v| Box::new(v.deepcopy())), - ), - // Since tuple has copy on write semantics we just don't do a deepcopy and nobody will know - Self::Tuple(t) => Self::Tuple(t.clone()), - Self::MaxHeap(heap) => Self::MaxHeap(Rc::new(RefCell::new( - heap.borrow() - .iter() - .map(|v| v.deepcopy()) - .collect::(), - ))), - Self::MinHeap(heap) => Self::MinHeap(Rc::new(RefCell::new( - heap.borrow() - .iter() - .map(|v| v.0.deepcopy()) - .collect::(), - ))), - Self::Deque(deque) => Self::Deque(Rc::new(RefCell::new( - deque - .borrow() - .iter() - .map(|v| v.deepcopy()) - .collect::>(), - ))), - Self::String(s) => 
Self::String(Rc::new(RefCell::new(s.borrow().to_string()))), - Self::Iterator(i) => { - Self::Iterator(Rc::new(RefCell::new(ValueIterator::clone(&*i.borrow())))) // ??? - } - } - } - - #[must_use] - pub fn contains(&self, needle: &Value) -> bool { - match self { - Self::String(string) => match needle { - Value::Sequence(Self::String(needle)) => { - if Rc::ptr_eq(string, needle) { - return true; - } - - string.borrow().contains(needle.borrow().as_str()) - } - _ => false, - }, - Self::List(list) => list.borrow().contains(needle), - Self::Tuple(tuple) => tuple.contains(needle), - Self::Map(map, _) => map.borrow().contains_key(needle), - Self::Iterator(iterator) => { - let iter = ValueIterator::clone(&*iterator.borrow()); - match iter { - ValueIterator::ValueRange(range) => range.contains(needle), - ValueIterator::ValueRangeFrom(range) => range.contains(needle), - ValueIterator::ValueRangeInclusive(range) => range.contains(needle), - ValueIterator::Repeat(repeat) => &repeat.value == needle, - } - } - Self::MaxHeap(heap) => heap.borrow().iter().any(|v| &v.0 == needle), - Self::MinHeap(heap) => heap.borrow().iter().any(|v| &v.0.0 == needle), - Self::Deque(deque) => deque.borrow().contains(needle), - } - } -} - -impl PartialEq for Sequence { - fn eq(&self, other: &Self) -> bool { - match (self, other) { - (Self::String(a), Self::String(b)) => a == b, - (Self::List(a), Self::List(b)) => a == b, - (Self::Tuple(a), Self::Tuple(b)) => a == b, - (Self::Map(a, _), Self::Map(b, _)) => a == b, - (Self::Deque(a), Self::Deque(b)) => a == b, - - // These types can't really be compared for equality so they will just return true if they point to the same memory addr - (Self::MaxHeap(a), Self::MaxHeap(b)) => Rc::ptr_eq(a, b), - (Self::MinHeap(a), Self::MinHeap(b)) => Rc::ptr_eq(a, b), - (Self::Iterator(a), Self::Iterator(b)) => Rc::ptr_eq(a, b), - - _ => false, - } - } -} - -impl Eq for Sequence {} - -impl PartialOrd for Sequence { - fn partial_cmp(&self, other: &Self) -> Option { - 
match (self, other) { - (Self::String(left), Self::String(right)) => left.partial_cmp(right), - (Self::List(left), Self::List(right)) => left.partial_cmp(right), - (Self::Tuple(left), Self::Tuple(right)) => left.partial_cmp(right), - _ => None, - } - } -} - -impl fmt::Debug for Sequence { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::String(s) => write!(f, "\"{}\"", s.borrow()), - Self::List(vs) => { - write!(f, "[")?; - let vs = vs.borrow(); - let mut vs = vs.iter().peekable(); - while let Some(v) = vs.next() { - if vs.peek().is_some() { - write!(f, "{v:?},")?; - } else { - write!(f, "{v:?}")?; - } - } - write!(f, "]") - } - Self::Tuple(vs) => { - write!(f, "(")?; - let mut iter = vs.iter().peekable(); - while let Some(v) = iter.next() { - write!(f, "{v:?}")?; - if iter.peek().is_some() { - write!(f, ",")?; - } - } - write!(f, ")") - } - Self::Map(dict, default) => { - let dict = dict.borrow(); - let mut iter = dict.iter().peekable(); - if let Some(default) = default { - write!(f, "{{default: {default:?}")?; - if iter.peek().is_some() { - write!(f, ",")?; - } - } else { - write!(f, "{{")?; - } - while let Some((key, value)) = iter.next() { - match value { - Value::Option(opt) if opt.is_none() => write!(f, "{key:?}")?, - _ => write!(f, "{key:?}: {value:?}")?, - } - - if iter.peek().is_some() { - write!(f, ",")?; - } - } - write!(f, "}}") - } - Self::Iterator(_) => { - write!(f, "Iterator") - } - Self::MaxHeap(h) => write!(f, "MaxHeap(len={})", h.borrow().len()), - Self::MinHeap(h) => write!(f, "MinHeap(len={})", h.borrow().len()), - Self::Deque(d) => write!(f, "Deque(len={})", d.borrow().len()), - } - } -} - -impl fmt::Display for Sequence { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::String(s) => write!(f, "{}", s.borrow()), - otherwise => write!(f, "{otherwise:?}"), - } - } -} diff --git a/ndc_interpreter/src/value.rs b/ndc_interpreter/src/value.rs deleted file mode 100644 index 
52a588d9..00000000 --- a/ndc_interpreter/src/value.rs +++ /dev/null @@ -1,621 +0,0 @@ -use std::cell::RefCell; -use std::cmp::Ordering; -use std::fmt; -use std::hash::{Hash, Hasher}; -use std::ops::{Range, RangeFrom, RangeInclusive}; -use std::rc::Rc; - -use itertools::Itertools; -use num::BigInt; - -use crate::compare::FallibleOrd; -use crate::hash_map::DefaultHasher; -use crate::function::{Function, StaticType}; -use crate::int::Int; -use crate::num::{Number, NumberToFloatError, NumberToUsizeError}; -use crate::sequence::Sequence; - -use super::iterator::{ValueIterator, ValueRange, ValueRangeFrom, ValueRangeInclusive}; - -/// Enumerates all the different types of values that exist in the language -/// All values should be pretty cheap to clone because the bigger ones are wrapped using Rc's -#[derive(Clone)] -pub enum Value { - Option(Option>), - Number(Number), - Bool(bool), - Sequence(Sequence), - Function(Rc), -} - -impl Value { - pub fn function(function: Function) -> Self { - Self::Function(Rc::new(function)) - } - pub fn string>(string: S) -> Self { - Self::Sequence(Sequence::String(Rc::new(RefCell::new(string.into())))) - } - - pub fn list>>(data: V) -> Self { - Self::Sequence(Sequence::List(Rc::new(RefCell::new(data.into())))) - } - - pub fn collect_list(i: I) -> Self - where - I: Iterator, - V: Into, - { - Self::list(i.map(Into::into).collect::>()) - } - - pub fn tuple>>(data: V) -> Self { - Self::Sequence(Sequence::Tuple(Rc::new(data.into()))) - } - - pub fn unit() -> Self { - Self::Sequence(Sequence::Tuple(Rc::new(vec![]))) - } - - pub fn none() -> Self { - Self::Option(None) - } - - pub fn some(value: Self) -> Self { - Self::Option(Some(Box::new(value))) - } - - pub fn number>(source: T) -> Self { - Self::Number(source.into()) - } - - /// If this value is a type of `Sequence` it returns the length of the sequence, otherwise it returns `None` - #[must_use] - pub fn sequence_length(&self) -> Option { - match self { - Self::Sequence(seq) => seq.length(), 
- _ => None, - } - } - - pub fn try_into_vec(self) -> Option> { - match self { - Self::Sequence(Sequence::List(list)) => match Rc::try_unwrap(list) { - // This short circuit is almost certainly wrong because take will panic if list is borrowed - Ok(list) => Some(list.into_inner()), - Err(list) => Some(Vec::clone(&*list.borrow())), - }, - Self::Sequence(Sequence::Tuple(list)) => match Rc::try_unwrap(list) { - Ok(list) => Some(list), - Err(list) => Some(Vec::clone(&list)), - }, - Self::Sequence(Sequence::Map(map, _)) => { - Some(map.borrow().keys().cloned().collect_vec()) - } - Self::Sequence(Sequence::String(string)) => match Rc::try_unwrap(string) { - // This implementation is peak retard, we don't want collect_vec here - // ^-- WTF: is this comment, we collect_vec here anyway? - Ok(string) => Some(string.into_inner().chars().map(Self::from).collect_vec()), - Err(string) => Some(string.borrow().chars().map(Self::from).collect_vec()), - }, - _ => None, - } - } - - #[must_use] - pub fn static_type(&self) -> StaticType { - match self { - Self::Option(c) => StaticType::Option(Box::new( - c.as_deref().map_or(StaticType::Any, Self::static_type), - )), - Self::Number(number) => number.static_type(), - Self::Bool(_) => StaticType::Bool, - - Self::Sequence(s) => s.static_type(), - Self::Function(fun) => fun.static_type(), - } - } - - #[must_use] - pub fn empty_list() -> Self { - Self::Sequence(Sequence::List(Rc::new(RefCell::new(vec![])))) - } - - #[must_use] - pub fn deepcopy(&self) -> Self { - match self { - Self::Sequence(seq) => Self::Sequence(seq.deepcopy()), - // For all non non-sequence types we can just use clone since they don't have interior mutability - v => v.clone(), - } - } - - #[must_use] - pub fn supports_vectorization_with(&self, other: &Self) -> bool { - self.static_type() - .supports_vectorization_with(&other.static_type()) - } -} - -impl FallibleOrd for Value { - type Error = anyhow::Error; - - // TODO: do we really want to use anyhow here? 
- fn try_cmp(&self, other: &Self) -> anyhow::Result { - self.partial_cmp(other).ok_or_else(|| { - anyhow::anyhow!( - "{} cannot be compared to {}", - self.static_type(), - other.static_type() - ) - }) - } -} - -// TODO: is there a way to get rid of this implementation?!??! -impl FallibleOrd for &Value { - type Error = anyhow::Error; - - fn try_cmp(&self, other: &Self) -> Result { - self.partial_cmp(other).ok_or_else(|| { - anyhow::anyhow!( - "{} cannot be compared to {}", - self.static_type(), - other.static_type() - ) - }) - } -} - -// TODO: Remove this and deal with the fallout -impl From<&Self> for Value { - fn from(value: &Self) -> Self { - value.clone() - } -} - -impl Hash for Value { - fn hash(&self, state: &mut H) { - match self { - Self::Option(o) => { - state.write_u8(1); - o.hash(state); - } - Self::Number(number) => { - state.write_u8(2); - number.hash(state); - } - Self::Bool(true) => state.write_u8(3), - Self::Bool(false) => state.write_u8(4), - Self::Sequence(seq) => match seq { - Sequence::String(string) => { - state.write_u8(5); - string.borrow().hash(state); - } - Sequence::List(list) => { - state.write_u8(6); - for item in list.borrow().iter() { - item.hash(state); - } - } - Sequence::Tuple(list) => { - state.write_u8(7); - for item in list.iter() { - item.hash(state); - } - } - // NOTE: the default value is not party of the identity of the map so %{1,2,3} == {:0,1,2,3} - Sequence::Map(dict, _) => { - state.write_u8(8); - // This is 1 to 1 ripped from Noulith, and it's meant to ensure that if sets - // are equal {1,2,3} == {3,2,1} they produce the same hash regardless of the - // order the element appear in - - let mut acc = 0u64; - let mut cube_acc = 0u64; - for (key, value) in dict.borrow().iter() { - let mut hasher = DefaultHasher::default(); - key.hash(&mut hasher); - value.hash(&mut hasher); - - let f = hasher.finish(); - acc = acc.wrapping_add(f); - cube_acc = cube_acc.wrapping_add(f.wrapping_mul(f)); - } - state.write_u64(acc); - 
state.write_u64(cube_acc); - } - Sequence::Iterator(i) => { - state.write_u8(9); - Rc::as_ptr(i).hash(state); - } - Sequence::MaxHeap(h) => { - state.write_u8(10); - Rc::as_ptr(h).hash(state); - } - Sequence::MinHeap(h) => { - state.write_u8(11); - Rc::as_ptr(h).hash(state); - } - Sequence::Deque(list) => { - state.write_u8(12); - for item in list.borrow().iter() { - item.hash(state); - } - } - }, - Self::Function(f) => { - state.write_u8(13); - Rc::as_ptr(f).hash(state); - } - } - } -} - -impl PartialEq for Value { - fn eq(&self, other: &Self) -> bool { - match (self, other) { - (Self::Option(o1), Self::Option(o2)) => o1 == o2, - (Self::Number(n1), Self::Number(n2)) => n1.eq(n2), - (Self::Bool(b1), Self::Bool(b2)) => b1.eq(b2), - (Self::Sequence(s1), Self::Sequence(s2)) => s1.eq(s2), - (Self::Function(f1), Self::Function(f2)) => Rc::as_ptr(f1) == Rc::as_ptr(f2), - _ => false, - } - } -} - -impl Eq for Value {} - -impl PartialOrd for Value { - fn partial_cmp(&self, other: &Self) -> Option { - match (self, other) { - (Self::Option(left), Self::Option(right)) => left.partial_cmp(right), - (Self::Number(left), Self::Number(right)) => left.partial_cmp(right), - (Self::Sequence(left), Self::Sequence(right)) => left.partial_cmp(right), - (Self::Bool(left), Self::Bool(right)) => left.partial_cmp(right), - // Functions definitely don't have an order - // Things that are different don't have an order either - _ => None, - } - } -} - -// ----------------------------------------------------- -// Into value -// ----------------------------------------------------- - -impl From<()> for Value { - fn from(_value: ()) -> Self { - Self::tuple(vec![]) - } -} - -impl From for Value { - fn from(value: bool) -> Self { - Self::Bool(value) - } -} - -impl From for Value { - fn from(value: f64) -> Self { - Self::Number(Number::Float(value)) - } -} -impl From for Value { - fn from(value: i64) -> Self { - Self::Number(Number::Int(Int::Int64(value))) - } -} - -impl From for Value { - fn 
from(value: i32) -> Self { - Self::Number(Number::Int(Int::Int64(i64::from(value)))) - } -} - -impl From for Value { - fn from(value: char) -> Self { - Self::string(value) - } -} - -impl From for Value { - fn from(value: usize) -> Self { - i64::try_from(value).map_or_else(|_| Self::from(BigInt::from(value)), Self::from) - } -} - -impl From for Value { - fn from(value: BigInt) -> Self { - Self::Number(Number::Int(Int::BigInt(value))) - } -} - -impl From for Value { - fn from(value: String) -> Self { - Self::Sequence(Sequence::String(Rc::new(RefCell::new(value)))) - } -} - -impl From<&str> for Value { - fn from(value: &str) -> Self { - Self::Sequence(Sequence::String(Rc::new(RefCell::new(value.to_string())))) - } -} - -impl From for Value { - fn from(value: ValueIterator) -> Self { - Self::Sequence(Sequence::Iterator(Rc::new(RefCell::new(value)))) - } -} - -impl<'a> TryFrom<&'a mut Value> for &'a mut Sequence { - type Error = &'static str; - fn try_from(value: &'a mut Value) -> Result { - match value { - Value::Sequence(seq) => Ok(seq), - _ => Err("Kapot"), - } - } -} - -impl<'a> From<&'a mut Value> for &'a Value { - fn from(value: &'a mut Value) -> Self { - &*value - } -} - -impl From for Value { - fn from(value: Number) -> Self { - Self::Number(value) - } -} - -impl From for Value { - fn from(value: Sequence) -> Self { - Self::Sequence(value) - } -} - -impl From> for Value { - fn from(value: RangeInclusive) -> Self { - Self::from(ValueIterator::ValueRangeInclusive(ValueRangeInclusive( - value, - ))) - } -} - -impl From> for Value { - fn from(value: RangeFrom) -> Self { - Self::from(ValueIterator::ValueRangeFrom(ValueRangeFrom(value))) - } -} - -impl From> for Value { - fn from(value: Range) -> Self { - Self::from(ValueIterator::ValueRange(ValueRange(value))) - } -} - -// ----------------------------------------------------- -// Out of value -// ----------------------------------------------------- - -#[derive(thiserror::Error, Debug)] -pub enum ConversionError { - 
#[error("Cannot convert {0} into {1}")] - UnsupportedVariant(StaticType, &'static str), - - #[error("Cannot into {0} because the length is incorrect")] - IncorrectLength(&'static str), - - #[error("{0}")] - NumberToUsizeError(#[from] NumberToUsizeError), - - #[error("{0}")] - NumberToFloatError(#[from] NumberToFloatError), -} - -/// `TryFrom` implementation to convert a `Sequence::Tuple` into (Value, Value) -/// this is used in the implementation where we iterate over a hashmap -impl TryFrom for (Value, Value) { - type Error = ConversionError; - - fn try_from(value: Value) -> Result { - let Value::Sequence(Sequence::Tuple(tuple)) = value else { - return Err(ConversionError::UnsupportedVariant( - value.static_type(), - stringify!((Value, Value)), - )); - }; - - // If we can take ownership of the vector we use pop otherwise we use get + clone - let (right, left) = match Rc::try_unwrap(tuple) { - Ok(mut tuple) => (tuple.pop(), tuple.pop()), - Err(tuple) => (tuple.get(1).cloned(), tuple.first().cloned()), - }; - - if let (Some(left), Some(right)) = (left, right) { - Ok((left, right)) - } else { - Err(ConversionError::IncorrectLength(stringify!((Value, Value)))) - } - } -} - -impl TryFrom for i64 { - type Error = ConversionError; - - fn try_from(value: Value) -> Result { - let typ = value.static_type(); - if let Value::Number(Number::Int(Int::Int64(i))) = value { - return Ok(i); - } - - if let Value::Number(Number::Int(i)) = value - && let Int::Int64(i) = i.simplified() - { - return Ok(i); - } - - Err(Self::Error::UnsupportedVariant(typ, stringify!(i64))) - } -} - -impl TryFrom<&mut Value> for f64 { - type Error = ConversionError; - - fn try_from(value: &mut Value) -> Result { - match value { - Value::Number(n) => Ok((&*n).try_into()?), - v => Err(Self::Error::UnsupportedVariant( - v.static_type(), - stringify!(f64), - )), - } - } -} - -impl TryFrom<&mut Value> for i64 { - type Error = ConversionError; - - fn try_from(value: &mut Value) -> Result { - match value { - 
Value::Number(Number::Int(Int::Int64(i))) => Ok(*i), - v => Err(Self::Error::UnsupportedVariant( - v.static_type(), - stringify!(i64), - )), - } - } -} - -impl TryFrom<&mut Value> for bool { - type Error = ConversionError; - - fn try_from(value: &mut Value) -> Result { - match value { - Value::Bool(bool) => Ok(*bool), - v => Err(Self::Error::UnsupportedVariant( - v.static_type(), - stringify!(bool), - )), - } - } -} - -impl TryFrom for usize { - type Error = ConversionError; - - fn try_from(value: Value) -> Result { - match value { - Value::Number(n) => Ok(Self::try_from(n)?), - v => Err(Self::Error::UnsupportedVariant( - v.static_type(), - stringify!(usize), - )), - } - } -} - -impl TryFrom for Number { - type Error = ConversionError; - - fn try_from(value: Value) -> Result { - match value { - Value::Number(n) => Ok(n), - v => Err(ConversionError::UnsupportedVariant( - v.static_type(), - stringify!(Number), - )), - } - } -} - -impl TryFrom for BigInt { - type Error = ConversionError; - fn try_from(value: Value) -> Result { - match value { - Value::Number(Number::Int(Int::BigInt(b))) => Ok(b), - Value::Number(Number::Int(Int::Int64(i))) => Ok(Self::from(i)), - v => Err(ConversionError::UnsupportedVariant( - v.static_type(), - stringify!(BigInt), - )), - } - } -} - -impl TryFrom<&mut Value> for usize { - type Error = ConversionError; - - fn try_from(value: &mut Value) -> Result { - match value { - Value::Number(number) => Ok(Self::try_from(number.clone())?), - v => Err(ConversionError::UnsupportedVariant( - v.static_type(), - stringify!(usize), - )), - } - } -} - -impl<'a> TryFrom<&'a mut Value> for &'a Sequence { - type Error = ConversionError; - - fn try_from(value: &'a mut Value) -> Result { - match value { - Value::Sequence(seq) => Ok(seq), - v => Err(ConversionError::UnsupportedVariant( - v.static_type(), - stringify!(&Sequence), - )), - } - } -} - -impl<'a> TryFrom<&'a mut Value> for &'a Number { - type Error = ConversionError; - - fn try_from(value: &'a mut 
Value) -> Result { - match value { - Value::Number(n) => Ok(n), - v => Err(ConversionError::UnsupportedVariant( - v.static_type(), - "&Number", - )), - } - } -} - -impl fmt::Debug for Value { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Option(o) => write!(f, "{o:?}"), - Self::Number(n) => write!(f, "{n}"), - Self::Bool(b) => write!(f, "{b}"), - Self::Function(_) => { - write!(f, "function") - } - Self::Sequence(s) => write!(f, "{s:?}"), - } - } -} - -impl fmt::Display for Value { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Option(Some(v)) => write!(f, "Some({v})"), - Self::Option(None) => write!(f, "None"), - Self::Number(n) => write!(f, "{n}"), - Self::Bool(b) => write!(f, "{b}"), - Self::Function(_) => { - //TODO: implement function printing - write!(f, "function") - } - // Unit tuple does not print anything - Self::Sequence(Sequence::Tuple(t)) if t.is_empty() => write!(f, ""), - Self::Sequence(s) => write!(f, "{s}"), - } - } -} diff --git a/ndc_lexer/src/lib.rs b/ndc_lexer/src/lib.rs index 97e186c8..8b3e279f 100644 --- a/ndc_lexer/src/lib.rs +++ b/ndc_lexer/src/lib.rs @@ -199,7 +199,6 @@ impl SourceIterator<'_> { pub fn consume(&mut self, count: usize) { for _ in 0..count { - // TODO: this is an internal error how should we handle these? self.next() .expect("tried to consume but iterator was empty"); } diff --git a/ndc_lexer/src/number.rs b/ndc_lexer/src/number.rs index dcccda78..eb3fc994 100644 --- a/ndc_lexer/src/number.rs +++ b/ndc_lexer/src/number.rs @@ -45,8 +45,6 @@ impl NumberLexer for Lexer<'_> { self.lex_to_buffer(&mut buf, |c| c == '1' || c == '0'); - // TODO do these common error interceptions even make sense considering we don't really have any suffixes we support - // maybe we can pull these checks outside of lex number and see if the next token after lexing a number is ascii alpha? 
match self.source.peek() { Some(c) if c.is_ascii_digit() => { self.source.next(); @@ -82,8 +80,6 @@ impl NumberLexer for Lexer<'_> { self.lex_to_buffer(&mut buf, |c| c.is_ascii_hexdigit()); - // TODO: also intercept common errors here? - return match buf_to_token_with_radix(&buf, 16) { Some(token) => Ok(TokenLocation { token, @@ -101,8 +97,6 @@ impl NumberLexer for Lexer<'_> { self.lex_to_buffer(&mut buf, |c| matches!(c, '0'..='7')); - // TODO: also intercept common errors here? - return match buf_to_token_with_radix(&buf, 8) { Some(token) => Ok(TokenLocation { token, @@ -127,7 +121,6 @@ impl NumberLexer for Lexer<'_> { } // A `_` inside a number is ignored unless it's after a `.` '_' => { - // TODO: Maybe disallow `_` after `.` self.source.next(); // ignore underscore for nice number formatting } @@ -228,7 +221,6 @@ fn buf_to_token_with_radix(buf: &str, radix: u32) -> Option { } } -// TODO: maybe this implementation is a lot slower than it needs to be? fn validator_for_radix(radix: usize) -> impl Fn(char) -> bool { move |c| "0123456789abcdefghijlkmnopqrstuvwxyz"[0..radix].contains(c.to_ascii_lowercase()) } diff --git a/ndc_lexer/src/string.rs b/ndc_lexer/src/string.rs index 50bc5a1c..84415178 100644 --- a/ndc_lexer/src/string.rs +++ b/ndc_lexer/src/string.rs @@ -69,8 +69,6 @@ impl StringLexer for Lexer<'_> { // This was guaranteed by the caller, but we could make a nice error? assert_eq!(self.source.next(), Some('"')); - // TODO: support \u8080 type escape sequences - // TODO: should we handle bytes like \xFF? 
Probably not for strings because they aren't valid UTF-8 let mut buf = String::new(); #[allow(clippy::while_let_on_iterator)] while let Some(next_ch) = self.source.next() { diff --git a/ndc_lsp/Cargo.toml b/ndc_lsp/Cargo.toml index 9a26c848..99a749d3 100644 --- a/ndc_lsp/Cargo.toml +++ b/ndc_lsp/Cargo.toml @@ -8,6 +8,6 @@ version.workspace = true tokio = { version = "1.49.0", features = ["full"] } ndc_lexer.workspace = true ndc_interpreter.workspace = true -ndc_stdlib.workspace = true +ndc_core.workspace = true tower-lsp.workspace = true ndc_parser.workspace = true diff --git a/ndc_lsp/src/backend.rs b/ndc_lsp/src/backend.rs index 05f654b7..0d073aaf 100644 --- a/ndc_lsp/src/backend.rs +++ b/ndc_lsp/src/backend.rs @@ -1,9 +1,11 @@ use std::collections::HashMap; -use ndc_lexer::{Lexer, Span, TokenLocation}; +use ndc_core::{FunctionRegistry, StaticType}; use ndc_interpreter::Interpreter; +use ndc_interpreter::NativeFunction; +use ndc_lexer::{Lexer, Span, TokenLocation}; use ndc_parser::{Expression, ExpressionLocation, ForBody, ForIteration, Lvalue}; -use ndc_stdlib::WithStdlib; +use std::rc::Rc; use tokio::sync::Mutex; use tower_lsp::jsonrpc::Result as JsonRPCResult; use tower_lsp::lsp_types::{ @@ -19,16 +21,24 @@ use tower_lsp::{Client, LanguageServer}; pub struct Backend { pub client: Client, documents: Mutex>>, + configure: fn(&mut FunctionRegistry>), } impl Backend { - pub fn new(client: Client) -> Self { + pub fn new(client: Client, configure: fn(&mut FunctionRegistry>)) -> Self { Self { client, documents: Mutex::new(HashMap::new()), + configure, } } + fn make_interpreter(&self) -> Interpreter { + let mut interpreter = Interpreter::capturing(); + interpreter.configure(self.configure); + interpreter + } + async fn validate(&self, uri: &Url, text: &str) { let scanner = Lexer::new(text); let tokens = scanner @@ -81,7 +91,7 @@ impl Backend { // The interpreter uses Rc internally (non-Send), so it must be fully dropped // before the next await point. 
let hints = { - let mut interpreter = Interpreter::new(Vec::new()).with_stdlib(); + let mut interpreter = self.make_interpreter(); match interpreter.analyse_str(text) { Ok(expressions) => { let mut hints = Vec::new(); @@ -152,26 +162,45 @@ impl LanguageServer for Backend { &self, _params: CompletionParams, ) -> Result, tower_lsp::jsonrpc::Error> { - let interpreter = Interpreter::new(Vec::new()).with_stdlib(); - let env = interpreter.environment(); - let functions = env.borrow().get_all_functions(); + let interpreter = self.make_interpreter(); - let items = functions.iter().filter_map(|fun| { - if !is_normal_ident(fun.name()) { + let items = interpreter.functions().filter_map(|fun| { + if !is_normal_ident(&fun.name) { return None; } + let (param_detail, return_detail) = match &fun.static_type { + StaticType::Function { + parameters: Some(params), + return_type, + } => { + let ps = params + .iter() + .map(|t: &StaticType| t.to_string()) + .collect::>() + .join(", "); + (format!("({ps})"), return_type.to_string()) + } + StaticType::Function { + parameters: None, + return_type, + } => ("(...)".to_string(), return_type.to_string()), + other => (String::new(), other.to_string()), + }; + Some(CompletionItem { - label: fun.name().to_string(), + label: fun.name.clone(), label_details: Some(CompletionItemLabelDetails { - detail: Some(format!("({})", fun.type_signature())), - description: Some(fun.return_type().to_string()), + detail: Some(param_detail), + description: Some(return_detail), }), kind: Some(CompletionItemKind::FUNCTION), - documentation: Some(Documentation::MarkupContent(MarkupContent { - kind: MarkupKind::Markdown, - value: fun.documentation().to_string(), - })), + documentation: fun.documentation.as_ref().map(|d| { + Documentation::MarkupContent(MarkupContent { + kind: MarkupKind::Markdown, + value: d.clone(), + }) + }), ..Default::default() }) }); @@ -211,13 +240,13 @@ fn collect_hints(expr: &ExpressionLocation, text: &str, hints: &mut Vec { if let Some(rt) 
= return_type { hints.push(InlayHint { - position: position_from_offset(text, parameters.span.end()), + position: position_from_offset(text, parameters_span.end()), label: InlayHintLabel::String(format!(" -> {rt}")), kind: Some(InlayHintKind::TYPE), text_edits: None, @@ -265,11 +294,13 @@ fn collect_hints(expr: &ExpressionLocation, text: &str, hints: &mut Vec collect_hints(e, text, hints), + ForBody::Block(e) => collect_hints(e, text, hints), + ForBody::List { expr: e, .. } => collect_hints(e, text, hints), ForBody::Map { key, value, default, + .. } => { collect_hints(key, text, hints); if let Some(v) = value { diff --git a/ndc_lsp/src/lib.rs b/ndc_lsp/src/lib.rs index d0324e32..d09138b6 100644 --- a/ndc_lsp/src/lib.rs +++ b/ndc_lsp/src/lib.rs @@ -1,12 +1,15 @@ mod backend; use crate::backend::Backend; +use ndc_core::FunctionRegistry; +use ndc_interpreter::NativeFunction; +use std::rc::Rc; use tower_lsp::{LspService, Server}; -pub async fn start_lsp() { +pub async fn start_lsp(configure: fn(&mut FunctionRegistry>)) { let stdin = tokio::io::stdin(); let stdout = tokio::io::stdout(); - let (service, socket) = LspService::new(Backend::new); + let (service, socket) = LspService::new(move |client| Backend::new(client, configure)); Server::new(stdin, stdout, socket).serve(service).await; } diff --git a/ndc_macros/Cargo.toml b/ndc_macros/Cargo.toml index 74591e65..939c3f2a 100644 --- a/ndc_macros/Cargo.toml +++ b/ndc_macros/Cargo.toml @@ -12,4 +12,3 @@ proc-macro = true proc-macro2 = "1.0.106" quote = "1.0.44" syn = { version = "2.0.117", features = ["full", "extra-traits"] } -itertools.workspace = true diff --git a/ndc_macros/src/convert.rs b/ndc_macros/src/convert.rs deleted file mode 100644 index fcf992fe..00000000 --- a/ndc_macros/src/convert.rs +++ /dev/null @@ -1,190 +0,0 @@ -use crate::function::temp_create_map_any; -use crate::r#match::{is_ref_mut, is_string, path_ends_with}; -use proc_macro2::TokenStream; -use quote::quote; - -#[derive(Debug, Clone)] -pub struct 
Argument { - pub param_type: TokenStream, - pub param_name: TokenStream, - pub argument: TokenStream, - pub initialize_code: TokenStream, -} -pub trait TypeConverter { - fn matches(&self, ty: &syn::Type) -> bool; - fn static_type(&self) -> TokenStream; - fn convert( - &self, - temp_var: syn::Ident, - original_name: &str, - argument_var_name: syn::Ident, - ) -> Vec; -} - -struct MutRefString; -impl TypeConverter for MutRefString { - fn matches(&self, ty: &syn::Type) -> bool { - is_ref_mut(ty) && is_string(ty) - } - - fn static_type(&self) -> TokenStream { - quote! { ndc_interpreter::function::StaticType::String } - } - - fn convert( - &self, - temp_var: syn::Ident, - original_name: &str, - argument_var_name: syn::Ident, - ) -> Vec { - vec![Argument { - param_type: self.static_type(), - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! { - let ndc_interpreter::value::Value::Sequence(ndc_interpreter::sequence::Sequence::String(#temp_var)) = #argument_var_name else { - panic!("Value #position needed to be a Sequence::String but wasn't"); - }; - let #argument_var_name = &mut *#temp_var.try_borrow_mut()?; - }, - }] - } -} - -/// Matches `Rc>>` -struct InternalMap; -impl TypeConverter for InternalMap { - fn matches(&self, ty: &syn::Type) -> bool { - path_ends_with(ty, "MapRepr") - } - - fn static_type(&self) -> TokenStream { - temp_create_map_any() - } - - fn convert( - &self, - temp_var: syn::Ident, - original_name: &str, - argument_var_name: syn::Ident, - ) -> Vec { - vec![Argument { - param_type: self.static_type(), - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! 
{ - let ndc_interpreter::value::Value::Sequence(ndc_interpreter::sequence::Sequence::Map(#temp_var, _)) = #argument_var_name else { - panic!("Value #position needed to be Sequence::Map but wasn't"); - }; - - let #argument_var_name = #temp_var; - }, - }] - } -} -struct InternalString; -impl TypeConverter for InternalString { - fn matches(&self, ty: &syn::Type) -> bool { - path_ends_with(ty, "StringRepr") - } - - fn static_type(&self) -> TokenStream { - quote! { ndc_interpreter::function::StaticType::String } - } - - fn convert( - &self, - temp_var: syn::Ident, - original_name: &str, - argument_var_name: syn::Ident, - ) -> Vec { - vec![Argument { - param_type: self.static_type(), - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! { - let ndc_interpreter::value::Value::Sequence(ndc_interpreter::sequence::Sequence::String(#temp_var)) = #argument_var_name else { - panic!("Value #position needed to be Sequence::List but wasn't"); - }; - - let #argument_var_name = #temp_var; - }, - }] - } -} -/// Matches `Rc>>` -struct InternalList; -impl TypeConverter for InternalList { - fn matches(&self, ty: &syn::Type) -> bool { - path_ends_with(ty, "ListRepr") - } - - fn static_type(&self) -> TokenStream { - // TODO: just hardcoding Any here is lazy - quote! { - ndc_interpreter::function::StaticType::List(Box::new( - ndc_interpreter::function::StaticType::Any - )) - } - } - - fn convert( - &self, - temp_var: syn::Ident, - original_name: &str, - argument_var_name: syn::Ident, - ) -> Vec { - vec![Argument { - param_type: self.static_type(), - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! 
{ - let ndc_interpreter::value::Value::Sequence(ndc_interpreter::sequence::Sequence::List(#temp_var)) = #argument_var_name else { - panic!("Value #position needed to be Sequence::List but wasn't"); - }; - - let #argument_var_name = #temp_var; - }, - }] - } -} - -// Losing tuple concatenation is a price we might have to pay -// /// Matches `Rc>>` -// struct InternalTuple; -// impl TypeConverter for InternalTuple { -// fn matches(&self, ty: &syn::Type) -> bool { -// path_ends_with(ty, "TupleRepr") -// } -// -// fn convert( -// &self, -// temp_var: syn::Ident, -// original_name: &str, -// argument_var_name: syn::Ident, -// ) -> Vec { -// vec![Argument { -// param_type: quote! { ndc_interpreter::function::StaticType::Tuple }, -// param_name: quote! { #original_name }, -// argument: quote! { #argument_var_name }, -// initialize_code: quote! { -// let ndc_interpreter::value::Value::Sequence(ndc_interpreter::sequence::Sequence::Tuple(#temp_var)) = #argument_var_name else { -// panic!("Value #position needed to be Sequence::Tuple but wasn't"); -// }; -// -// // TODO: is std::mem::take appropriate here? -// let #argument_var_name = std::mem::take(#temp_var); -// }, -// }] -// } -// } - -pub fn build() -> Vec> { - vec![ - Box::new(InternalList), - Box::new(MutRefString), - // Box::new(InternalTuple), - Box::new(InternalMap), - Box::new(InternalString), - ] -} diff --git a/ndc_macros/src/function.rs b/ndc_macros/src/function.rs index 4c8d7cfb..f6346320 100644 --- a/ndc_macros/src/function.rs +++ b/ndc_macros/src/function.rs @@ -1,9 +1,9 @@ -use crate::convert::{Argument, TypeConverter, build}; -use crate::r#match::{ - is_ref, is_ref_mut, is_ref_mut_of_slice_of_value, is_ref_of_bigint, is_ref_of_slice_of_value, - is_str_ref, path_ends_with, -}; -use itertools::Itertools; +//! Function wrapping logic for `#[export_module]`. +//! +//! Takes a `syn::ItemFn` and produces a `WrappedFunction` containing +//! the inner implementation and its `FunctionRegistry` registration code. 
+ +use crate::types::{NdcType, classify}; use proc_macro2::TokenStream; use quote::{format_ident, quote}; use std::fmt::Write; @@ -13,7 +13,7 @@ pub struct WrappedFunction { pub function_registration: TokenStream, } -pub fn wrap_function(function: &syn::ItemFn) -> Vec { +pub fn wrap_function(function: &syn::ItemFn) -> syn::Result> { let original_identifier = function.sig.ident.clone(); let mut function_names = vec![proc_macro2::Literal::string( @@ -29,20 +29,18 @@ pub fn wrap_function(function: &syn::ItemFn) -> Vec { // #[function(name = "...")] if meta.path.is_ident("name") { function_names = vec![meta.value()?.parse()?]; - // register_as_function_name = meta.value()?.parse()?; Ok(()) } else if meta.path.is_ident("alias") { function_names.push(meta.value()?.parse()?); Ok(()) } else if meta.path.is_ident("return_type") { let value: syn::Type = meta.value()?.parse()?; - return_type = Some(map_type(&value)); + return_type = Some(map_type(&value)?); Ok(()) } else { Err(meta.error("unsupported property on function")) } - }) - .expect("invalid function attribute"); + })?; } else if attr.path().is_ident("doc") && let syn::Meta::NameValue(meta) = &attr.meta && let syn::Expr::Lit(expr) = &meta.value @@ -53,836 +51,386 @@ pub fn wrap_function(function: &syn::ItemFn) -> Vec { } } - let return_type = return_type.unwrap_or_else(|| map_return_type(&function.sig.output)); - - match &function.vis { - syn::Visibility::Public(_) => {} - syn::Visibility::Restricted(_) | syn::Visibility::Inherited => { - panic!("only public functions can be wrapped for now") - } - } + let return_type = match return_type { + Some(t) => t, + None => map_return_type(&function.sig.output)?, + }; - // If the function has no argument then the cartesian product stuff below doesn't work - if function.sig.inputs.is_empty() { - return function_names - .iter() - .map(|function_name| { - wrap_single( - function.clone(), - &original_identifier, - function_name, - vec![], - &return_type, - &documentation_buffer, - 
) - }) - .collect(); + if !matches!(function.vis, syn::Visibility::Public(_)) { + return Err(syn::Error::new_spanned( + &function.sig.ident, + "only public functions can be exported", + )); } - // When we call create_temp_variable we can get multiple definitions for a variable - // For instance when a rust function is `fn foo(list: &[Value])` we can define two internal functions for both Tuple and List - let mut variation_id = 0usize; function_names .iter() - .flat_map(|function_name| { - function - .sig - .inputs - .iter() - .enumerate() - .map(|(position, fn_arg)| { - let name = match fn_arg { - syn::FnArg::Receiver(_) => "self".to_string(), - syn::FnArg::Typed(syn::PatType { pat, .. }) => match &**pat { - syn::Pat::Ident(syn::PatIdent { ident, .. }) => ident.to_string(), - _ => panic!("don't know how to process this"), - }, - }; - create_temp_variable(position, fn_arg, &original_identifier, &name) - }) - .multi_cartesian_product() - .map(|args| { - let wrapped = wrap_single( - function.clone(), - &format_ident!("{original_identifier}_{variation_id}"), - function_name, - args, - &return_type, - &documentation_buffer, - ); - variation_id += 1; - wrapped - }) - .collect::>() + .enumerate() + .map(|(i, function_name)| { + let ident = format_ident!("{original_identifier}_{i}"); + wrap_single( + function.clone(), + &ident, + function_name, + &return_type, + &documentation_buffer, + ) }) .collect() } -fn map_return_type(output: &syn::ReturnType) -> TokenStream { +fn map_return_type(output: &syn::ReturnType) -> syn::Result { match output { - syn::ReturnType::Default => { - // in case return type is not specified (for closures rust defaults to type inference which doesn't help us here) - quote! { ndc_interpreter::function::StaticType::Tuple(vec![]) } - } + syn::ReturnType::Default => Ok(quote! 
{ ndc_core::StaticType::Tuple(vec![]) }), syn::ReturnType::Type(_, ty) => map_type(ty), } } -fn map_type(ty: &syn::Type) -> TokenStream { +fn map_type(ty: &syn::Type) -> syn::Result { match ty { syn::Type::Path(p) => map_type_path(p), syn::Type::Reference(r) => map_type(r.elem.as_ref()), syn::Type::Tuple(t) => { - let inner = t.elems.iter().map(map_type); - quote::quote! { - ndc_interpreter::function::StaticType::Tuple(vec![ + let inner = t + .elems + .iter() + .map(map_type) + .collect::>>()?; + Ok(quote! { + ndc_core::StaticType::Tuple(vec![ #(#inner),* ]) - } - } - syn::Type::Infer(_) => { - quote::quote! { ndc_interpreter::function::StaticType::Any } - } - _ => { - panic!("unmapped type: {ty:?}"); + }) } + syn::Type::Infer(_) => Ok(quote! { ndc_core::StaticType::Any }), + _ => Err(syn::Error::new_spanned( + ty, + format!("cannot map type to StaticType"), + )), } } #[allow(clippy::single_match_else)] -fn map_type_path(p: &syn::TypePath) -> TokenStream { - let segment = p.path.segments.last().unwrap(); +fn map_type_path(p: &syn::TypePath) -> syn::Result { + let segment = p + .path + .segments + .last() + .ok_or_else(|| syn::Error::new_spanned(p, "empty type path"))?; match segment.ident.to_string().as_str() { "i32" | "i64" | "isize" | "u32" | "u64" | "usize" | "BigInt" => { - quote::quote! { ndc_interpreter::function::StaticType::Int } - } - "f32" | "f64" => { - quote::quote! { ndc_interpreter::function::StaticType::Float } - } - "bool" => { - quote::quote! { ndc_interpreter::function::StaticType::Bool } - } - "String" | "str" => { - quote::quote! { ndc_interpreter::function::StaticType::String } + Ok(quote! { ndc_core::StaticType::Int }) } + "f32" | "f64" => Ok(quote! { ndc_core::StaticType::Float }), + "bool" => Ok(quote! { ndc_core::StaticType::Bool }), + "String" | "str" => Ok(quote! 
{ ndc_core::StaticType::String }), "Vec" | "List" => match &segment.arguments { syn::PathArguments::AngleBracketed(args) => { - let inner = args.args.first().expect("Vec<> requires inner type"); + let inner = args + .args + .first() + .ok_or_else(|| syn::Error::new_spanned(segment, "Vec<> requires inner type"))?; if let syn::GenericArgument::Type(inner_ty) = inner { - let mapped = map_type(inner_ty); - quote::quote! { ndc_interpreter::function::StaticType::List(Box::new(#mapped)) } + let mapped = map_type(inner_ty)?; + Ok(quote! { ndc_core::StaticType::List(Box::new(#mapped)) }) } else { - panic!("Vec inner not a type"); + Err(syn::Error::new_spanned(inner, "Vec inner not a type")) } } - _ => { - quote::quote! { ndc_interpreter::function::StaticType::List(Box::new(ndc_interpreter::function::StaticType::Any)) } - } + _ => Ok(quote! { ndc_core::StaticType::List(Box::new(ndc_core::StaticType::Any)) }), }, "VecDeque" | "Deque" => match &segment.arguments { syn::PathArguments::AngleBracketed(args) => { - let inner = args.args.first().expect("VecDeque<> requires inner type"); + let inner = args.args.first().ok_or_else(|| { + syn::Error::new_spanned(segment, "VecDeque<> requires inner type") + })?; if let syn::GenericArgument::Type(inner_ty) = inner { - let mapped = map_type(inner_ty); - quote::quote! { ndc_interpreter::function::StaticType::Deque(Box::new(#mapped)) } + let mapped = map_type(inner_ty)?; + Ok(quote! { ndc_core::StaticType::Deque(Box::new(#mapped)) }) } else { - panic!("VecDeque inner not a type"); + Err(syn::Error::new_spanned(inner, "VecDeque inner not a type")) } } - _ => quote::quote! { - ndc_interpreter::function::StaticType::Deque(Box::new( - ndc_interpreter::function::StaticType::Any + _ => Ok(quote! 
{ + ndc_core::StaticType::Deque(Box::new( + ndc_core::StaticType::Any )) - }, + }), }, "DefaultMap" | "HashMap" | "Map" => match &segment.arguments { syn::PathArguments::AngleBracketed(args) => { let mut iter = args.args.iter(); - let Some(key) = iter.next() else { - return temp_create_map_any(); - }; - let Some(val) = iter.next() else { - return temp_create_map_any(); - }; - let key_ty = match key { - syn::GenericArgument::Type(t) => t, - _ => panic!("Invalid map key"), + let Some(syn::GenericArgument::Type(key_ty)) = iter.next() else { + return Ok( + quote! { ndc_core::StaticType::Map { key: Box::new(ndc_core::StaticType::Any), value: Box::new(ndc_core::StaticType::Any) } }, + ); }; - let val_ty = match val { - syn::GenericArgument::Type(t) => t, - _ => panic!("Invalid map value"), + let Some(syn::GenericArgument::Type(val_ty)) = iter.next() else { + return Ok( + quote! { ndc_core::StaticType::Map { key: Box::new(ndc_core::StaticType::Any), value: Box::new(ndc_core::StaticType::Any) } }, + ); }; - let key_mapped = map_type(key_ty); - let val_mapped = map_type(val_ty); - quote::quote! { ndc_interpreter::function::StaticType::Map { key: Box::new(#key_mapped), value: Box::new(#val_mapped) } } + let key_mapped = map_type(key_ty)?; + let val_mapped = map_type(val_ty)?; + Ok( + quote! { ndc_core::StaticType::Map { key: Box::new(#key_mapped), value: Box::new(#val_mapped) } }, + ) } - _ => temp_create_map_any(), + _ => Ok( + quote! { ndc_core::StaticType::Map { key: Box::new(ndc_core::StaticType::Any), value: Box::new(ndc_core::StaticType::Any) } }, + ), }, "MinHeap" => match &segment.arguments { syn::PathArguments::AngleBracketed(args) => { - let inner = args.args.first().expect("MinHeap requires inner"); + let inner = args.args.first().ok_or_else(|| { + syn::Error::new_spanned(segment, "MinHeap requires inner type") + })?; if let syn::GenericArgument::Type(inner_ty) = inner { - let mapped = map_type(inner_ty); - quote::quote! 
{ ndc_interpreter::function::StaticType::MinHeap(Box::new(#mapped)) } + let mapped = map_type(inner_ty)?; + Ok(quote! { ndc_core::StaticType::MinHeap(Box::new(#mapped)) }) } else { - panic!("MinHeap inner invalid"); + Err(syn::Error::new_spanned(inner, "MinHeap inner not a type")) } } - _ => quote::quote! { - ndc_interpreter::function::StaticType::MinHeap(Box::new( - ndc_interpreter::function::StaticType::Any + _ => Ok(quote! { + ndc_core::StaticType::MinHeap(Box::new( + ndc_core::StaticType::Any )) - }, + }), }, "MaxHeap" => match &segment.arguments { syn::PathArguments::AngleBracketed(args) => { - let inner = args.args.first().expect("MaxHeap requires inner"); + let inner = args.args.first().ok_or_else(|| { + syn::Error::new_spanned(segment, "MaxHeap requires inner type") + })?; if let syn::GenericArgument::Type(inner_ty) = inner { - let mapped = map_type(inner_ty); - quote::quote! { ndc_interpreter::function::StaticType::MaxHeap(Box::new(#mapped)) } + let mapped = map_type(inner_ty)?; + Ok(quote! { ndc_core::StaticType::MaxHeap(Box::new(#mapped)) }) } else { - panic!("MaxHeap inner invalid"); + Err(syn::Error::new_spanned(inner, "MaxHeap inner not a type")) } } - _ => panic!("MaxHeap without generics"), + _ => Err(syn::Error::new_spanned( + segment, + "MaxHeap requires generic arguments", + )), }, "Iterator" => match &segment.arguments { syn::PathArguments::AngleBracketed(args) => { - let inner = args.args.first().expect("Iterator requires inner"); + let inner = args.args.first().ok_or_else(|| { + syn::Error::new_spanned(segment, "Iterator requires inner type") + })?; if let syn::GenericArgument::Type(inner_ty) = inner { - let mapped = map_type(inner_ty); - quote::quote! { ndc_interpreter::function::StaticType::Iterator(Box::new(#mapped)) } + let mapped = map_type(inner_ty)?; + Ok(quote! 
{ ndc_core::StaticType::Iterator(Box::new(#mapped)) }) } else { - panic!("Iterator inner invalid"); + Err(syn::Error::new_spanned(inner, "Iterator inner not a type")) } } - _ => { - quote::quote! { ndc_interpreter::function::StaticType::Iterator(Box::new(ndc_interpreter::function::StaticType::Any)) } - } + _ => Ok(quote! { ndc_core::StaticType::Iterator(Box::new(ndc_core::StaticType::Any)) }), }, "Option" => match &segment.arguments { syn::PathArguments::AngleBracketed(args) => { - let inner = args.args.first().expect("Option requires inner type"); + let inner = args.args.first().ok_or_else(|| { + syn::Error::new_spanned(segment, "Option requires inner type") + })?; if let syn::GenericArgument::Type(inner_ty) = inner { - let mapped = map_type(inner_ty); - quote::quote! { ndc_interpreter::function::StaticType::Option(Box::new(#mapped)) } + let mapped = map_type(inner_ty)?; + Ok(quote! { ndc_core::StaticType::Option(Box::new(#mapped)) }) } else { - panic!("Option inner invalid"); + Err(syn::Error::new_spanned(inner, "Option inner not a type")) } } - _ => panic!("Option without generics"), + _ => Err(syn::Error::new_spanned( + segment, + "Option requires generic arguments", + )), }, "Result" => match &segment.arguments { syn::PathArguments::AngleBracketed(args) => { if let Some(syn::GenericArgument::Type(inner_ty)) = args.args.first() { map_type(inner_ty) } else { - panic!("Result without generic arguments"); + Err(syn::Error::new_spanned( + segment, + "Result requires generic arguments", + )) } } - _ => panic!("Result without angle bracketed args"), + _ => Err(syn::Error::new_spanned( + segment, + "Result requires angle bracketed arguments", + )), }, - "Number" => quote::quote! { ndc_interpreter::function::StaticType::Number }, - "Value" | "EvaluationResult" => { - quote::quote! { ndc_interpreter::function::StaticType::Any } - } - unmatched => panic!("Cannot map type string '{unmatched}' to StaticType"), + "Number" => Ok(quote! 
{ ndc_core::StaticType::Number }), + "MapValue" => Ok(quote! { + ndc_core::StaticType::Map { + key: Box::new(ndc_core::StaticType::Any), + value: Box::new(ndc_core::StaticType::Any), + } + }), + "Value" | "EvaluationResult" | "SeqValue" => Ok(quote! { ndc_core::StaticType::Any }), + unmatched => Err(syn::Error::new_spanned( + segment, + format!("cannot map type '{unmatched}' to StaticType"), + )), } } -/// Wraps an original rust function `function` in an outer function with the identifier `identifier` -/// It's registered with the environment as `register_as_function_name` -/// The argument translations mapping is defined by `input_arguments` fn wrap_single( function: syn::ItemFn, identifier: &syn::Ident, register_as_function_name: &proc_macro2::Literal, - input_arguments: Vec, return_type: &TokenStream, docs: &str, -) -> WrappedFunction { +) -> syn::Result { let inner_ident = format_ident!("{}_inner", identifier); let inner = { let mut inner = function.clone(); - // Remove attributes to prevent compilation errors inner.attrs.clear(); - - // Change the name + inner.vis = syn::Visibility::Public(syn::token::Pub::default()); inner.sig.ident = inner_ident.clone(); inner }; - let mut argument_init_code_blocks = Vec::new(); - let mut arguments = Vec::new(); - let mut param_types: Vec = Vec::new(); - let mut param_names: Vec = Vec::new(); - - for input_arg in input_arguments { - let Argument { - argument, - initialize_code, - param_type, - param_name, - } = input_arg; - - arguments.push(argument); - argument_init_code_blocks.push(initialize_code); - param_types.push(param_type); - param_names.push(param_name); - } - - let return_expr = match function.sig.output { - syn::ReturnType::Default => quote! { - return Ok(ndc_interpreter::value::Value::unit()); - }, - syn::ReturnType::Type(_, typ) => match &*typ { - // If the function returns a result we unpack it using the question mark operator - ty @ syn::Type::Path(_) if path_ends_with(ty, "EvaluationResult") => quote! 
{ - return result; - }, - ty @ syn::Type::Path(_) if path_ends_with(ty, "Result") => quote! { - let value = result.map_err(|err| ndc_interpreter::function::FunctionCarrier::IntoEvaluationError(Box::new(err)))?; - return Ok(ndc_interpreter::value::Value::from(value)); - }, - _ => quote! { - let result = ndc_interpreter::value::Value::from(result); - return Ok(result); - }, - }, - }; + let vm = try_generate_vm_native( + &function, + &inner_ident, + register_as_function_name, + docs, + return_type, + ) + .ok_or_else(|| { + syn::Error::new_spanned( + &function.sig, + "unsupported parameter or return type for VM native", + ) + })?; - // This generates a function declaration from a rust function - // The expansion looks something like this - // - // fn wrapper_function(values: &[Value]) -> EvaluationResult { - // fn original_function(....) { .... } - // let arg0 = ....; // from values[0] - // let arg1 = ....; // from values[1] - // - // return original_function(arg0, arg1); - // } let function_declaration = quote! { - pub fn #identifier ( - values: &mut [ndc_interpreter::value::Value], - environment: &std::rc::Rc> - ) -> ndc_interpreter::evaluate::EvaluationResult { - // Define the inner function that has the rust type signature - #[inline] - #inner - - // Initialize the arguments and map them from the Andy C types to the rust types - let [#(#arguments, )*] = values else { panic!("actual argument count did not match expected argument count when calling native method, this should be prevented by the runtime") }; - #(#argument_init_code_blocks; )* - - // Call the inner function with the unpacked arguments - let result = #inner_ident (#(#arguments, )*); - - // Return the result (Possibly by unpacking errors) - #return_expr - } + #[inline] + #inner }; - let function_registration = quote! 
{ - let func = ndc_interpreter::function::FunctionBuilder::default() - .body(ndc_interpreter::function::FunctionBody::GenericFunction { - function: #identifier, - type_signature: ndc_interpreter::function::TypeSignature::Exact(vec![ - #( ndc_interpreter::function::Parameter::new(#param_names, #param_types,) ),* - ]), - return_type: #return_type, - }) - .name(String::from(#register_as_function_name)) - .documentation(String::from(#docs)) - .build() - .expect("expected function creation in proc macro to always succeed"); + let VmNativeTokens { + native_let, + param_types: _, + param_names: _, + } = vm; - env.declare_global_fn(func); + let function_registration = quote! { + #native_let + env.declare_global_fn(native); }; - - WrappedFunction { + Ok(WrappedFunction { function_declaration, function_registration, - } + }) } -fn into_param_type(ty: &syn::Type) -> TokenStream { - match ty { - ty if path_ends_with(ty, "Vec") => { - quote! { ndc_interpreter::function::StaticType::List(Box::new(ndc_interpreter::function::StaticType::Any)) } - } - ty if path_ends_with(ty, "VecDeque") => { - quote! { ndc_interpreter::function::StaticType::Deque(Box::new(ndc_interpreter::function::StaticType::Any)) } - } - ty if path_ends_with(ty, "DefaultMap") - || path_ends_with(ty, "DefaultMapMut") - || path_ends_with(ty, "HashMap") => - { - temp_create_map_any() - } - ty if path_ends_with(ty, "MinHeap") => { - quote! { ndc_interpreter::function::StaticType::MinHeap(Box::new(ndc_interpreter::function::StaticType::Any)) } - } - ty if path_ends_with(ty, "MaxHeap") => { - quote! { ndc_interpreter::function::StaticType::MaxHeap(Box::new(ndc_interpreter::function::StaticType::Any)) } - } - ty if path_ends_with(ty, "ListRepr") => { - quote! { ndc_interpreter::function::StaticType::List(Box::new(ndc_interpreter::function::StaticType::Any)) } - } - ty if path_ends_with(ty, "MapRepr") => temp_create_map_any(), - syn::Type::Reference(syn::TypeReference { elem, .. 
}) => into_param_type(elem), - syn::Type::Path(syn::TypePath { path, .. }) => match path { - _ if path.is_ident("i64") => quote! { ndc_interpreter::function::StaticType::Int }, - _ if path.is_ident("usize") => { - quote! { ndc_interpreter::function::StaticType::Int } - } - _ if path.is_ident("f64") => { - quote! { ndc_interpreter::function::StaticType::Float } - } - _ if path.is_ident("bool") => { - quote! { ndc_interpreter::function::StaticType::Bool } - } - _ if path.is_ident("Value") => { - quote! { ndc_interpreter::function::StaticType::Any } - } - _ if path.is_ident("Number") => { - quote! { ndc_interpreter::function::StaticType::Number } - } - _ if path.is_ident("Sequence") => { - quote! { ndc_interpreter::function::StaticType::Sequence(Box::new(ndc_interpreter::function::StaticType::Any)) } - } - _ if path.is_ident("Callable") => { - quote! { - ndc_interpreter::function::StaticType::Function { - parameters: None, - return_type: Box::new(ndc_interpreter::function::StaticType::Any) - } - } - } - _ => panic!("Don't know how to convert Path into StaticType\n\n{path:?}"), - }, - syn::Type::ImplTrait(_) => { - quote! { ndc_interpreter::function::StaticType::Iterator(Box::new(ndc_interpreter::function::StaticType::Any)) } - } - x => panic!("Don't know how to convert {x:?} into StaticType"), - } +/// Tokens emitted by `try_generate_vm_native` when vm_native is possible. +#[allow(dead_code)] +struct VmNativeTokens { + /// `let native: Rc = Rc::new(NativeFunction { ... 
});` + native_let: TokenStream, + /// StaticType expressions for each parameter + param_types: Vec, + /// Parameter name strings + param_names: Vec, } -fn create_temp_variable( - position: usize, - input: &syn::FnArg, - identifier: &syn::Ident, - original_name: &str, -) -> Vec { - let argument_var_name = syn::Ident::new(&format!("arg{position}"), identifier.span()); - if let syn::FnArg::Typed(pat_type) = input { - let ty = &*pat_type.ty; - - let converters: Vec> = build(); - - let temp_var = syn::Ident::new(&format!("temp_{argument_var_name}"), identifier.span()); - - for converter in converters { - if converter.matches(ty) { - return converter.convert(temp_var, original_name, argument_var_name); - } - } - - // The pattern is Callable - if path_ends_with(ty, "Callable") { - let temp_var = syn::Ident::new(&format!("temp_{argument_var_name}"), identifier.span()); - return vec![Argument { - param_type: quote! { - // TODO: how are we going to figure out the exact type of function here - ndc_interpreter::function::StaticType::Function { - parameters: None, - return_type: Box::new(ndc_interpreter::function::StaticType::Any) - } - }, - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! { - let ndc_interpreter::value::Value::Function(#temp_var) = #argument_var_name else { - panic!("Value #position needed to be a Sequence::Map but wasn't"); - }; - let #argument_var_name = &Callable { - function: Rc::clone(#temp_var), - environment: environment - }; - }, - }]; - } - // The pattern is &HashMap - else if is_ref(ty) && path_ends_with(ty, "HashMap") { - let rc_temp_var = - syn::Ident::new(&format!("temp_{argument_var_name}"), identifier.span()); - return vec![Argument { - param_type: temp_create_map_any(), - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! 
{ - let ndc_interpreter::value::Value::Sequence(ndc_interpreter::sequence::Sequence::Map(#rc_temp_var, _default)) = #argument_var_name else { - panic!("Value #position needed to be a Sequence::Map but wasn't"); - }; - let #argument_var_name = &*#rc_temp_var.borrow(); - }, - }]; - } - // The pattern is &mut HashMap - else if is_ref_mut(ty) && path_ends_with(ty, "HashMap") { - let rc_temp_var = - syn::Ident::new(&format!("temp_{argument_var_name}"), identifier.span()); - return vec![Argument { - param_type: temp_create_map_any(), - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! { - let ndc_interpreter::value::Value::Sequence(ndc_interpreter::sequence::Sequence::Map(#rc_temp_var, _default)) = #argument_var_name else { - panic!("Value #position needed to be a Sequence::Map but wasn't"); - }; - let #argument_var_name = &mut *#rc_temp_var.try_borrow_mut()?; - }, - }]; - } - // The pattern is DefaultMap - else if path_ends_with(ty, "DefaultMap") { - let rc_temp_var = - syn::Ident::new(&format!("temp_{argument_var_name}"), identifier.span()); - return vec![Argument { - param_type: temp_create_map_any(), - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! { - let ndc_interpreter::value::Value::Sequence(ndc_interpreter::sequence::Sequence::Map(#rc_temp_var, default)) = #argument_var_name else { - panic!("Value #position needed to be a Sequence::Map but wasn't"); - }; - let #argument_var_name = (&*#rc_temp_var.borrow(), default.to_owned()); - }, - }]; - } - // The pattern is DefaultMapMut - else if path_ends_with(ty, "DefaultMapMut") { - let rc_temp_var = - syn::Ident::new(&format!("temp_{argument_var_name}"), identifier.span()); - return vec![Argument { - param_type: temp_create_map_any(), - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! 
{ - let ndc_interpreter::value::Value::Sequence(ndc_interpreter::sequence::Sequence::Map(#rc_temp_var, default)) = #argument_var_name else { - panic!("Value #position needed to be a Sequence::Map but wasn't"); - }; - let #argument_var_name = (&mut *#rc_temp_var.try_borrow_mut()?, default.to_owned()); - }, - }]; - } - // The pattern is exactly &mut Vec - // TODO: support this for tuple - else if is_ref_mut(ty) && path_ends_with(ty, "Vec") { - let rc_temp_var = - syn::Ident::new(&format!("temp_{argument_var_name}"), identifier.span()); - return vec![Argument { - param_type: quote! { ndc_interpreter::function::StaticType::List(Box::new(ndc_interpreter::function::StaticType::Any)) }, - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! { - let ndc_interpreter::value::Value::Sequence(ndc_interpreter::sequence::Sequence::List(#rc_temp_var)) = #argument_var_name else { - panic!("Value #position needed to be a Sequence::List but wasn't"); - }; - let #argument_var_name = &mut *#rc_temp_var.try_borrow_mut()?; - }, - }]; - } - // The pattern is exactly &mut VecDeque - else if is_ref_mut(ty) && path_ends_with(ty, "VecDeque") { - let rc_temp_var = - syn::Ident::new(&format!("temp_{argument_var_name}"), identifier.span()); - return vec![Argument { - param_type: into_param_type(ty), - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! 
{ - let ndc_interpreter::value::Value::Sequence(ndc_interpreter::sequence::Sequence::Deque(#rc_temp_var)) = #argument_var_name else { - panic!("Value #position needed to be a Sequence::List but wasn't"); - }; - let #argument_var_name = &mut *#rc_temp_var.try_borrow_mut()?; - }, - }]; - } - // The pattern is exactly &VecDeque - else if is_ref(ty) && path_ends_with(ty, "VecDeque") { - let rc_temp_var = - syn::Ident::new(&format!("temp_{argument_var_name}"), identifier.span()); - return vec![Argument { - param_type: into_param_type(ty), - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! { - let ndc_interpreter::value::Value::Sequence(ndc_interpreter::sequence::Sequence::Deque(#rc_temp_var)) = #argument_var_name else { - panic!("Value #position needed to be a Sequence::List but wasn't"); - }; - let #argument_var_name = &*#rc_temp_var.try_borrow()?; - }, - }]; - } - // The pattern is exactly &mut MaxHeap - else if is_ref_mut(ty) && path_ends_with(ty, "MaxHeap") { - let rc_temp_var = - syn::Ident::new(&format!("temp_{argument_var_name}"), identifier.span()); - return vec![Argument { - param_type: into_param_type(ty), - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! { - let ndc_interpreter::value::Value::Sequence(ndc_interpreter::sequence::Sequence::MaxHeap(#rc_temp_var)) = #argument_var_name else { - panic!("Value #position needed to be a Sequence::MaxHeap but wasn't"); - }; - let #argument_var_name = &mut *#rc_temp_var.try_borrow_mut()?; - }, - }]; - } - // The pattern is exactly &MaxHeap - else if is_ref(ty) && path_ends_with(ty, "MaxHeap") { - let rc_temp_var = - syn::Ident::new(&format!("temp_{argument_var_name}"), identifier.span()); - return vec![Argument { - param_type: into_param_type(ty), - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! 
{ - let ndc_interpreter::value::Value::Sequence(ndc_interpreter::sequence::Sequence::MaxHeap(#rc_temp_var)) = #argument_var_name else { - panic!("Value #position needed to be a Sequence::MaxHeap but wasn't"); - }; - let #argument_var_name = &*#rc_temp_var.try_borrow()?; - }, - }]; - } - // The pattern is exactly &mut MinHeap - else if is_ref_mut(ty) && path_ends_with(ty, "MinHeap") { - let rc_temp_var = - syn::Ident::new(&format!("temp_{argument_var_name}"), identifier.span()); - return vec![Argument { - param_type: into_param_type(ty), - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! { - let ndc_interpreter::value::Value::Sequence(ndc_interpreter::sequence::Sequence::MinHeap(#rc_temp_var)) = #argument_var_name else { - panic!("Value #position needed to be a Sequence::MinHeap but wasn't"); - }; - let #argument_var_name = &mut *#rc_temp_var.try_borrow_mut()?; - }, - }]; - } - // The pattern is exactly &MinHeap - else if is_ref(ty) && path_ends_with(ty, "MinHeap") { - let rc_temp_var = - syn::Ident::new(&format!("temp_{argument_var_name}"), identifier.span()); - return vec![Argument { - param_type: into_param_type(ty), - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! { - let ndc_interpreter::value::Value::Sequence(ndc_interpreter::sequence::Sequence::MinHeap(#rc_temp_var)) = #argument_var_name else { - panic!("Value #position needed to be a Sequence::MinHeap but wasn't"); - }; - let #argument_var_name = &*#rc_temp_var.try_borrow()?; - }, - }]; - } - // The pattern is exactly &str - else if is_str_ref(ty) { - let rc_temp_var = - syn::Ident::new(&format!("temp_{argument_var_name}"), identifier.span()); - return vec![Argument { - param_type: quote! { ndc_interpreter::function::StaticType::String }, - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! 
{ - let ndc_interpreter::value::Value::Sequence(ndc_interpreter::sequence::Sequence::String(#rc_temp_var)) = #argument_var_name else { - panic!("Value #position needed to be a Sequence::String but wasn't"); - }; - let #rc_temp_var = #rc_temp_var.borrow(); - let #argument_var_name = #rc_temp_var.as_ref(); - }, - }]; - } - // The pattern is &BigInt - else if is_ref_of_bigint(ty) { - let big_int = syn::Ident::new(&format!("temp_{argument_var_name}"), identifier.span()); - return vec![Argument { - param_type: quote! { ndc_interpreter::function::StaticType::Int }, - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! { - let #big_int = if let ndc_interpreter::value::Value::Number(ndc_interpreter::num::Number::Int(ndc_interpreter::int::Int::Int64(smol))) = #argument_var_name { - Some(num::BigInt::from(*smol)) - } else { - None - }; - - let #argument_var_name = match #argument_var_name { - ndc_interpreter::value::Value::Number(ndc_interpreter::num::Number::Int(ndc_interpreter::int::Int::BigInt(big))) => big, - ndc_interpreter::value::Value::Number(ndc_interpreter::num::Number::Int(ndc_interpreter::int::Int::Int64(smoll))) => #big_int.as_ref().unwrap(), - _ => panic!("Value #position need to be an Int but wasn't"), - } - }, - }]; - } - // If we need an owned Value - else if path_ends_with(ty, "Value") && !is_ref(ty) { - return vec![Argument { - param_type: quote! { ndc_interpreter::function::StaticType::Any }, - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! { - let #argument_var_name = #argument_var_name.clone(); - }, - }]; - } - // The pattern is &mut [Value] - else if is_ref_mut_of_slice_of_value(ty) { - let rc_temp_var = - syn::Ident::new(&format!("temp_{argument_var_name}"), identifier.span()); - return vec![Argument { - param_type: quote! 
{ ndc_interpreter::function::StaticType::List(Box::new(ndc_interpreter::function::StaticType::Any)) }, - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! { - let ndc_interpreter::value::Value::Sequence(ndc_interpreter::sequence::Sequence::List(#rc_temp_var)) = #argument_var_name else { - panic!("Value #position needed to be a Sequence::List but wasn't"); - }; - let #argument_var_name = &mut *#rc_temp_var.borrow_mut(); - }, - }]; - } - // The pattern is &[Value] - else if is_ref_of_slice_of_value(ty) { - let rc_temp_var = - syn::Ident::new(&format!("temp_{argument_var_name}"), identifier.span()); - return vec![ - Argument { - param_type: quote! { ndc_interpreter::function::StaticType::List(Box::new(ndc_interpreter::function::StaticType::Any)) }, - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! { - let ndc_interpreter::value::Value::Sequence(ndc_interpreter::sequence::Sequence::List(#rc_temp_var)) = #argument_var_name else { - panic!("Value #position needed to be a Sequence::List but wasn't"); - }; - let #argument_var_name = &*#rc_temp_var.borrow(); - }, - }, - // Argument { - // param_type: quote! { ndc_interpreter::function::StaticType::Tuple }, - // param_name: quote! { #original_name }, - // argument: quote! { #argument_var_name }, - // initialize_code: quote! { - // let ndc_interpreter::value::Value::Sequence(ndc_interpreter::sequence::Sequence::Tuple(#rc_temp_var)) = #argument_var_name else { - // panic!("Value #position needed to be a Sequence::List but wasn't"); - // }; - // let #argument_var_name = &#rc_temp_var; - // }, - // }, - ]; - } - // The pattern is &BigRational - else if path_ends_with(ty, "BigRational") && is_ref(ty) { - return vec![Argument { - param_type: quote! { ndc_interpreter::function::StaticType::Rational }, - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! 
{ - let ndc_interpreter::value::Value::Number(ndc_interpreter::num::Number::Rational(#argument_var_name)) = #argument_var_name else { - panic!("Value #position needs to be Rational but wasn't"); - }; - - let #argument_var_name = &#argument_var_name.clone(); - }, - }]; - } - // The pattern is BigRational - else if path_ends_with(ty, "BigRational") && !is_ref(ty) { - return vec![Argument { - param_type: quote! { ndc_interpreter::function::StaticType::Rational }, - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! { - let ndc_interpreter::value::Value::Number(ndc_interpreter::num::Number::Rational(#argument_var_name)) = #argument_var_name else { - panic!("VValue #position needs to be Rational but wasn't"); - }; - - let #argument_var_name = *#argument_var_name.clone(); - }, - }]; - } - // The pattern is Complex64 - else if path_ends_with(ty, "Complex64") && !is_ref(ty) { - return vec![Argument { - param_type: quote! { ndc_interpreter::function::StaticType::Complex }, - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! { - let ndc_interpreter::value::Value::Number(ndc_interpreter::num::Number::Complex(#argument_var_name)) = #argument_var_name else { - panic!("Value #position needs to be Complex64 but wasn't"); - }; +/// Attempt to generate vm_native tokens for a function. +/// +/// Returns `None` when any parameter or the return type cannot be expressed in +/// VM-native terms. 
+fn try_generate_vm_native( + function: &syn::ItemFn, + inner_ident: &syn::Ident, + fn_name: &proc_macro2::Literal, + docs: &str, + return_type_override: &TokenStream, +) -> Option { + use crate::vm_convert::{try_vm_input, try_vm_return}; + + let mut extracts = Vec::new(); + let mut passes = Vec::new(); + let mut param_types = Vec::new(); + let mut param_names = Vec::new(); + let mut has_vm_callable = false; + let raw_args: Vec<_> = (0..function.sig.inputs.len()) + .map(|i| format_ident!("vm_raw{i}")) + .collect(); + + for (i, arg) in function.sig.inputs.iter().enumerate() { + let pat_ty = match arg { + syn::FnArg::Typed(pat_ty) => pat_ty, + syn::FnArg::Receiver(_) => return None, + }; + let ty = &*pat_ty.ty; + if classify(ty) == Some(NdcType::MutVmCallable) { + has_vm_callable = true; + } + let conv = try_vm_input(ty, i)?; + extracts.push(conv.extract); + passes.push(conv.pass); + param_types.push(conv.static_type); + let name = match &*pat_ty.pat { + syn::Pat::Ident(ident) => ident.ident.to_string(), + _ => format!("arg{i}"), + }; + param_names.push(quote! { #name }); + } - let #argument_var_name = #argument_var_name.clone(); - }, - }]; - } - // The pattern is something like `i64` (but also matches other concrete types) - else if let syn::Type::Path(path) = ty { - return vec![Argument { - param_type: into_param_type(ty), - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! { - let #argument_var_name = #path :: try_from(#argument_var_name).map_err(|err| ndc_interpreter::function::FunctionCallError::ConvertToNativeTypeError(format!("{err}")))? - }, - }]; - } - // The pattern is something like '&Number' - else if let syn::Type::Reference(type_ref) = &*pat_type.ty { - return vec![Argument { - param_type: into_param_type(ty), - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! 
{ - let #argument_var_name = <#type_ref as TryFrom<&mut ndc_interpreter::value::Value>> :: try_from(#argument_var_name).map_err(|err| ndc_interpreter::function::FunctionCallError::ConvertToNativeTypeError(format!("{err}")))? - }, - }]; + let (return_code, _) = try_vm_return(&function.sig.output)?; + let return_static_type = return_type_override; + let n = function.sig.inputs.len(); + + let func_variant = if has_vm_callable { + quote! { + ndc_vm::value::NativeFunc::WithVm(Box::new(|args, _vm| match args { + [#(#raw_args),*] => { + #(#extracts)* + let result = #inner_ident(#(#passes),*); + #return_code + } + _ => Err(ndc_vm::error::VmError::native(format!("expected {} arguments, got {}", #n, args.len()))), + })) + } + } else { + quote! { + ndc_vm::value::NativeFunc::Simple(Box::new(|args| match args { + [#(#raw_args),*] => { + #(#extracts)* + let result = #inner_ident(#(#passes),*); + #return_code + } + _ => Err(ndc_vm::error::VmError::native(format!("expected {} arguments, got {}", #n, args.len()))), + })) } - // The pattern is a trait implementation TODO: this is not implemented - else if let syn::Type::ImplTrait(syn::TypeImplTrait { .. }) = &*pat_type.ty { - // TODO: we should perform a type check, but in order to get results quick we can just assume that all impl blocks are iterators - - let rc_temp_var = - syn::Ident::new(&format!("temp_{argument_var_name}"), identifier.span()); - return vec![Argument { - param_type: into_param_type(ty), - param_name: quote! { #original_name }, - argument: quote! { #argument_var_name }, - initialize_code: quote! { - let ndc_interpreter::value::Value::Sequence(ndc_interpreter::sequence::Sequence::Iterator(#rc_temp_var)) = #argument_var_name else { - panic!("Value #position needed to be a Sequence::Iterator but wasn't"); - }; + }; - let #argument_var_name = ndc_interpreter::iterator::RcIter::new(Rc::clone(#rc_temp_var)); + let documentation_tokens = if docs.is_empty() { + quote! { None } + } else { + quote! 
{ Some(String::from(#docs)) } + }; + let native_let = quote! { + let native: std::rc::Rc = + std::rc::Rc::new(ndc_vm::value::NativeFunction { + name: String::from(#fn_name), + documentation: #documentation_tokens, + static_type: ndc_core::StaticType::Function { + parameters: Some(vec![#(#param_types.clone()),*]), + return_type: Box::new(#return_static_type), }, - }]; - } else { - panic!("not sure how to handle this type of thing:\n|---> {:?}", ty); - } - } - - panic!("Not sure how to handle receivers"); -} + func: #func_variant, + }); + }; -// TODO: just adding Any as type here is lazy AF but CBA fixing generics -pub fn temp_create_map_any() -> TokenStream { - quote! { - ndc_interpreter::function::StaticType::Map { - key: Box::new(ndc_interpreter::function::StaticType::Any), - value: Box::new(ndc_interpreter::function::StaticType::Any) - } - } + Some(VmNativeTokens { + native_let, + param_types, + param_names, + }) } diff --git a/ndc_macros/src/lib.rs b/ndc_macros/src/lib.rs index 5051cc38..5d24c26f 100644 --- a/ndc_macros/src/lib.rs +++ b/ndc_macros/src/lib.rs @@ -1,6 +1,11 @@ -mod convert; +//! Procedural macros for registering NDC standard library functions. +//! +//! The primary entry point is [`export_module`], which wraps public functions +//! inside a module into VM-native closures and generates registration code. 
+ mod function; -mod r#match; +mod types; +mod vm_convert; use proc_macro::TokenStream; use quote::quote; @@ -8,11 +13,6 @@ use quote::quote; use crate::function::wrap_function; use syn::{Item, parse_macro_input}; -#[proc_macro_attribute] -pub fn function(_attr: TokenStream, item: TokenStream) -> TokenStream { - item -} - #[proc_macro_attribute] pub fn export_module(_attr: TokenStream, item: TokenStream) -> TokenStream { let module = parse_macro_input!(item as syn::ItemMod); @@ -20,7 +20,9 @@ pub fn export_module(_attr: TokenStream, item: TokenStream) -> TokenStream { let module_vis = module.vis; let Some((_, items)) = module.content else { - panic!("exported module has no content"); + return syn::Error::new(module_name.span(), "exported module has no content") + .to_compile_error() + .into(); }; let mut declarations = Vec::new(); @@ -29,12 +31,15 @@ pub fn export_module(_attr: TokenStream, item: TokenStream) -> TokenStream { for item in items { match item { - Item::Fn(f) if matches!(f.vis, syn::Visibility::Public(_)) => { - for fun in wrap_function(&f) { - declarations.push(fun.function_declaration); - registrations.push(fun.function_registration); + Item::Fn(f) if matches!(f.vis, syn::Visibility::Public(_)) => match wrap_function(&f) { + Ok(fns) => { + for fun in fns { + declarations.push(fun.function_declaration); + registrations.push(fun.function_registration); + } } - } + Err(e) => return e.to_compile_error().into(), + }, Item::Use(u) => { uses.push(u); } @@ -43,7 +48,7 @@ pub fn export_module(_attr: TokenStream, item: TokenStream) -> TokenStream { } let register_function = quote! 
{ - pub fn register(env: &mut ndc_interpreter::environment::Environment) { + pub fn register(env: &mut ndc_core::FunctionRegistry>) { #(#registrations)* } }; diff --git a/ndc_macros/src/match.rs b/ndc_macros/src/match.rs deleted file mode 100644 index d340eb0f..00000000 --- a/ndc_macros/src/match.rs +++ /dev/null @@ -1,101 +0,0 @@ -/// Checks if a type ends with a string, returns false for slices -pub fn path_ends_with(ty: &syn::Type, ident: &str) -> bool { - match ty { - syn::Type::Path(syn::TypePath { - path: syn::Path { segments, .. }, - .. - }) => { - let Some(last_segment) = segments.last() else { - return false; - }; - - last_segment.ident == ident - } - syn::Type::Reference(syn::TypeReference { elem, .. }) => path_ends_with(elem, ident), - _ => false, - } -} - -pub fn is_ref(ty: &syn::Type) -> bool { - matches!( - ty, - syn::Type::Reference(syn::TypeReference { - mutability: None, - .. - }) - ) -} -pub fn is_ref_of(ty: &syn::Type, f: fn(&syn::Type) -> bool) -> bool { - match ty { - syn::Type::Reference(syn::TypeReference { - elem, mutability, .. - }) if mutability.is_none() => f(elem.as_ref()), - _ => false, - } -} - -pub fn is_ref_mut_of(ty: &syn::Type, f: fn(&syn::Type) -> bool) -> bool { - match ty { - syn::Type::Reference(syn::TypeReference { - elem, mutability, .. - }) if mutability.is_some() => f(elem.as_ref()), - _ => false, - } -} - -pub fn is_ref_mut_of_slice_of_value(ty: &syn::Type) -> bool { - is_ref_mut_of(ty, |ty| match ty { - syn::Type::Slice(syn::TypeSlice { elem, .. }) => has_path_match(elem.as_ref(), "Value"), - _ => false, - }) -} - -pub fn is_ref_of_slice_of_value(ty: &syn::Type) -> bool { - is_ref_of(ty, |ty| match ty { - syn::Type::Slice(syn::TypeSlice { elem, .. 
}) => has_path_match(elem.as_ref(), "Value"), - _ => false, - }) -} - -pub fn is_ref_of_bigint(ty: &syn::Type) -> bool { - is_ref_of(ty, |ty| has_path_match(ty, "BigInt")) -} - -pub fn is_ref_mut(ty: &syn::Type) -> bool { - matches!( - ty, - syn::Type::Reference(syn::TypeReference { - mutability: Some(_), - .. - }) - ) -} - -pub fn is_str_ref(ty: &syn::Type) -> bool { - match ty { - syn::Type::Reference(syn::TypeReference { elem: ty, .. }) => has_path_match(ty, "str"), - _ => false, - } -} - -fn has_path_match(ty: &syn::Type, ident: &str) -> bool { - match ty { - syn::Type::Path(syn::TypePath { - path: syn::Path { segments, .. }, - .. - }) => segments.iter().any(|seg| seg.ident == ident), - _ => false, - } -} - -pub fn is_string(ty: &syn::Type) -> bool { - match ty { - // If ref just recurse :haha: - syn::Type::Reference(syn::TypeReference { elem: ty, .. }) => is_string(ty), - syn::Type::Path(syn::TypePath { - path: syn::Path { segments, .. }, - .. - }) => segments.iter().any(|seg| seg.ident == "String"), - _ => false, - } -} diff --git a/ndc_macros/src/types.rs b/ndc_macros/src/types.rs new file mode 100644 index 00000000..caf9840a --- /dev/null +++ b/ndc_macros/src/types.rs @@ -0,0 +1,262 @@ +//! Type classification for NDC macro parameter and return types. +//! +//! The [`classify`] function maps `syn::Type` values to [`NdcType`] variants, +//! providing a single point of truth for type recognition. Both parameter +//! extraction (`vm_convert`) and function wrapping (`function`) use this +//! classification instead of ad-hoc predicate chains. + +/// A recognized NDC type from a Rust function signature. +/// +/// Variants encode both the base type and its reference/mutability wrapper, +/// since the generated code differs based on ownership. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum NdcType { + /// Owned `Number` + Number, + /// `&Number` + NumberRef, + /// `f64` + F64, + /// `bool` + Bool, + /// `i64` + I64, + /// `usize` + Usize, + /// Owned `BigInt` (return types) + BigInt, + /// `&BigInt` (input parameters) + BigIntRef, + /// Owned `BigRational` (return types) + BigRational, + /// `&BigRational` (input parameters) + BigRationalRef, + /// Owned `Complex64` + Complex64, + /// Owned `String` (also matches `&String`) + String, + /// `&str` + StrRef, + /// `&mut String` + MutString, + /// `&StringRepr` or owned `StringRepr` + StringRepr, + /// `&mut StringRepr` + MutStringRepr, + /// `ndc_vm::value::Value` + VmValue, + /// `ndc_vm::value::SeqValue` + SeqValue, + /// `ndc_vm::value::MapValue` + MapValue, + /// `&[ndc_vm::value::Value]` + SliceOfValue, + /// `&mut Vec` + MutVecOfValue, + /// `&HashMap` + RefHashMap, + /// `&mut HashMap` + MutHashMap, + /// `&VecDeque` + RefVecDeque, + /// `&mut VecDeque` + MutVecDeque, + /// `&mut BinaryHeap>` + MutMinHeap, + /// `&mut BinaryHeap` + MutMaxHeap, + /// `&mut VmCallable` + MutVmCallable, +} + +/// Classify a `syn::Type` into a recognized NDC type. +/// +/// Returns `None` for types that cannot be directly handled by the macro +/// system (e.g. custom structs, unsupported generic combinations). +pub fn classify(ty: &syn::Type) -> Option { + match ty { + syn::Type::Reference(r) => { + if r.mutability.is_some() { + classify_ref_mut(&r.elem) + } else { + classify_ref(&r.elem) + } + } + _ => classify_owned(ty), + } +} + +/// Unwrap a `Result` type, returning the inner `T`. 
+pub fn unwrap_result(ty: &syn::Type) -> Option<&syn::Type> { + let syn::Type::Path(p) = ty else { return None }; + let last = p.path.segments.last()?; + if last.ident != "Result" { + return None; + } + let syn::PathArguments::AngleBracketed(args) = &last.arguments else { + return None; + }; + match args.args.first() { + Some(syn::GenericArgument::Type(inner)) => Some(inner), + _ => None, + } +} + +// --- Classify by ownership --- + +fn classify_owned(ty: &syn::Type) -> Option { + let syn::Type::Path(type_path) = ty else { + return None; + }; + let segments: Vec<_> = type_path.path.segments.iter().collect(); + + let last = segments.last()?; + match last.ident.to_string().as_str() { + "Number" => Some(NdcType::Number), + "f64" => Some(NdcType::F64), + "bool" => Some(NdcType::Bool), + "i64" => Some(NdcType::I64), + "usize" => Some(NdcType::Usize), + "Complex64" => Some(NdcType::Complex64), + "String" => Some(NdcType::String), + "StringRepr" => Some(NdcType::StringRepr), + "BigInt" => Some(NdcType::BigInt), + "BigRational" => Some(NdcType::BigRational), + "Value" => Some(NdcType::VmValue), + "SeqValue" => Some(NdcType::SeqValue), + "MapValue" => Some(NdcType::MapValue), + _ => None, + } +} + +fn classify_ref(inner: &syn::Type) -> Option { + // &str + if is_path_ident(inner, "str") { + return Some(NdcType::StrRef); + } + + // &[ndc_vm::value::Value] + if let syn::Type::Slice(slice) = inner + && classify_owned(&slice.elem) == Some(NdcType::VmValue) + { + return Some(NdcType::SliceOfValue); + } + + // &HashMap + if is_collection_of_vm_value(inner, "HashMap") { + return Some(NdcType::RefHashMap); + } + + // &VecDeque + if is_collection_of_vm_value(inner, "VecDeque") { + return Some(NdcType::RefVecDeque); + } + + // Simple &T references — map to appropriate variant + match classify_owned(inner)? 
{ + NdcType::Number => Some(NdcType::NumberRef), + NdcType::BigInt => Some(NdcType::BigIntRef), + NdcType::BigRational => Some(NdcType::BigRationalRef), + NdcType::String => Some(NdcType::String), + NdcType::StringRepr => Some(NdcType::StringRepr), + _ => None, + } +} + +fn classify_ref_mut(inner: &syn::Type) -> Option { + if is_path_ident(inner, "VmCallable") { + return Some(NdcType::MutVmCallable); + } + if is_path_ident(inner, "String") { + return Some(NdcType::MutString); + } + if is_path_ident(inner, "StringRepr") { + return Some(NdcType::MutStringRepr); + } + if is_collection_of_vm_value(inner, "Vec") { + return Some(NdcType::MutVecOfValue); + } + if is_collection_of_vm_value(inner, "HashMap") { + return Some(NdcType::MutHashMap); + } + if is_collection_of_vm_value(inner, "VecDeque") { + return Some(NdcType::MutVecDeque); + } + if is_binary_heap_of(inner, is_reverse_of_ord_value) { + return Some(NdcType::MutMinHeap); + } + if is_binary_heap_of(inner, |t| is_path_ident(t, "OrdValue")) { + return Some(NdcType::MutMaxHeap); + } + None +} + +fn is_path_ident(ty: &syn::Type, ident: &str) -> bool { + let syn::Type::Path(p) = ty else { return false }; + p.path.segments.last().is_some_and(|s| s.ident == ident) +} + +/// Check if `ty` is `Collection` (for Vec, VecDeque) +/// or `Collection` (for HashMap). 
+fn is_collection_of_vm_value(ty: &syn::Type, collection_name: &str) -> bool { + let syn::Type::Path(p) = ty else { return false }; + let Some(last) = p.path.segments.last() else { + return false; + }; + if last.ident != collection_name { + return false; + } + let syn::PathArguments::AngleBracketed(args) = &last.arguments else { + return false; + }; + if collection_name == "HashMap" { + let mut iter = args.args.iter(); + let Some(syn::GenericArgument::Type(key)) = iter.next() else { + return false; + }; + let Some(syn::GenericArgument::Type(val)) = iter.next() else { + return false; + }; + classify_owned(key) == Some(NdcType::VmValue) + && classify_owned(val) == Some(NdcType::VmValue) + } else { + let Some(syn::GenericArgument::Type(elem)) = args.args.first() else { + return false; + }; + classify_owned(elem) == Some(NdcType::VmValue) + } +} + +fn is_binary_heap_of(ty: &syn::Type, check: impl Fn(&syn::Type) -> bool) -> bool { + let syn::Type::Path(p) = ty else { return false }; + let Some(last) = p.path.segments.last() else { + return false; + }; + if last.ident != "BinaryHeap" { + return false; + } + let syn::PathArguments::AngleBracketed(args) = &last.arguments else { + return false; + }; + let Some(syn::GenericArgument::Type(inner)) = args.args.first() else { + return false; + }; + check(inner) +} + +fn is_reverse_of_ord_value(ty: &syn::Type) -> bool { + let syn::Type::Path(p) = ty else { return false }; + let Some(last) = p.path.segments.last() else { + return false; + }; + if last.ident != "Reverse" { + return false; + } + let syn::PathArguments::AngleBracketed(args) = &last.arguments else { + return false; + }; + let Some(syn::GenericArgument::Type(inner)) = args.args.first() else { + return false; + }; + is_path_ident(inner, "OrdValue") +} diff --git a/ndc_macros/src/vm_convert.rs b/ndc_macros/src/vm_convert.rs new file mode 100644 index 00000000..eb51c858 --- /dev/null +++ b/ndc_macros/src/vm_convert.rs @@ -0,0 +1,539 @@ +//! 
Code generation for VM-native function parameter extraction and return conversion. +//! +//! Each `try_vm_*` function returns `None` when the type cannot be represented +//! without the interpreter bridge, causing `export_module` to silently skip +//! vm_native generation for that function. + +use crate::types::{NdcType, classify, unwrap_result}; +use proc_macro2::TokenStream; +use quote::{format_ident, quote}; + +/// Extraction of a single argument from a `&[VmValue]` slice. +pub struct VmInputArg { + /// Code that reads `vm_raw{N}` (a `&VmValue`) and binds a converted local. + pub extract: TokenStream, + /// Expression to pass as the corresponding argument to the inner function. + pub pass: TokenStream, + /// `StaticType` token for this parameter. + pub static_type: TokenStream, +} + +/// Build the `VmError::native(format!("arg {pos}: expected {expected}, got {actual}"))` expression. +fn arg_error(position: usize, expected: &str) -> TokenStream { + let raw = format_ident!("vm_raw{position}"); + quote! { + ndc_vm::error::VmError::native(format!( + "arg {}: expected {}, got {}", + #position, #expected, #raw.static_type() + )) + } +} + +/// Generate extraction code for a RefCell-based collection parameter. +/// +/// Covers List, Deque, MinHeap, MaxHeap (but not Map, which has different destructuring). +fn refcell_collection_arg( + position: usize, + expected: &str, + variant: &str, + mutable: bool, + static_type: TokenStream, +) -> VmInputArg { + let raw = format_ident!("vm_raw{position}"); + let temp = format_ident!("vm_temp{position}"); + let err = arg_error(position, expected); + let rc = format_ident!("{temp}_rc"); + let guard = format_ident!("{temp}_guard"); + let variant_ident = format_ident!("{variant}"); + + let (borrow, pass) = if mutable { + ( + quote! { let mut #guard = #rc.borrow_mut(); }, + quote! { &mut *#guard }, + ) + } else { + (quote! { let #guard = #rc.borrow(); }, quote! { &*#guard }) + }; + + VmInputArg { + extract: quote! 
{ + let ndc_vm::value::Value::Object(ref #rc) = *#raw else { + return Err(#err); + }; + let ndc_vm::value::Object::#variant_ident(ref #rc) = *#rc.as_ref() else { + return Err(#err); + }; + #borrow + }, + pass, + static_type, + } +} + +/// Try to generate extraction code for a single parameter type. +/// +/// Returns `None` for types that cannot be expressed in VM-native terms. +pub fn try_vm_input(ty: &syn::Type, position: usize) -> Option { + let ndc_type = classify(ty)?; + let raw = format_ident!("vm_raw{position}"); + let temp = format_ident!("vm_temp{position}"); + + let result = match ndc_type { + NdcType::Number | NdcType::NumberRef => { + let err = arg_error(position, "number"); + let pass = if ndc_type == NdcType::NumberRef { + quote! { &#temp } + } else { + quote! { #temp } + }; + VmInputArg { + extract: quote! { + let #temp = #raw.to_number().ok_or_else(|| #err)?; + }, + pass, + static_type: quote! { ndc_core::StaticType::Number }, + } + } + + NdcType::F64 => { + let err = arg_error(position, "float"); + VmInputArg { + extract: quote! { + let #temp = #raw.to_f64().ok_or_else(|| #err)?; + }, + pass: quote! { #temp }, + static_type: quote! { ndc_core::StaticType::Float }, + } + } + + NdcType::Bool => { + let err = arg_error(position, "bool"); + VmInputArg { + extract: quote! { + let ndc_vm::value::Value::Bool(#temp) = #raw else { + return Err(#err); + }; + let #temp = *#temp; + }, + pass: quote! { #temp }, + static_type: quote! { ndc_core::StaticType::Bool }, + } + } + + NdcType::String | NdcType::StrRef => { + let err = arg_error(position, "string"); + let pass = if ndc_type == NdcType::StrRef { + quote! { &#temp } + } else { + quote! { #temp } + }; + VmInputArg { + extract: quote! { + let #temp = match #raw { + ndc_vm::value::Value::Object(obj) => match obj.as_ref() { + ndc_vm::value::Object::String(s) => s.borrow().clone(), + _ => return Err(#err), + }, + _ => return Err(#err), + }; + }, + pass, + static_type: quote! 
{ ndc_core::StaticType::String }, + } + } + + NdcType::I64 => { + let err = arg_error(position, "int"); + VmInputArg { + extract: quote! { + let #temp = match #raw { + ndc_vm::value::Value::Int(i) => *i, + _ => return Err(#err), + }; + }, + pass: quote! { #temp }, + static_type: quote! { ndc_core::StaticType::Int }, + } + } + + NdcType::Usize => { + let err = arg_error(position, "int"); + VmInputArg { + extract: quote! { + let #temp = match #raw { + ndc_vm::value::Value::Int(i) => *i as usize, + _ => return Err(#err), + }; + }, + pass: quote! { #temp }, + static_type: quote! { ndc_core::StaticType::Int }, + } + } + + NdcType::BigIntRef => { + let err = arg_error(position, "int"); + VmInputArg { + extract: quote! { + let #temp = { + let num = #raw.to_number().ok_or_else(|| #err)?; + match num { + ndc_core::num::Number::Int(i) => i.to_bigint(), + _ => return Err(#err), + } + }; + }, + pass: quote! { &#temp }, + static_type: quote! { ndc_core::StaticType::Int }, + } + } + + NdcType::BigRationalRef => { + let err = arg_error(position, "rational"); + VmInputArg { + extract: quote! { + let #temp = { + let num = #raw.to_number().ok_or_else(|| #err)?; + match num { + ndc_core::num::Number::Rational(r) => *r, + _ => return Err(#err), + } + }; + }, + pass: quote! { &#temp }, + static_type: quote! { ndc_core::StaticType::Rational }, + } + } + + NdcType::Complex64 => { + let err = arg_error(position, "complex"); + VmInputArg { + extract: quote! { + let #temp = { + let num = #raw.to_number().ok_or_else(|| #err)?; + match num { + ndc_core::num::Number::Complex(c) => c, + _ => return Err(#err), + } + }; + }, + pass: quote! { #temp }, + static_type: quote! { ndc_core::StaticType::Complex }, + } + } + + NdcType::MutString => { + let err = arg_error(position, "string"); + let rc = format_ident!("{temp}_rc"); + let guard = format_ident!("{temp}_guard"); + VmInputArg { + extract: quote! 
{ + let ndc_vm::value::Value::Object(ref #rc) = *#raw else { + return Err(#err); + }; + let ndc_vm::value::Object::String(ref #rc) = *#rc.as_ref() else { + return Err(#err); + }; + let mut #guard = #rc.borrow_mut(); + }, + pass: quote! { &mut *#guard }, + static_type: quote! { ndc_core::StaticType::String }, + } + } + + NdcType::StringRepr | NdcType::MutStringRepr => { + let err = arg_error(position, "string"); + let obj = format_ident!("{temp}_obj"); + let inner = format_ident!("{temp}_inner"); + let pass = if ndc_type == NdcType::MutStringRepr { + quote! { &mut #temp } + } else { + quote! { &#temp } + }; + VmInputArg { + extract: quote! { + let ndc_vm::value::Value::Object(ref #obj) = *#raw else { + return Err(#err); + }; + let ndc_vm::value::Object::String(ref #inner) = *#obj.as_ref() else { + return Err(#err); + }; + let mut #temp = #inner.clone(); + }, + pass, + static_type: quote! { ndc_core::StaticType::String }, + } + } + + NdcType::MutVecOfValue => refcell_collection_arg( + position, + "list", + "List", + true, + quote! { ndc_core::StaticType::List(Box::new(ndc_core::StaticType::Any)) }, + ), + + NdcType::MutHashMap => { + let err = arg_error(position, "map"); + let rc = format_ident!("{temp}_rc"); + let guard = format_ident!("{temp}_guard"); + VmInputArg { + extract: quote! { + let ndc_vm::value::Value::Object(ref #rc) = *#raw else { + return Err(#err); + }; + let ndc_vm::value::Object::Map { entries: ref #rc, .. } = *#rc.as_ref() else { + return Err(#err); + }; + let mut #guard = #rc.borrow_mut(); + }, + pass: quote! { &mut *#guard }, + static_type: quote! { + ndc_core::StaticType::Map { + key: Box::new(ndc_core::StaticType::Any), + value: Box::new(ndc_core::StaticType::Any), + } + }, + } + } + + NdcType::RefHashMap => { + let err = arg_error(position, "map"); + let rc = format_ident!("{temp}_rc"); + let guard = format_ident!("{temp}_guard"); + VmInputArg { + extract: quote! 
{ + let ndc_vm::value::Value::Object(ref #rc) = *#raw else { + return Err(#err); + }; + let ndc_vm::value::Object::Map { entries: ref #rc, .. } = *#rc.as_ref() else { + return Err(#err); + }; + let #guard = #rc.borrow(); + }, + pass: quote! { &*#guard }, + static_type: quote! { + ndc_core::StaticType::Map { + key: Box::new(ndc_core::StaticType::Any), + value: Box::new(ndc_core::StaticType::Any), + } + }, + } + } + + NdcType::MutMinHeap => refcell_collection_arg( + position, + "min heap", + "MinHeap", + true, + quote! { ndc_core::StaticType::MinHeap(Box::new(ndc_core::StaticType::Any)) }, + ), + + NdcType::MutMaxHeap => refcell_collection_arg( + position, + "max heap", + "MaxHeap", + true, + quote! { ndc_core::StaticType::MaxHeap(Box::new(ndc_core::StaticType::Any)) }, + ), + + NdcType::MutVecDeque => refcell_collection_arg( + position, + "deque", + "Deque", + true, + quote! { ndc_core::StaticType::Deque(Box::new(ndc_core::StaticType::Any)) }, + ), + + NdcType::RefVecDeque => refcell_collection_arg( + position, + "deque", + "Deque", + false, + quote! { ndc_core::StaticType::Deque(Box::new(ndc_core::StaticType::Any)) }, + ), + + NdcType::VmValue => VmInputArg { + extract: quote! {}, + pass: quote! { #raw.clone() }, + static_type: quote! { ndc_core::StaticType::Any }, + }, + + NdcType::SeqValue => VmInputArg { + extract: quote! {}, + pass: quote! { #raw.clone() }, + static_type: quote! { ndc_core::StaticType::Sequence(Box::new(ndc_core::StaticType::Any)) }, + }, + + NdcType::MapValue => VmInputArg { + extract: quote! {}, + pass: quote! { #raw.clone() }, + static_type: quote! { + ndc_core::StaticType::Map { + key: Box::new(ndc_core::StaticType::Any), + value: Box::new(ndc_core::StaticType::Any), + } + }, + }, + + NdcType::SliceOfValue => { + let vec = format_ident!("{temp}_vec"); + let err = arg_error(position, "list"); + VmInputArg { + extract: quote! 
{ + let #vec: Vec = match #raw { + ndc_vm::value::Value::Object(obj) => match obj.as_ref() { + ndc_vm::value::Object::List(list) => list.borrow().clone(), + ndc_vm::value::Object::Tuple(tuple) => tuple.clone(), + _ => return Err(#err), + }, + _ => return Err(#err), + }; + }, + pass: quote! { &#vec }, + static_type: quote! { + ndc_core::StaticType::List(Box::new(ndc_core::StaticType::Any)) + }, + } + } + + NdcType::MutVmCallable => { + let err = arg_error(position, "function"); + VmInputArg { + extract: quote! { + let mut #temp = match #raw { + ndc_vm::value::Value::Object(_obj) => match _obj.as_ref() { + ndc_vm::value::Object::Function(f) => ndc_vm::VmCallable { + function: f.clone(), + vm: _vm, + }, + _ => return Err(#err), + }, + _ => return Err(#err), + }; + }, + pass: quote! { &mut #temp }, + static_type: quote! { + ndc_core::StaticType::Function { + parameters: None, + return_type: Box::new(ndc_core::StaticType::Any), + } + }, + } + } + + // Return-only types — not valid as function parameters + NdcType::BigInt | NdcType::BigRational => return None, + }; + + Some(result) +} + +/// Try to generate the return expression (producing `Result`) +/// and the `StaticType` token for the return type. +/// +/// Returns `None` for unsupported types. +pub fn try_vm_return(output: &syn::ReturnType) -> Option<(TokenStream, TokenStream)> { + match output { + syn::ReturnType::Default => Some(( + quote! { Ok(ndc_vm::value::Value::unit()) }, + quote! { ndc_core::StaticType::Tuple(vec![]) }, + )), + syn::ReturnType::Type(_, ty) => try_vm_return_type(ty), + } +} + +fn try_vm_return_type(ty: &syn::Type) -> Option<(TokenStream, TokenStream)> { + // Handle Result wrapper — unwrap and recurse into the inner type + if let Some(inner) = unwrap_result(ty) { + let (inner_code, inner_type) = try_vm_return_inner(inner)?; + return Some(( + quote! 
{ + let result = result.map_err(|e| ndc_vm::error::VmError::native(e.to_string()))?; + #inner_code + }, + inner_type, + )); + } + + vm_return_for_classified(ty) +} + +/// Like `try_vm_return_type` but also handles `()` (unit), which only makes +/// sense as the Ok type inside a `Result<()>`. +fn try_vm_return_inner(ty: &syn::Type) -> Option<(TokenStream, TokenStream)> { + if let syn::Type::Tuple(t) = ty { + if t.elems.is_empty() { + return Some(( + quote! { Ok(ndc_vm::value::Value::unit()) }, + quote! { ndc_core::StaticType::Tuple(vec![]) }, + )); + } + } + vm_return_for_classified(ty) +} + +fn vm_return_for_classified(ty: &syn::Type) -> Option<(TokenStream, TokenStream)> { + let ndc_type = classify(ty)?; + match ndc_type { + NdcType::VmValue | NdcType::SeqValue => { + Some((quote! { Ok(result) }, quote! { ndc_core::StaticType::Any })) + } + NdcType::MapValue => Some(( + quote! { Ok(result) }, + quote! { + ndc_core::StaticType::Map { + key: Box::new(ndc_core::StaticType::Any), + value: Box::new(ndc_core::StaticType::Any), + } + }, + )), + NdcType::Number | NdcType::NumberRef => Some(( + quote! { Ok(ndc_vm::value::Value::from_number(result)) }, + quote! { ndc_core::StaticType::Number }, + )), + NdcType::F64 => Some(( + quote! { Ok(ndc_vm::value::Value::Float(result)) }, + quote! { ndc_core::StaticType::Float }, + )), + NdcType::Bool => Some(( + quote! { Ok(ndc_vm::value::Value::Bool(result)) }, + quote! { ndc_core::StaticType::Bool }, + )), + NdcType::StrRef => Some(( + quote! { Ok(ndc_vm::value::Value::string(result.to_owned())) }, + quote! { ndc_core::StaticType::String }, + )), + NdcType::String => Some(( + quote! { Ok(ndc_vm::value::Value::string(result)) }, + quote! { ndc_core::StaticType::String }, + )), + NdcType::I64 => Some(( + quote! { Ok(ndc_vm::value::Value::Int(result)) }, + quote! { ndc_core::StaticType::Int }, + )), + NdcType::Usize => Some(( + quote! { Ok(ndc_vm::value::Value::Int(result as i64)) }, + quote! 
{ ndc_core::StaticType::Int }, + )), + NdcType::BigInt => Some(( + quote! { + Ok(ndc_vm::value::Value::from_number( + ndc_core::num::Number::Int( + ndc_core::int::Int::BigInt(result).simplified() + ) + )) + }, + quote! { ndc_core::StaticType::Int }, + )), + NdcType::BigRational => Some(( + quote! { + Ok(ndc_vm::value::Value::from_number( + ndc_core::num::Number::Rational(Box::new(result)) + )) + }, + quote! { ndc_core::StaticType::Rational }, + )), + // Input-only types — not valid as return types + _ => None, + } +} diff --git a/ndc_parser/Cargo.toml b/ndc_parser/Cargo.toml index 17829a14..7ab23ab4 100644 --- a/ndc_parser/Cargo.toml +++ b/ndc_parser/Cargo.toml @@ -4,8 +4,8 @@ edition.workspace = true version.workspace = true [dependencies] +ndc_core.workspace = true ndc_lexer.workspace = true num.workspace = true derive_more = { workspace = true } -thiserror.workspace = true -itertools.workspace = true +thiserror.workspace = true \ No newline at end of file diff --git a/ndc_parser/src/expression.rs b/ndc_parser/src/expression.rs index 261d646f..4173bd36 100644 --- a/ndc_parser/src/expression.rs +++ b/ndc_parser/src/expression.rs @@ -1,6 +1,6 @@ use crate::operator::LogicalOperator; use crate::parser::Error as ParseError; -use crate::static_type::StaticType; +use ndc_core::{StaticType, TypeSignature}; use ndc_lexer::Span; use num::BigInt; use num::complex::Complex64; @@ -14,11 +14,26 @@ pub enum Binding { #[derive(Debug, Eq, PartialEq, Clone, Copy)] pub enum ResolvedVar { - Captured { depth: usize, slot: usize }, + Local { slot: usize }, + Upvalue { slot: usize }, Global { slot: usize }, } -#[derive(Eq, PartialEq, Clone)] +impl ResolvedVar { + pub fn slot(self) -> usize { + match self { + Self::Local { slot } | Self::Upvalue { slot, .. 
} | Self::Global { slot } => slot, + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum CaptureSource { + Local(usize), + Upvalue(usize), +} + +#[derive(Eq, PartialEq, Clone, Debug)] pub struct ExpressionLocation { pub expression: Expression, pub span: Span, @@ -62,10 +77,11 @@ pub enum Expression { FunctionDeclaration { name: Option, resolved_name: Option, - // TODO: Instead of an ExpressionLocation with a Tuple the parser should just give us something we can actually work with - parameters: Box, + type_signature: TypeSignature, + parameters_span: Span, body: Box, return_type: Option, + captures: Vec, pure: bool, }, Block { @@ -89,10 +105,6 @@ pub enum Expression { function: Box, arguments: Vec, }, - Index { - value: Box, - index: Box, - }, Tuple { values: Vec, }, @@ -130,11 +142,15 @@ pub enum ForIteration { #[derive(Debug, Eq, PartialEq, Clone)] pub enum ForBody { Block(ExpressionLocation), - List(ExpressionLocation), + List { + expr: ExpressionLocation, + accumulator_slot: Option, + }, Map { key: ExpressionLocation, value: Option, default: Option>, + accumulator_slot: Option, }, } @@ -151,6 +167,8 @@ pub enum Lvalue { Index { value: Box, index: Box, + resolved_set: Option, + resolved_get: Option, }, // Example: `let a, b = ...` Sequence(Vec), @@ -221,7 +239,11 @@ impl Lvalue { #[must_use] pub fn can_build_from_expression(expression: &Expression) -> bool { match expression { - Expression::Identifier { .. } | Expression::Index { .. } => true, + Expression::Identifier { .. } => true, + Expression::Call { + function, + arguments, + } if is_index_call(function, arguments) => true, Expression::List { values } | Expression::Tuple { values } => values .iter() .all(|el| Self::can_build_from_expression(&el.expression)), @@ -246,7 +268,19 @@ impl TryFrom for Lvalue { fn try_from(value: ExpressionLocation) -> Result { match value.expression { Expression::Identifier { name, .. 
} => Ok(Self::new_identifier(name, value.span)), - Expression::Index { value, index } => Ok(Self::Index { value, index }), + Expression::Call { + function, + mut arguments, + } if is_index_call(&function, &arguments) => { + let index = arguments.remove(1); + let container = arguments.remove(0); + Ok(Self::Index { + value: Box::new(container), + index: Box::new(index), + resolved_set: None, + resolved_get: None, + }) + } Expression::List { values } | Expression::Tuple { values } => Ok(Self::Sequence( values .into_iter() @@ -259,161 +293,9 @@ impl TryFrom for Lvalue { } } -#[allow(clippy::missing_fields_in_debug, clippy::too_many_lines)] -impl std::fmt::Debug for ExpressionLocation { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - // write!(f, "{{{:?} at {:?}}}", self.expression, self.span) - match &self.expression { - Expression::BoolLiteral(b) => { - f.debug_struct("BooleanLiteral").field("value", &b).finish() - } - Expression::StringLiteral(s) => { - f.debug_struct("StringLiteral").field("value", &s).finish() - } - Expression::Int64Literal(i) => f.debug_struct("IntLiteral").field("value", &i).finish(), - Expression::Float64Literal(v) => { - f.debug_struct("FloatLiteral").field("value", &v).finish() - } - Expression::BigIntLiteral(big_int) => f - .debug_struct("BigIntLiteral") - .field("value", &big_int) - .finish(), - Expression::ComplexLiteral(complex) => f - .debug_struct("CoplexLiteral") - .field("value", &complex) - .finish(), - Expression::Identifier { - name: ident, - resolved, - } => f - .debug_struct("Ident") - .field("value", &ident) - .field("resolved", resolved) - .finish(), - Expression::Statement(expression_location) => f - .debug_struct("Statement") - .field("expression", &expression_location) - .finish(), - Expression::Logical { - left, - operator, - right, - } => f - .debug_struct("Logical") - .field("left", left) - .field("operator", operator) - .field("right", right) - .finish(), - 
Expression::Grouping(expression_location) => f - .debug_struct("Grouping") - .field("expression", expression_location) - .finish(), - Expression::VariableDeclaration { l_value, value } => f - .debug_struct("VariableDeclaration") - .field("l_value", l_value) - .field("value", value) - .finish(), - Expression::Assignment { l_value, r_value } => f - .debug_struct("Assignment") - .field("l_value", l_value) - .field("r_value", r_value) - .finish(), - Expression::OpAssignment { - l_value, - r_value: value, - operation, - resolved_operation, - resolved_assign_operation, - } => f - .debug_struct("OpAssignment") - .field("l_value", l_value) - .field("value", value) - .field("operation", operation) - .field("resolved_operation", resolved_operation) - .field("resolved_assign_operation", resolved_assign_operation) - .finish(), - Expression::FunctionDeclaration { - name, - parameters, - return_type, - body, - pure, - resolved_name, - } => f - .debug_struct("FunctionDeclaration") - .field("name", name) - .field("resolved_name", resolved_name) - .field("parameters", parameters) - .field("return_type", return_type) - .field("body", body) - .field("pure", pure) - .finish(), - Expression::Block { statements } => f - .debug_struct("Block") - .field("statements", statements) - .finish(), - Expression::If { - condition, - on_true, - on_false, - } => f - .debug_struct("If") - .field("condition", condition) - .field("on_true", on_true) - .field("on_false", on_false) - .finish(), - Expression::While { - expression, - loop_body, - } => f - .debug_struct("While") - .field("expression", expression) - .field("loop_body", loop_body) - .finish(), - Expression::For { iterations, body } => f - .debug_struct("For") - .field("iterations", iterations) - .field("body", body) - .finish(), - Expression::Call { - function, - arguments, - } => f - .debug_struct("Call") - .field("function", function) - .field("arguments", arguments) - .finish(), - Expression::Index { value, index } => f - 
.debug_struct("Index") - .field("value", value) - .field("index", index) - .finish(), - Expression::Tuple { values } => { - f.debug_struct("Tuple").field("values", values).finish() - } - Expression::List { values } => f.debug_struct("List").field("values", values).finish(), - Expression::Map { values, default } => f - .debug_struct("Map") - .field("values", values) - .field("default", default) - .finish(), - Expression::Return { value } => f.debug_struct("Return").field("value", value).finish(), - Expression::Break => f.debug_struct("Break").finish(), - Expression::Continue => f.debug_struct("Continue").finish(), - Expression::RangeInclusive { start, end } => f - .debug_struct("RangeInclusive") - .field("start", start) - .field("end", end) - .field("start", start) - .field("end", end) - .finish(), - Expression::RangeExclusive { start, end } => f - .debug_struct("RangeExclusive") - .field("start", start) - .field("end", end) - .field("start", start) - .field("end", end) - .finish(), - } - } +fn is_index_call(function: &ExpressionLocation, arguments: &[ExpressionLocation]) -> bool { + matches!( + &function.expression, + Expression::Identifier { name, .. 
} if name == "[]" + ) && arguments.len() == 2 } diff --git a/ndc_parser/src/lib.rs b/ndc_parser/src/lib.rs index f6f5e900..ee441ba7 100644 --- a/ndc_parser/src/lib.rs +++ b/ndc_parser/src/lib.rs @@ -1,12 +1,11 @@ mod expression; mod operator; mod parser; -mod static_type; pub use expression::{ - Binding, Expression, ExpressionLocation, ForBody, ForIteration, Lvalue, ResolvedVar, + Binding, CaptureSource, Expression, ExpressionLocation, ForBody, ForIteration, Lvalue, + ResolvedVar, }; pub use operator::{BinaryOperator, LogicalOperator, UnaryOperator}; pub use parser::Error; pub use parser::Parser; -pub use static_type::{Parameter, StaticType, TypeSignature}; diff --git a/ndc_parser/src/parser.rs b/ndc_parser/src/parser.rs index 733566fb..cbe3d51c 100644 --- a/ndc_parser/src/parser.rs +++ b/ndc_parser/src/parser.rs @@ -3,6 +3,7 @@ use std::fmt::Write; use crate::expression::Expression; use crate::expression::{Binding, ExpressionLocation, ForBody, ForIteration, Lvalue}; use crate::operator::{BinaryOperator, LogicalOperator, UnaryOperator}; +use ndc_core::{Parameter, StaticType, TypeSignature}; use ndc_lexer::{Span, Token, TokenLocation}; pub struct Parser { @@ -29,6 +30,7 @@ impl Parser { | Expression::While { .. } | Expression::For { .. } | Expression::FunctionDeclaration { .. } + | Expression::VariableDeclaration { .. 
} ) }; let mut expressions = Vec::new(); @@ -331,9 +333,7 @@ impl Parser { self.require_current_token_matches(&Token::Semicolon)?; } - Ok(declaration - .to_location(let_token.span.merge(end)) - .to_statement()) + Ok(declaration.to_location(let_token.span.merge(end))) } fn expression(&mut self) -> Result { @@ -697,39 +697,63 @@ impl Parser { // for now, we require parentheses } Token::LeftSquareBracket => { + let bracket_span = current.span; self.require_current_token_matches(&Token::LeftSquareBracket)?; // self.expression here allows for this syntax which is maybe a good idea // `foo[1, 2] == foo[(1, 2)]` // and // `foo[x := 3]` - let index_expression = self.expression()?; - - // TODO: this error may be triggered in a scenario described below, and it would - // probably be nice if we could have a special message in a later version - // - // # Error code - // - // if x == y { true } else { false } - // [x for x in 1..10] - // - // In this case we have some kind of expression that could also be a statement - // followed by a list comprehension (the same problem would arise if the next - // statement was a tuple). The list comprehension or tuple will now be interpreted - // as an operand for the previous expression as if we meant to write this: - // - // if x == y { foo } else { bar }[12] - // - // This ambiguity can only be resolved by adding a semicolon to the if expression - // or by not putting a list comprehension or tuple in this position. + let mut index_expression = self.expression()?; + + // Reject `a[x..=]` — inclusive ranges must have an end. + if matches!( + &index_expression.expression, + Expression::RangeInclusive { end: None, .. } + ) { + return Err(Error::text( + "inclusive ranges must have an end".to_string(), + index_expression.span, + )); + } + + // Normalize open-ended ranges: `a[..3]` → `a[0..3]` so the range + // can be evaluated as a standalone value (ranges without a lower + // bound cannot be evaluated otherwise). 
+ match &mut index_expression.expression { + Expression::RangeExclusive { + start: start @ None, + .. + } + | Expression::RangeInclusive { + start: start @ None, + .. + } => { + *start = Some(Box::new( + Expression::Int64Literal(0).to_location(index_expression.span), + )); + } + _ => {} + } + + // Note: `if x == y { true } else { false }` followed by `[x for x in 1..10]` + // on the next line triggers this path — the list comprehension is parsed as an + // index operation on the if-expression. A semicolon after the if resolves the + // ambiguity. let end_token = self.require_current_token_matches(&Token::RightSquareBracket)?; let span = expr.span.merge(end_token.span); expr = ExpressionLocation { - expression: Expression::Index { - value: Box::new(expr), - index: Box::new(index_expression), + expression: Expression::Call { + function: Box::new( + Expression::Identifier { + name: "[]".to_string(), + resolved: Binding::None, + } + .to_location(bracket_span), + ), + arguments: vec![expr, index_expression], }, span, }; @@ -794,7 +818,10 @@ impl Parser { } // WOAH, this is not a list, it's a list comprehension Some(Token::For) => { - let result = ForBody::List(expr.simplify()); + let result = ForBody::List { + expr: expr.simplify(), + accumulator_slot: None, + }; self.for_comprehension(left_square_bracket_span, result, &Token::RightSquareBracket) } _ => { @@ -958,8 +985,6 @@ impl Parser { resolved: Binding::None, }, _ => { - // TODO: this error might not be the best way to describe what's happening here - // figure out if there is a better way to handle errors here. 
return Err(Error::text( format!( "Expected an expression but got '{}' instead", @@ -1145,29 +1170,26 @@ impl Parser { None => return Err(Error::end_of_input(argument_list.span)), }; + let parameters_span = argument_list.span; let span = fn_token.span.merge(body.span); Ok(ExpressionLocation { expression: Expression::FunctionDeclaration { name: identifier, - parameters: Box::new(argument_list), + type_signature: argument_list + .try_into() + .expect("INTERNAL ERROR: type of argument list is incorrect"), + parameters_span, body: Box::new(body), return_type: None, // At some point in the future we could use type declarations here to insert the type (return type inference is cringe anyway) - pure: is_pure, resolved_name: None, + captures: vec![], + pure: is_pure, }, span, }) } /// Parses a block expression including the block delimiters `{` and `}` - /// example: - /// ```ndc - /// { - /// func(); - /// x := 1 + 1; - /// x - /// } - /// ``` fn block(&mut self) -> Result { let left_curly_span = self.require_token(&[Token::LeftCurlyBracket])?; @@ -1188,6 +1210,21 @@ impl Parser { } }; + // Non-last items in a block are in statement position: wrap bare expressions + // (e.g. `if` without else) in Statement so the compiler discards their values. + let last = statements.len().saturating_sub(1); + let statements = statements + .into_iter() + .enumerate() + .map(|(i, stmt)| { + if i < last && !matches!(stmt.expression, Expression::Statement(_)) { + stmt.to_statement() + } else { + stmt + } + }) + .collect(); + Ok(Expression::Block { statements }.to_location(left_curly_span.merge(loop_span))) } @@ -1240,6 +1277,7 @@ impl Parser { key: key_expr, value: value_expr, default, + accumulator_slot: None, }, &Token::RightCurlyBracket, ); @@ -1324,3 +1362,30 @@ fn tokens_to_string(tokens: &[Token]) -> String { } buf } + +impl TryFrom for TypeSignature { + type Error = (); + + fn try_from( + ExpressionLocation { expression, .. 
}: ExpressionLocation, + ) -> Result { + let Expression::Tuple { values } = expression else { + return Err(()); + }; + + values + .into_iter() + .map(|expression_location| { + let ExpressionLocation { expression, .. } = expression_location; + + match expression { + Expression::Identifier { name, .. } => { + Ok(Parameter::new(name, StaticType::Any)) + } + _ => Err(()), + } + }) + .collect::, ()>>() + .map(TypeSignature::Exact) + } +} diff --git a/ndc_stdlib/Cargo.toml b/ndc_stdlib/Cargo.toml index f7323e23..92787d16 100644 --- a/ndc_stdlib/Cargo.toml +++ b/ndc_stdlib/Cargo.toml @@ -7,8 +7,9 @@ version.workspace = true anyhow.workspace = true factorial.workspace = true itertools.workspace = true -ndc_interpreter = { workspace = true } +ndc_core.workspace = true ndc_macros.workspace = true +ndc_vm = { workspace = true } num.workspace = true once_cell.workspace = true tap.workspace = true diff --git a/ndc_stdlib/src/aoc.rs b/ndc_stdlib/src/aoc.rs index 8b695704..7a7a188c 100644 --- a/ndc_stdlib/src/aoc.rs +++ b/ndc_stdlib/src/aoc.rs @@ -1,31 +1,30 @@ use ndc_macros::export_module; +use ndc_vm::value::{Object, SeqValue, Value}; + #[export_module] mod inner { - use std::cell::RefCell; + use ndc_core::hash_map::HashMap; use std::rc::Rc; - use ndc_interpreter::hash_map::HashMap; - use ndc_interpreter::iterator::mut_seq_to_iterator; - use ndc_interpreter::sequence::Sequence; - use ndc_interpreter::value::Value; - /// Counts the occurrences of each item in a sequence and returns a map with the frequencies. - #[function(return_type = HashMap<_, _>)] - pub fn frequencies(seq: &mut Sequence) -> Value { - let mut out_map = HashMap::new(); + pub fn frequencies(seq: SeqValue) -> anyhow::Result { + let mut counts: HashMap = HashMap::new(); - for item in mut_seq_to_iterator(seq) { - *out_map.entry(item).or_insert(0i64) += 1; + for item in seq + .try_into_iter() + .ok_or_else(|| anyhow::anyhow!("frequencies requires a sequence"))? 
+ { + *counts.entry(item).or_insert(0i64) += 1; } - let out_map = out_map + let entries: HashMap = counts .into_iter() - .map(|(key, value)| (key, Value::from(value))) - .collect::>(); + .map(|(k, v)| (k, Value::Int(v))) + .collect(); - Value::Sequence(Sequence::Map( - Rc::new(RefCell::new(out_map)), - Some(Box::new(Value::from(0))), - )) + Ok(Value::Object(Rc::new(Object::map( + entries, + Some(Value::Int(0)), + )))) } } diff --git a/ndc_stdlib/src/cmp.rs b/ndc_stdlib/src/cmp.rs index 9362cbaf..42cb2a64 100644 --- a/ndc_stdlib/src/cmp.rs +++ b/ndc_stdlib/src/cmp.rs @@ -1,9 +1,10 @@ -#[ndc_macros::export_module] +use ndc_macros::export_module; +use ndc_vm::value::Value; + +#[export_module] mod inner { use anyhow::anyhow; - use ndc_interpreter::compare::FallibleOrd; - use ndc_interpreter::value::Value; use std::cmp::Ordering; /// Produces an error if the argument is not true. @@ -16,7 +17,7 @@ mod inner { } /// Produces an error if the arguments aren't equal to each other. - pub fn assert_eq(left: &Value, right: &Value) -> anyhow::Result<()> { + pub fn assert_eq(left: Value, right: Value) -> anyhow::Result<()> { if left == right { Ok(()) } else { @@ -27,7 +28,7 @@ mod inner { } /// Produces an error if the arguments are equal to each other. - pub fn assert_ne(left: &Value, right: &Value) -> anyhow::Result<()> { + pub fn assert_ne(left: Value, right: Value) -> anyhow::Result<()> { if left == right { Err(anyhow!(format!( "failed asserting that {left} does not equal {right}" @@ -48,18 +49,28 @@ mod inner { } /// Returns the larger of `left` and `right`, preferring `left` if they are equal. - pub fn max(left: &Value, right: &Value) -> Result { - match left.try_cmp(right)? 
{ - Ordering::Equal | Ordering::Greater => Ok(left.clone()), - Ordering::Less => Ok(right.clone()), + pub fn max(left: Value, right: Value) -> anyhow::Result { + match left.partial_cmp(&right) { + Some(Ordering::Equal) | Some(Ordering::Greater) => Ok(left), + Some(Ordering::Less) => Ok(right), + None => Err(anyhow!( + "cannot compare {} and {}", + left.static_type(), + right.static_type() + )), } } /// Returns the smaller of `left` and `right`, preferring `left` if they are equal. - pub fn min(left: &Value, right: &Value) -> Result { - match left.try_cmp(right)? { - Ordering::Equal | Ordering::Less => Ok(left.clone()), - Ordering::Greater => Ok(right.clone()), + pub fn min(left: Value, right: Value) -> anyhow::Result { + match left.partial_cmp(&right) { + Some(Ordering::Equal) | Some(Ordering::Less) => Ok(left), + Some(Ordering::Greater) => Ok(right), + None => Err(anyhow!( + "cannot compare {} and {}", + left.static_type(), + right.static_type() + )), } } } diff --git a/ndc_stdlib/src/crypto.rs b/ndc_stdlib/src/crypto.rs index 2dfec55f..87634515 100644 --- a/ndc_stdlib/src/crypto.rs +++ b/ndc_stdlib/src/crypto.rs @@ -4,13 +4,13 @@ use ndc_macros::export_module; mod internal { use sha1::Digest; - /// Computes the md5 hash of an input string and returns it as an hex encoded string + /// Computes the md5 hash of an input string and returns it as a hex-encoded string. pub fn md5(val: &str) -> String { let digest = md5::compute(val); format!("{:x}", digest) } - /// Computes the sha1 hash of an input string and returns it as an hex encoded string + /// Computes the sha1 hash of an input string and returns it as a hex-encoded string. 
pub fn sha1(val: &str) -> String { let mut hasher = sha1::Sha1::new(); hasher.update(val); diff --git a/ndc_stdlib/src/deque.rs b/ndc_stdlib/src/deque.rs index afa286de..e22f0c78 100644 --- a/ndc_stdlib/src/deque.rs +++ b/ndc_stdlib/src/deque.rs @@ -1,19 +1,18 @@ use ndc_macros::export_module; +use ndc_vm::value::{Object, Value}; #[export_module] mod inner { - use ndc_interpreter::sequence::Sequence; - use ndc_interpreter::value::Value; - use std::cell::RefCell; use std::collections::VecDeque; - use std::rc::Rc; /// Creates a new `Deque` type. /// /// The `Deque` type allows the user to quickly append and remove elements from both the start and the end of the list. #[function(name = "Deque", return_type = VecDeque)] pub fn create_deque() -> Value { - Value::Sequence(Sequence::Deque(Rc::new(RefCell::new(VecDeque::new())))) + Value::Object(std::rc::Rc::new(Object::Deque(std::cell::RefCell::new( + VecDeque::new(), + )))) } /// Pushes the `value` to the front of the `deque`. @@ -36,7 +35,10 @@ mod inner { /// Removes and returns the first element of the `deque` as an `Option` returning `None` if the queue is empty. #[function(name = "pop_front?", return_type = Option)] pub fn maybe_pop_front(deque: &mut VecDeque) -> Value { - deque.pop_front().map_or_else(Value::none, Value::some) + match deque.pop_front() { + None => Value::None, + Some(val) => Value::Object(std::rc::Rc::new(Object::Some(val))), + } } /// Removes and returns the last element of the `deque`. @@ -49,7 +51,10 @@ mod inner { /// Removes and returns the last element of the `deque` as an `Option` returning `None` if the queue is empty. #[function(name = "pop_back?", return_type = Option)] pub fn maybe_pop_back(deque: &mut VecDeque) -> Value { - deque.pop_back().map_or_else(Value::none, Value::some) + match deque.pop_back() { + None => Value::None, + Some(val) => Value::Object(std::rc::Rc::new(Object::Some(val))), + } } /// Returns (but does not remove) the first element of the `deque`. 
@@ -63,7 +68,10 @@ mod inner { /// Returns (but does not remove) the first element of the `deque` as an `Option` returning `None` if the queue is empty. #[function(name = "front?", return_type = Option)] pub fn maybe_front(deque: &VecDeque) -> Value { - deque.front().cloned().map_or_else(Value::none, Value::some) + match deque.front().cloned() { + None => Value::None, + Some(val) => Value::Object(std::rc::Rc::new(Object::Some(val))), + } } /// Returns (but does not remove) the last element of the `deque`. @@ -77,12 +85,15 @@ mod inner { /// Returns (but does not remove) the last element of the `deque` as an `Option` returning `None` if the queue is empty. #[function(name = "back?", return_type = Option)] pub fn maybe_back(deque: &VecDeque) -> Value { - deque.back().cloned().map_or_else(Value::none, Value::some) + match deque.back().cloned() { + None => Value::None, + Some(val) => Value::Object(std::rc::Rc::new(Object::Some(val))), + } } /// Returns `true` if the `deque` contains the `value` or `false` otherwise. - pub fn contains(deque: &VecDeque, value: &Value) -> bool { - deque.contains(value) + pub fn contains(deque: &VecDeque, value: Value) -> bool { + deque.contains(&value) } /// Returns `true` if the `deque` is empty and `false` otherwise. 
diff --git a/ndc_stdlib/src/file.rs b/ndc_stdlib/src/file.rs index 23752e97..80fdb536 100644 --- a/ndc_stdlib/src/file.rs +++ b/ndc_stdlib/src/file.rs @@ -1,84 +1,75 @@ -use ndc_interpreter::environment::Environment; -use ndc_interpreter::function::{ - FunctionBody, FunctionBuilder, FunctionCarrier, StaticType, TypeSignature, -}; -use ndc_interpreter::value::Value; +use ndc_core::{FunctionRegistry, StaticType}; use ndc_macros::export_module; +use ndc_vm::error::VmError; +use ndc_vm::value::{NativeFunc, NativeFunction, Value}; +use std::fmt::Write as FmtWrite; use std::fs::read_to_string; +use std::rc::Rc; #[export_module] mod inner { use anyhow::Context; use std::path::PathBuf; + /// Reads the entire contents of a file into a string. pub fn read_file(file_path: &str) -> anyhow::Result { read_to_string(file_path.parse::().context("invalid file path")?) .context("failed to read file") } } -pub fn register_variadic(env: &mut Environment) { - (env).declare_global_fn( - FunctionBuilder::default() - .name("print".to_string()) - .documentation("Print the value.".to_string()) - .body(FunctionBody::GenericFunction { - function: |args, env| { - env.borrow_mut() - .with_output(|output| { - let mut iter = args.iter().peekable(); +pub fn register_variadic(env: &mut FunctionRegistry>) { + let print_native = Rc::new(NativeFunction { + name: "print".to_string(), + documentation: Some( + "Prints its arguments to standard output, separated by spaces, followed by a newline." 
+ .to_string(), + ), + static_type: StaticType::Function { + parameters: None, + return_type: Box::new(StaticType::unit()), + }, + func: NativeFunc::WithVm(Box::new(|args, vm| { + let mut buf = String::new(); + let mut iter = args.iter().peekable(); + if iter.peek().is_none() { + buf.push('\n'); + } else { + while let Some(arg) = iter.next() { + if iter.peek().is_some() { + write!(buf, "{arg} ").map_err(|e| VmError::native(e.to_string()))?; + } else { + writeln!(buf, "{arg}").map_err(|e| VmError::native(e.to_string()))?; + } + } + } + vm.write_output(&buf)?; + Ok(Value::unit()) + })), + }); - // If no arguments are passed to the print function just print an empty line - if iter.peek().is_none() { - writeln!(output)?; - return Ok(()); - } + let dbg_native = Rc::new(NativeFunction { + name: "dbg".to_string(), + documentation: Some("Prints its arguments in debug format to standard output, separated by spaces, followed by a newline.".to_string()), + static_type: StaticType::Function { + parameters: None, + return_type: Box::new(StaticType::unit()), + }, + func: NativeFunc::WithVm(Box::new(|args, vm| { + let mut buf = String::new(); + let mut iter = args.iter().peekable(); + while let Some(arg) = iter.next() { + if iter.peek().is_some() { + write!(buf, "{arg:?} ").map_err(|e| VmError::native(e.to_string()))?; + } else { + writeln!(buf, "{arg:?}").map_err(|e| VmError::native(e.to_string()))?; + } + } + vm.write_output(&buf)?; + Ok(Value::unit()) + })), + }); - // Otherwise - while let Some(arg) = iter.next() { - if iter.peek().is_some() { - write!(output, "{arg} ")?; - } else { - writeln!(output, "{arg}")?; - } - } - Ok(()) - }) - .map_err(|err| FunctionCarrier::IntoEvaluationError(Box::new(err)))?; - Ok(Value::unit()) - }, - type_signature: TypeSignature::Variadic, - return_type: StaticType::unit(), - }) - .build() - .expect("function definition defined in code must be valid"), - ); - - env.declare_global_fn( - FunctionBuilder::default() - .name("dbg".to_string()) - 
.documentation("Prints the values for quick and dirty debugging (using the value's debug representation).".to_string()) - .body(FunctionBody::GenericFunction { - function: |args, env| { - env.borrow_mut() - .with_output(|output| { - let mut iter = args.iter().peekable(); - while let Some(arg) = iter.next() { - if iter.peek().is_some() { - write!(output, "{arg:?} ")?; - } else { - writeln!(output, "{arg:?}")?; - } - } - Ok(()) - }) - .map_err(|err| FunctionCarrier::IntoEvaluationError(Box::new(err)))?; - Ok(Value::unit()) - }, - type_signature: TypeSignature::Variadic, - return_type: StaticType::unit(), - }) - .build() - .expect("function definition defined in code must be valid"), - ); + env.declare_global_fn(print_native); + env.declare_global_fn(dbg_native); } diff --git a/ndc_stdlib/src/hash_map.rs b/ndc_stdlib/src/hash_map.rs index 2d81b6aa..b55bfe65 100644 --- a/ndc_stdlib/src/hash_map.rs +++ b/ndc_stdlib/src/hash_map.rs @@ -1,9 +1,5 @@ -use ndc_interpreter::hash_map; -use ndc_interpreter::hash_map::HashMap; -use ndc_interpreter::hash_map::HashMapExt; -use ndc_interpreter::sequence::{DefaultMap, MapRepr, Sequence}; -use ndc_interpreter::value::Value; -use std::cell::RefCell; +use ndc_core::hash_map::{self, HashMapExt}; +use ndc_vm::value::{MapValue, Object, SeqValue, Value}; use std::rc::Rc; #[ndc_macros::export_module] @@ -13,26 +9,33 @@ mod inner { /// /// Note that for a set this will return the values in the set. #[function(return_type = Vec<_>)] - pub fn keys(map: &mut HashMap) -> Value { - Value::list(map.keys().cloned().collect::>()) + pub fn keys(map: &mut hash_map::HashMap) -> Value { + Value::Object(Rc::new(Object::list( + map.keys().cloned().collect::>(), + ))) } /// Returns a list of all the values in the map. /// /// Note that for sets this will return a list of unit types, you should use keys if you want the values in the set. 
#[function(return_type = Vec<_>)] - pub fn values(map: &mut HashMap) -> Value { - Value::list(map.values().cloned().collect::>()) + pub fn values(map: &mut hash_map::HashMap) -> Value { + Value::Object(Rc::new(Object::list( + map.values().cloned().collect::>(), + ))) } /// Removes a key from the map or a value from a set. - pub fn remove(map: &mut HashMap, key: &Value) { - map.remove(key); + pub fn remove(map: &mut hash_map::HashMap, key: Value) { + map.remove(&key); } /// Removes all keys from the `left` map/set that are present in the `right` map/set. #[function(name = "remove")] - pub fn remove_map(left: &mut HashMap, right: &HashMap) { + pub fn remove_map( + left: &mut hash_map::HashMap, + right: &hash_map::HashMap, + ) { for (key, _) in right { left.remove(key); } @@ -40,149 +43,183 @@ mod inner { /// Insert a value into a map. #[function(name = "insert")] - pub fn insert_map(map: &mut HashMap, key: Value, value: Value) { + pub fn insert_map(map: &mut hash_map::HashMap, key: Value, value: Value) { map.insert(key, value); } /// Inserts a value into a set. #[function(name = "insert")] - pub fn insert_set(map: &mut HashMap, key: Value) { + pub fn insert_set(map: &mut hash_map::HashMap, key: Value) { map.insert(key, Value::unit()); } /// Returns true if the map or set contains no elements. - pub fn is_empty(map: &HashMap) -> bool { + pub fn is_empty(map: &hash_map::HashMap) -> bool { map.is_empty() } + /// Intersection-assign: retains only elements present in both maps or sets. 
#[function(name = "&=")] - pub fn intersect_assign(lhs: &mut MapRepr, rhs: &mut MapRepr) { - let left_map: &mut HashMap = &mut lhs - .try_borrow_mut() - .expect("Failed to mutably borrow the lhs of &= operator"); - - left_map.intersection( - &*rhs - .try_borrow() - .expect("Failed borrow the rhs of &= operator"), - ); + pub fn intersect_assign( + lhs: MapValue, + rhs: &hash_map::HashMap, + ) -> anyhow::Result { + { + let Value::Object(ref obj) = lhs else { + anyhow::bail!("&= requires a map on the left side"); + }; + let Object::Map { ref entries, .. } = *obj.as_ref() else { + anyhow::bail!("&= requires a map on the left side"); + }; + entries.borrow_mut().intersection(rhs); + } + Ok(lhs) } + /// Union-assign: adds all elements from the right map or set into the left. #[function(name = "|=")] - pub fn union_assign(lhs: &mut MapRepr, rhs: &mut MapRepr) { - let left_map: &mut HashMap = &mut lhs.borrow_mut(); - - if Rc::strong_count(rhs) == 1 { - // Take ownership - let rhs = std::mem::take(&mut *rhs.borrow_mut()); - left_map.union(rhs); - } else { - let right = rhs.borrow(); - for (key, value) in right.iter() { - left_map.insert(key.clone(), value.clone()); + pub fn union_assign( + lhs: MapValue, + rhs: &hash_map::HashMap, + ) -> anyhow::Result { + { + let Value::Object(ref obj) = lhs else { + anyhow::bail!("|= requires a map on the left side"); + }; + let Object::Map { ref entries, .. } = *obj.as_ref() else { + anyhow::bail!("|= requires a map on the left side"); + }; + let mut m = entries.borrow_mut(); + for (key, value) in rhs { + m.insert(key.clone(), value.clone()); } } + Ok(lhs) } + /// Difference-assign: removes all elements from the left map or set that are present in the right. 
#[function(name = "-=")] - pub fn difference_assign(lhs: &mut MapRepr, rhs: &mut MapRepr) { - let left_map: &mut HashMap = &mut lhs.borrow_mut(); - - left_map.difference(&*rhs.borrow()); + pub fn difference_assign( + lhs: MapValue, + rhs: &hash_map::HashMap, + ) -> anyhow::Result { + { + let Value::Object(ref obj) = lhs else { + anyhow::bail!("-= requires a map on the left side"); + }; + let Object::Map { ref entries, .. } = *obj.as_ref() else { + anyhow::bail!("-= requires a map on the left side"); + }; + entries.borrow_mut().difference(rhs); + } + Ok(lhs) } + /// Symmetric-difference-assign: retains only elements present in exactly one of the two maps or sets. #[function(name = "~=")] - pub fn symmetric_difference_assign(lhs: &mut MapRepr, rhs: &mut MapRepr) { - let diff = hash_map::symmetric_difference( - &*lhs - .try_borrow() - .expect("Failed to borrow the lhs of ~= operator"), - &*rhs - .try_borrow() - .expect("Failed borrow the rhs of ~= operator"), - ); - - *lhs.borrow_mut() = diff; + pub fn symmetric_difference_assign( + lhs: MapValue, + rhs: &hash_map::HashMap, + ) -> anyhow::Result { + { + let Value::Object(ref obj) = lhs else { + anyhow::bail!("~= requires a map on the left side"); + }; + let Object::Map { ref entries, .. } = *obj.as_ref() else { + anyhow::bail!("~= requires a map on the left side"); + }; + let diff = hash_map::symmetric_difference(&*entries.borrow(), rhs); + *entries.borrow_mut() = diff; + } + Ok(lhs) } /// Returns the union (elements that are in either `left` or `right`) of two maps or sets. /// - /// This is the same as evaluating the expression `left | right` - #[function(alias = "|", return_type = DefaultMap<'_>)] - pub fn union(left: DefaultMap<'_>, right: &HashMap) -> Value { - Value::Sequence(Sequence::Map( - Rc::new(RefCell::new(hash_map::union(left.0, right))), - left.1, - )) + /// This is the same as evaluating the expression `left | right`. 
+ #[function(alias = "|")] + pub fn union( + left: MapValue, + right: &hash_map::HashMap, + ) -> anyhow::Result { + let Value::Object(ref obj) = left else { + anyhow::bail!("| requires a map on the left side"); + }; + let Object::Map { + ref entries, + ref default, + } = *obj.as_ref() + else { + anyhow::bail!("| requires a map on the left side"); + }; + let new_entries = hash_map::union(&*entries.borrow(), right); + Ok(Value::Object(Rc::new(Object::map( + new_entries, + default.clone(), + )))) } - /// Returns the intersection (elements that are in both `left and `right`) of two maps or sets. + /// Returns the intersection (elements that are in both `left` and `right`) of two maps or sets. /// /// This is the same as evaluating the expression `left & right`. - #[function(alias = "&", return_type = DefaultMap<'_>)] - pub fn intersection(left: DefaultMap<'_>, right: &HashMap) -> Value { - Value::Sequence(Sequence::Map( - Rc::new(RefCell::new(hash_map::intersection(left.0, right))), - left.1, - )) + #[function(alias = "&")] + pub fn intersection( + left: MapValue, + right: &hash_map::HashMap, + ) -> anyhow::Result { + let Value::Object(ref obj) = left else { + anyhow::bail!("& requires a map on the left side"); + }; + let Object::Map { + ref entries, + ref default, + } = *obj.as_ref() + else { + anyhow::bail!("& requires a map on the left side"); + }; + let new_entries = hash_map::intersection(&*entries.borrow(), right); + Ok(Value::Object(Rc::new(Object::map( + new_entries, + default.clone(), + )))) } /// Returns the symmetric difference (elements that are either in `left` or `right` but not both) of two maps or sets. /// /// This is the same as evaluating the expression `left ~ right`. 
- #[function(alias = "~", return_type = DefaultMap<'_>)] - pub fn symmetric_difference(left: DefaultMap<'_>, right: &HashMap) -> Value { - Value::Sequence(Sequence::Map( - Rc::new(RefCell::new(hash_map::symmetric_difference(left.0, right))), - left.1, - )) + #[function(alias = "~")] + pub fn symmetric_difference( + left: MapValue, + right: &hash_map::HashMap, + ) -> anyhow::Result { + let Value::Object(ref obj) = left else { + anyhow::bail!("~ requires a map on the left side"); + }; + let Object::Map { + ref entries, + ref default, + } = *obj.as_ref() + else { + anyhow::bail!("~ requires a map on the left side"); + }; + let new_entries = hash_map::symmetric_difference(&*entries.borrow(), right); + Ok(Value::Object(Rc::new(Object::map( + new_entries, + default.clone(), + )))) } /// Converts the given sequence to set. - #[function(return_type = DefaultMap<'_>)] - pub fn set(seq: &mut Sequence) -> Value { - let out: HashMap = match seq { - Sequence::String(rc) => rc - .borrow() - .chars() - .map(|c| (c.into(), Value::unit())) - .collect(), - Sequence::List(rc) => rc - .borrow() - .iter() - .map(|v| (v.to_owned(), Value::unit())) - .collect(), - Sequence::Tuple(rc) => rc.iter().map(|v| (v.to_owned(), Value::unit())).collect(), - Sequence::Map(rc, _) => rc - .borrow() - .keys() - .map(|key| (key.to_owned(), Value::unit())) - .collect(), - Sequence::Iterator(rc) => { - let mut iter = rc.borrow_mut(); - let mut out = HashMap::new(); - for item in iter.by_ref() { - out.insert(item, Value::unit()); - } - out - } - Sequence::MaxHeap(h) => h - .borrow() - .iter() - .map(|value| (value.0.clone(), Value::unit())) - .collect(), - Sequence::MinHeap(h) => h - .borrow() - .iter() - .map(|value| (value.0.0.clone(), Value::unit())) - .collect(), - Sequence::Deque(rc) => rc - .borrow() - .iter() - .map(|v| (v.to_owned(), Value::unit())) - .collect(), - }; + #[function(return_type = Map<_, ()>)] + pub fn set(seq: SeqValue) -> anyhow::Result { + use ndc_core::hash_map::HashMap; + + let 
entries: HashMap = seq + .try_into_iter() + .ok_or_else(|| anyhow::anyhow!("set requires a sequence"))? + .map(|v| (v, Value::unit())) + .collect(); - Value::Sequence(Sequence::Map(Rc::new(RefCell::new(out)), None)) + Ok(Value::Object(Rc::new(Object::map(entries, None)))) } } diff --git a/ndc_stdlib/src/heap.rs b/ndc_stdlib/src/heap.rs index 9eafb0ea..a49a99f4 100644 --- a/ndc_stdlib/src/heap.rs +++ b/ndc_stdlib/src/heap.rs @@ -1,50 +1,70 @@ use ndc_macros::export_module; +use ndc_vm::value::{Object, OrdValue, Value}; #[export_module] mod inner { - use ndc_interpreter::heap::{MaxHeap, MinHeap}; - use ndc_interpreter::sequence::Sequence; - use ndc_interpreter::value::Value; - use std::cell::RefCell; - use std::rc::Rc; + use std::cmp::Reverse; + use std::collections::BinaryHeap; + /// Creates a new empty min-heap (priority queue where the smallest element is popped first). #[function(name = "MinHeap", return_type = MinHeap<_>)] pub fn create_min_heap() -> Value { - Value::Sequence(Sequence::MinHeap(Rc::new(RefCell::new(MinHeap::new())))) + Value::Object(std::rc::Rc::new(Object::MinHeap(std::cell::RefCell::new( + BinaryHeap::new(), + )))) } + /// Creates a new empty max-heap (priority queue where the largest element is popped first). #[function(name = "MaxHeap", return_type = MaxHeap<_>)] pub fn create_max_heap() -> Value { - Value::Sequence(Sequence::MaxHeap(Rc::new(RefCell::new(MaxHeap::new())))) + Value::Object(std::rc::Rc::new(Object::MaxHeap(std::cell::RefCell::new( + BinaryHeap::new(), + )))) } + /// Removes and returns the smallest element from the min-heap, or `None` if empty. 
#[function(name = "pop?", return_type = Option)] - pub fn maybe_min_pop(heap: &mut MinHeap) -> Value { - heap.pop().map_or_else(Value::none, Value::some) + pub fn maybe_min_pop(heap: &mut BinaryHeap>) -> Value { + match heap.pop() { + None => Value::None, + Some(Reverse(OrdValue(v))) => Value::Object(std::rc::Rc::new(Object::Some(v))), + } } + /// Removes and returns the largest element from the max-heap, or `None` if empty. #[function(name = "pop?", return_type = Option)] - pub fn maybe_max_pop(heap: &mut MaxHeap) -> Value { - heap.pop().map_or_else(Value::none, Value::some) + pub fn maybe_max_pop(heap: &mut BinaryHeap) -> Value { + match heap.pop() { + None => Value::None, + Some(OrdValue(v)) => Value::Object(std::rc::Rc::new(Object::Some(v))), + } } + /// Removes and returns the smallest element from the min-heap, or errors if empty. #[function(name = "pop")] - pub fn min_pop(heap: &mut MinHeap) -> anyhow::Result { - heap.pop().ok_or_else(|| anyhow::anyhow!("heap is empty")) + pub fn min_pop(heap: &mut BinaryHeap>) -> anyhow::Result { + heap.pop() + .map(|Reverse(OrdValue(v))| v) + .ok_or_else(|| anyhow::anyhow!("heap is empty")) } + /// Removes and returns the largest element from the max-heap, or errors if empty. #[function(name = "pop")] - pub fn max_pop(heap: &mut MaxHeap) -> anyhow::Result { - heap.pop().ok_or_else(|| anyhow::anyhow!("heap is empty")) + pub fn max_pop(heap: &mut BinaryHeap) -> anyhow::Result { + heap.pop() + .map(|OrdValue(v)| v) + .ok_or_else(|| anyhow::anyhow!("heap is empty")) } + /// Pushes a value onto the min-heap. #[function(name = "push")] - pub fn min_push(heap: &mut MinHeap, value: Value) { - heap.push(value); + pub fn min_push(heap: &mut BinaryHeap>, value: Value) { + heap.push(Reverse(OrdValue(value))); } + /// Pushes a value onto the max-heap. 
#[function(name = "push")] - pub fn max_push(heap: &mut MaxHeap, value: Value) { - heap.push(value); + pub fn max_push(heap: &mut BinaryHeap, value: Value) { + heap.push(OrdValue(value)); } } diff --git a/ndc_stdlib/src/index.rs b/ndc_stdlib/src/index.rs new file mode 100644 index 00000000..2fdee5f6 --- /dev/null +++ b/ndc_stdlib/src/index.rs @@ -0,0 +1,439 @@ +use ndc_core::{FunctionRegistry, StaticType}; +use ndc_vm::Vm; +use ndc_vm::error::VmError; +use ndc_vm::value::{NativeFunc, NativeFunction, Object, Value}; +use std::rc::Rc; + +pub fn register(env: &mut FunctionRegistry>) { + register_get(env); + register_set(env); +} + +fn make_get_func() -> NativeFunc { + NativeFunc::WithVm(Box::new(|args: &[Value], vm: &mut Vm| { + let [container, index_value] = args else { + return Err(VmError::native(format!( + "[] requires exactly 2 arguments, got {}", + args.len() + ))); + }; + vm_get_at_index(container, index_value, vm) + })) +} + +fn make_set_func() -> NativeFunc { + NativeFunc::Simple(Box::new(|args: &[Value]| { + let [container, index_value, rhs] = args else { + return Err(VmError::native(format!( + "[]= requires exactly 3 arguments, got {}", + args.len() + ))); + }; + vm_set_at_index(container, index_value, rhs.clone())?; + Ok(Value::unit()) + })) +} + +fn register_get(env: &mut FunctionRegistry>) { + let doc = + "Retrieves an element by index. 
Supports negative indices and range slicing.".to_string(); + + // [] -> Any + env.declare_global_fn(Rc::new(NativeFunction { + name: "[]".to_string(), + documentation: Some(doc.clone()), + static_type: StaticType::Function { + parameters: Some(vec![ + StaticType::List(Box::new(StaticType::Any)), + StaticType::Any, + ]), + return_type: Box::new(StaticType::Any), + }, + func: make_get_func(), + })); + + // [] -> String + env.declare_global_fn(Rc::new(NativeFunction { + name: "[]".to_string(), + documentation: Some(doc.clone()), + static_type: StaticType::Function { + parameters: Some(vec![StaticType::String, StaticType::Any]), + return_type: Box::new(StaticType::String), + }, + func: make_get_func(), + })); + + // [] -> Any (fallback for Tuple and other indexable types) + env.declare_global_fn(Rc::new(NativeFunction { + name: "[]".to_string(), + documentation: Some(doc.clone()), + static_type: StaticType::Function { + parameters: Some(vec![StaticType::Any, StaticType::Any]), + return_type: Box::new(StaticType::Any), + }, + func: make_get_func(), + })); + + // [] -> Any + env.declare_global_fn(Rc::new(NativeFunction { + name: "[]".to_string(), + documentation: Some(doc.clone()), + static_type: StaticType::Function { + parameters: Some(vec![ + StaticType::Deque(Box::new(StaticType::Any)), + StaticType::Any, + ]), + return_type: Box::new(StaticType::Any), + }, + func: make_get_func(), + })); + + // [] -> Any + env.declare_global_fn(Rc::new(NativeFunction { + name: "[]".to_string(), + documentation: Some(doc), + static_type: StaticType::Function { + parameters: Some(vec![ + StaticType::Map { + key: Box::new(StaticType::Any), + value: Box::new(StaticType::Any), + }, + StaticType::Any, + ]), + return_type: Box::new(StaticType::Any), + }, + func: make_get_func(), + })); +} + +fn register_set(env: &mut FunctionRegistry>) { + let doc = "Sets an element by index. 
Supports negative indices and range slicing for lists and strings.".to_string(); + + // []= -> () + env.declare_global_fn(Rc::new(NativeFunction { + name: "[]=".to_string(), + documentation: Some(doc.clone()), + static_type: StaticType::Function { + parameters: Some(vec![ + StaticType::List(Box::new(StaticType::Any)), + StaticType::Any, + StaticType::Any, + ]), + return_type: Box::new(StaticType::unit()), + }, + func: make_set_func(), + })); + + // []= -> () + env.declare_global_fn(Rc::new(NativeFunction { + name: "[]=".to_string(), + documentation: Some(doc.clone()), + static_type: StaticType::Function { + parameters: Some(vec![ + StaticType::String, + StaticType::Any, + StaticType::String, + ]), + return_type: Box::new(StaticType::unit()), + }, + func: make_set_func(), + })); + + // []= -> () + env.declare_global_fn(Rc::new(NativeFunction { + name: "[]=".to_string(), + documentation: Some(doc), + static_type: StaticType::Function { + parameters: Some(vec![ + StaticType::Map { + key: Box::new(StaticType::Any), + value: Box::new(StaticType::Any), + }, + StaticType::Any, + StaticType::Any, + ]), + return_type: Box::new(StaticType::unit()), + }, + func: make_set_func(), + })); +} + +fn vm_sequence_length(v: &Value) -> Option { + match v { + Value::Object(obj) => match obj.as_ref() { + Object::String(s) => Some(s.borrow().chars().count()), + Object::List(l) => Some(l.borrow().len()), + Object::Tuple(t) => Some(t.len()), + Object::Map { entries, .. 
} => Some(entries.borrow().len()), + Object::Deque(d) => Some(d.borrow().len()), + _ => None, + }, + _ => None, + } +} + +fn to_forward_index(i: i64, size: usize, for_slice: bool) -> Result { + if i < 0 { + let abs = i.unsigned_abs() as usize; + if for_slice { + Ok(size.saturating_sub(abs)) + } else { + size.checked_sub(abs) + .ok_or_else(|| VmError::native("index out of bounds")) + } + } else { + let idx = i as usize; + if for_slice { + Ok(idx.min(size)) + } else if idx >= size { + Err(VmError::native("index out of bounds")) + } else { + Ok(idx) + } + } +} + +enum VmOffset { + Element(usize), + Range(usize, usize), +} + +fn extract_vm_offset(index_value: &Value, size: usize) -> Result { + if let Value::Object(obj) = index_value { + if let Object::Iterator(iter) = obj.as_ref() { + let iter_ref = iter.borrow(); + if let Some((start, end, inclusive)) = iter_ref.range_bounds() { + let from_idx = to_forward_index(start, size, true)?; + let to_idx = to_forward_index(end, size, true)?; + let to_idx = if inclusive { + (to_idx + 1).min(size) + } else { + to_idx + }; + return Ok(VmOffset::Range(from_idx, to_idx)); + } + if let Some(start) = iter_ref.unbounded_range_start() { + let from_idx = to_forward_index(start, size, true)?; + return Ok(VmOffset::Range(from_idx, size)); + } + return Err(VmError::native("cannot use non-range iterator as index")); + } + } + let i = match index_value { + Value::Int(i) => *i, + Value::Object(obj) => match obj.as_ref() { + Object::BigInt(n) => num::ToPrimitive::to_i64(n) + .ok_or_else(|| VmError::native("index too large for i64"))?, + _ => { + return Err(VmError::native( + "Invalid list index. List indices must be convertible to a signed 64-bit integer.", + )); + } + }, + _ => { + return Err(VmError::native( + "Invalid list index. 
List indices must be convertible to a signed 64-bit integer.", + )); + } + }; + Ok(VmOffset::Element(to_forward_index(i, size, false)?)) +} + +fn vm_get_at_index(container: &Value, index_value: &Value, vm: &mut Vm) -> Result { + let Some(size) = vm_sequence_length(container) else { + return Err(VmError::native(format!( + "cannot index into {}", + container.static_type() + ))); + }; + match container { + Value::Object(obj) => match obj.as_ref() { + Object::List(list) => { + let list = list.borrow(); + match extract_vm_offset(index_value, size)? { + VmOffset::Element(idx) => Ok(list[idx].clone()), + VmOffset::Range(from, to) => { + let values = list.get(from..to).ok_or_else(|| { + VmError::native(format!("{from}..{to} out of bounds")) + })?; + Ok(Value::Object(Rc::new(Object::list(values.to_vec())))) + } + } + } + Object::String(s) => { + let s = s.borrow(); + match extract_vm_offset(index_value, size)? { + VmOffset::Element(idx) => { + let ch = s.chars().nth(idx).expect("bounds already checked"); + Ok(Value::string(ch.to_string())) + } + VmOffset::Range(from, to) => { + let result: String = s.chars().skip(from).take(to - from).collect(); + Ok(Value::string(result)) + } + } + } + Object::Map { entries, default } => { + if matches!( + index_value, + Value::Object(o) if matches!(o.as_ref(), Object::Iterator(_)) + ) { + return Err(VmError::native( + "cannot use range expression as index in map", + )); + } + let key = index_value.clone(); + let value = entries.borrow().get(&key).cloned(); + if let Some(v) = value { + return Ok(v); + } + match default { + None => Err(VmError::native(format!("Key not found in map: {key}"))), + Some(default_val) => match default_val { + Value::Object(o) if matches!(o.as_ref(), Object::Function(_)) => { + let Object::Function(f) = o.as_ref() else { + unreachable!() + }; + let result = vm.call_callback(f.clone(), vec![])?; + entries.borrow_mut().insert(key, result.clone()); + Ok(result) + } + non_fn => { + let v = non_fn.clone(); + 
entries.borrow_mut().insert(key, v.clone()); + Ok(v) + } + }, + } + } + Object::Tuple(tuple) => match extract_vm_offset(index_value, size)? { + VmOffset::Element(idx) => tuple + .get(idx) + .cloned() + .ok_or_else(|| VmError::native("index out of bounds")), + VmOffset::Range(from, to) => { + let values = tuple + .get(from..to) + .ok_or_else(|| VmError::native("index out of bounds"))?; + Ok(Value::Object(Rc::new(Object::Tuple(values.to_vec())))) + } + }, + Object::Deque(deque) => { + let deque = deque.borrow(); + match extract_vm_offset(index_value, size)? { + VmOffset::Element(idx) => deque + .get(idx) + .cloned() + .ok_or_else(|| VmError::native("index out of bounds")), + VmOffset::Range(from, to) => { + let out: Vec = + deque.iter().skip(from).take(to - from).cloned().collect(); + Ok(Value::Object(Rc::new(Object::list(out)))) + } + } + } + _ => Err(VmError::native(format!( + "cannot index into {}", + container.static_type() + ))), + }, + _ => Err(VmError::native(format!( + "cannot index into {}", + container.static_type() + ))), + } +} + +fn vm_set_at_index(container: &Value, index_value: &Value, rhs: Value) -> Result<(), VmError> { + let Some(size) = vm_sequence_length(container) else { + return Err(VmError::native(format!( + "cannot insert into {} at index", + container.static_type() + ))); + }; + match container { + Value::Object(obj) => match obj.as_ref() { + Object::List(list) => { + let mut list = list.try_borrow_mut().map_err(|_| { + VmError::native("Mutation error: you cannot mutate a value in a list while you're iterating over this list") + })?; + match extract_vm_offset(index_value, size)? 
{ + VmOffset::Element(idx) => { + list[idx] = rhs; + } + VmOffset::Range(from, to) => { + let rhs_vec = match rhs { + Value::Object(o) => match o.as_ref() { + Object::List(l) => l.borrow().clone(), + Object::Tuple(t) => t.clone(), + _ => { + return Err(VmError::native( + "cannot assign non-list to list slice", + )); + } + }, + _ => { + return Err(VmError::native( + "cannot assign non-list to list slice", + )); + } + }; + let tail: Vec = list.drain(from..).collect(); + list.extend(rhs_vec); + list.extend_from_slice(&tail[(to - from)..]); + } + } + } + Object::String(s) => { + let rhs_str = match &rhs { + Value::Object(o) => match o.as_ref() { + Object::String(r) => r.borrow().clone(), + _ => { + return Err(VmError::native(format!( + "cannot insert {} into a string", + rhs.static_type() + ))); + } + }, + _ => { + return Err(VmError::native(format!( + "cannot insert {} into a string", + rhs.static_type() + ))); + } + }; + let mut s = s.borrow_mut(); + match extract_vm_offset(index_value, size)? { + VmOffset::Element(idx) => { + s.replace_range(idx..=idx, &rhs_str); + } + VmOffset::Range(from, to) => { + s.replace_range(from..to, &rhs_str); + } + } + } + Object::Map { entries, .. 
} => { + if matches!( + index_value, + Value::Object(o) if matches!(o.as_ref(), Object::Iterator(_)) + ) { + return Err(VmError::native("cannot use range expression as index")); + } + entries.borrow_mut().insert(index_value.clone(), rhs); + } + _ => { + return Err(VmError::native(format!( + "cannot insert into {} at index", + container.static_type() + ))); + } + }, + _ => { + return Err(VmError::native(format!( + "cannot insert into {} at index", + container.static_type() + ))); + } + } + Ok(()) +} diff --git a/ndc_stdlib/src/lib.rs b/ndc_stdlib/src/lib.rs index fb0ac47f..eb1066a5 100644 --- a/ndc_stdlib/src/lib.rs +++ b/ndc_stdlib/src/lib.rs @@ -1,5 +1,6 @@ -use ndc_interpreter::Interpreter; -use ndc_interpreter::environment::Environment; +use ndc_core::FunctionRegistry; +use ndc_vm::NativeFunction; +use std::rc::Rc; pub mod aoc; pub mod cmp; @@ -7,6 +8,7 @@ pub mod deque; pub mod file; pub mod hash_map; pub mod heap; +pub mod index; pub mod list; pub mod math; pub mod sequence; @@ -22,7 +24,7 @@ pub mod regex; #[cfg(feature = "serde")] pub mod serde; -pub fn register(env: &mut Environment) { +pub fn register(env: &mut FunctionRegistry>) { aoc::register(env); cmp::register(env); #[cfg(feature = "crypto")] @@ -32,6 +34,8 @@ pub fn register(env: &mut Environment) { file::register_variadic(env); hash_map::register(env); heap::register(env); + index::register(env); + list::ops::register(env); list::register(env); math::f64::register(env); math::register(env); @@ -46,14 +50,3 @@ pub fn register(env: &mut Environment) { string::register(env); value::register(env); } - -pub trait WithStdlib: Sized { - fn with_stdlib(self) -> Self; -} - -impl WithStdlib for Interpreter { - fn with_stdlib(mut self) -> Self { - self.configure(register); - self - } -} diff --git a/ndc_stdlib/src/list.rs b/ndc_stdlib/src/list.rs index c3350b78..c9e16d66 100644 --- a/ndc_stdlib/src/list.rs +++ b/ndc_stdlib/src/list.rs @@ -1,27 +1,33 @@ +use ndc_vm::value::{Object, SeqValue, Value}; + 
#[ndc_macros::export_module] mod inner { use itertools::Itertools; - use ndc_interpreter::iterator::mut_seq_to_iterator; - use ndc_interpreter::sequence::{ListRepr, Sequence}; - use ndc_interpreter::value::Value; use std::rc::Rc; use anyhow::anyhow; /// Converts any sequence into a list - #[function(return_type = Vec)] - pub fn list(seq: &mut Sequence) -> Value { - Value::list(mut_seq_to_iterator(seq).collect::>()) + #[function(return_type = Vec<_>)] + pub fn list(seq: SeqValue) -> anyhow::Result { + Ok(Value::list( + seq.try_into_iter() + .ok_or_else(|| anyhow!("list requires a sequence"))? + .collect::>(), + )) } - pub fn contains(list: &[Value], elem: &Value) -> bool { - list.contains(elem) + /// Returns `true` if the list contains the given element. + pub fn contains(list: &[Value], elem: Value) -> bool { + list.contains(&elem) } + /// Returns `true` if the list contains the given subsequence. pub fn contains_subsequence(list: &[Value], subsequence: &[Value]) -> bool { list.windows(subsequence.len()).contains(&subsequence) } + /// Returns the starting index of the first occurrence of `subsequence` in `list`, or unit if not found. pub fn find_subsequence(list: &[Value], subsequence: &[Value]) -> Value { let result = list .windows(subsequence.len()) @@ -29,12 +35,13 @@ mod inner { .find(|(_, seq)| *seq == subsequence) .map(|(idx, _)| idx); if let Some(result) = result { - Value::from(result) + Value::Int(result as i64) } else { Value::unit() } } + /// Inserts an element at the given index, shifting all elements after it to the right. 
pub fn insert(list: &mut Vec, index: usize, elem: Value) -> anyhow::Result<()> { if index > list.len() { return Err(anyhow!("index {index} is out of bounds")); @@ -53,8 +60,8 @@ mod inner { } /// Removes all instances of `element` from `list` - pub fn remove_element(list: &mut Vec, element: &Value) { - list.retain(|cur| cur != element); + pub fn remove_element(list: &mut Vec, element: Value) { + list.retain(|cur| cur != &element); } /// Appends `elem` to the back of `list` @@ -62,109 +69,51 @@ mod inner { list.push(elem); } - /// Moves elements from `other` to `list` leaving `other` empty + /// Moves elements from `other` to `list`, leaving `other` empty. pub fn append(list: &mut Vec, other: &mut Vec) { list.append(other); } - // A price we have to pay for the type system - // #[function(name = "++")] - // pub fn tup_concat(left: TupleRepr, mut right: TupleRepr) -> Value { - // match Rc::try_unwrap(left) { - // Ok(mut left) => { - // left.append(Rc::make_mut(&mut right)); - // Value::tuple(left) - // } - // Err(left) => Value::tuple( - // left.iter() - // .chain(right.iter()) - // .cloned() - // .collect::>(), - // ), - // } - // } - - #[function(name = "++")] - pub fn list_concat(left: &mut ListRepr, right: &mut ListRepr) -> Value { - if Rc::strong_count(left) == 1 { - left.borrow_mut().extend_from_slice(&right.borrow()); - - Value::Sequence(Sequence::List(left.clone())) - } else { - Value::list( - left.borrow() - .iter() - .chain(right.borrow().iter()) - .cloned() - .collect::>(), - ) - } - } - - #[function(name = "++=")] - pub fn list_append_operator(left: &mut ListRepr, right: &mut ListRepr) { - // The ++= operator has 3 implementation paths, this first one is the case where a list extends itself - if Rc::ptr_eq(left, right) { - left.borrow_mut().extend_from_within(..); - } else if Rc::strong_count(right) == 1 { - // The second path deals with a RHS that has an RC of one, in this case we can drain the RHS which should be faster than copying? 
- left.borrow_mut().append( - &mut right - .try_borrow_mut() - .expect("Failed to borrow_mut in `list_append_operator`"), - ); - // The last path is if the RHS has an RC that's higher than 1, in this case we copy all the elements into the LHS - } else { - left.borrow_mut().extend_from_slice( - &right - .try_borrow() - .expect("Failed to borrow in `list_append_operator`"), - ); - } - } - - /// Copies elements from `other` to `list` not touching `other`. + /// Copies elements from `other` to `list`, not touching `other`. pub fn extend(list: &mut Vec, other: &[Value]) { list.extend_from_slice(other); } - /// Extends this `list` with elements from `iter` leaving the iterator empty - #[function(name = "extend")] - pub fn extend_from_iter(list: &mut Vec, iter: impl Iterator) { - list.extend(iter); - } - - /// Removes the last element from a list and returns it, or `Unit` if it is empty - #[function(name = "pop?")] + /// Removes and returns the last element from the list, or `None` if empty. + #[function(name = "pop?", return_type = Option<_>)] pub fn maybe_pop(list: &mut Vec) -> Value { - list.pop().map_or_else(Value::none, Value::some) + match list.pop() { + None => Value::None, + Some(val) => Value::Object(Rc::new(Object::Some(val))), + } } + /// Removes and returns the last element from the list, or unit if empty. pub fn pop(list: &mut Vec) -> Value { - list.pop().unwrap_or(Value::unit()) + list.pop().unwrap_or_else(Value::unit) } - #[function(name = "pop_left?", return_type = Option)] + /// Removes and returns the first element from the list, or `None` if empty. + #[function(name = "pop_left?", return_type = Option<_>)] pub fn maybe_pop_left(list: &mut Vec) -> Value { if list.is_empty() { - return Value::none(); + return Value::None; } - - Value::some(list.remove(0)) + Value::Object(Rc::new(Object::Some(list.remove(0)))) } + /// Removes and returns the first element from the list, or unit if empty. 
pub fn pop_left(list: &mut Vec) -> Value { if list.is_empty() { return Value::unit(); } - list.remove(0) } /// Creates a copy of the list with its elements in reverse order - #[function(return_type = Vec)] + #[function(return_type = Vec<_>)] pub fn reversed(list: &[Value]) -> Value { - Value::list(list.iter().rev().cloned().collect::>()) + Value::list(list.iter().rev().cloned().collect()) } /// Removes all values from the list @@ -183,7 +132,7 @@ mod inner { return Err(anyhow!("index {index} is out of bounds")); } - Ok(Value::list(list.split_off(index))) + Ok(Value::Object(Rc::new(Object::list(list.split_off(index))))) } /// Shortens the list, keeping the first `len` elements and dropping the rest. @@ -199,10 +148,13 @@ mod inner { .ok_or_else(|| anyhow!("collection is empty")) } - /// Returns a copy of the first element or `unit` if the list is empty. - #[function(name = "first?")] + /// Returns a copy of the first element, or `None` if the list is empty. + #[function(name = "first?", return_type = Option<_>)] pub fn maybe_first(list: &[Value]) -> Value { - list.first().cloned().map_or_else(Value::none, Value::some) + match list.first() { + None => Value::None, + Some(v) => Value::Object(Rc::new(Object::Some(v.clone()))), + } } /// Returns a copy of the last element of the list or results in an error if the list is empty. @@ -212,13 +164,17 @@ mod inner { .ok_or_else(|| anyhow!("the list is empty")) } - /// Returns a copy of the last element or `unit` if the list is empty. - #[function(name = "last?")] + /// Returns a copy of the last element, or `None` if the list is empty. + #[function(name = "last?", return_type = Option<_>)] pub fn maybe_last(list: &[Value]) -> Value { - list.last().cloned().map_or_else(Value::none, Value::some) + match list.last() { + None => Value::None, + Some(v) => Value::Object(Rc::new(Object::Some(v.clone()))), + } } - #[function(return_type = Vec<(Value, Value)>)] + /// Returns the Cartesian product of two lists as a list of tuples. 
+ #[function(return_type = Vec<_>)] pub fn cartesian_product(list_a: &[Value], list_b: &[Value]) -> Value { Value::list( list_a @@ -229,3 +185,141 @@ mod inner { ) } } + +pub mod ops { + use ndc_core::{FunctionRegistry, StaticType}; + use ndc_vm::error::VmError; + use ndc_vm::value::{NativeFunc, NativeFunction, Object, Value}; + use std::rc::Rc; + + pub fn register(env: &mut FunctionRegistry>) { + register_list_concat(env); + register_list_append(env); + } + + fn register_list_concat(env: &mut FunctionRegistry>) { + let native = Rc::new(NativeFunction { + name: "++".to_string(), + documentation: Some("Concatenates two lists into a new list.".to_string()), + static_type: StaticType::Function { + parameters: Some(vec![ + StaticType::List(Box::new(StaticType::Any)), + StaticType::List(Box::new(StaticType::Any)), + ]), + return_type: Box::new(StaticType::List(Box::new(StaticType::Any))), + }, + func: NativeFunc::Simple(Box::new(|args| { + let [left, right] = args else { + return Err(VmError::native(format!( + "++ requires exactly 2 arguments, got {}", + args.len() + ))); + }; + let Value::Object(left_obj) = left else { + return Err(VmError::native(format!( + "++ left requires a list, got {}", + left.static_type() + ))); + }; + let Value::Object(right_obj) = right else { + return Err(VmError::native(format!( + "++ right requires a list, got {}", + right.static_type() + ))); + }; + let Object::List(left_cell) = left_obj.as_ref() else { + return Err(VmError::native(format!( + "++ left requires a list, got {}", + left.static_type() + ))); + }; + let Object::List(right_cell) = right_obj.as_ref() else { + return Err(VmError::native(format!( + "++ right requires a list, got {}", + right.static_type() + ))); + }; + + if Rc::strong_count(left_obj) == 1 { + left_cell + .borrow_mut() + .extend_from_slice(&right_cell.borrow()); + Ok(Value::Object(left_obj.clone())) + } else { + let new_list: Vec = left_cell + .borrow() + .iter() + .chain(right_cell.borrow().iter()) + .cloned() + 
.collect(); + Ok(Value::Object(Rc::new(Object::list(new_list)))) + } + })), + }); + env.declare_global_fn(native); + } + + fn register_list_append(env: &mut FunctionRegistry>) { + let native = Rc::new(NativeFunction { + name: "++=".to_string(), + documentation: Some( + "Appends all elements from the right list to the left list in place.".to_string(), + ), + static_type: StaticType::Function { + parameters: Some(vec![ + StaticType::List(Box::new(StaticType::Any)), + StaticType::List(Box::new(StaticType::Any)), + ]), + return_type: Box::new(StaticType::Tuple(vec![])), + }, + func: NativeFunc::Simple(Box::new(|args| { + let [left, right] = args else { + return Err(VmError::native(format!( + "++= requires exactly 2 arguments, got {}", + args.len() + ))); + }; + let Value::Object(left_obj) = left else { + return Err(VmError::native(format!( + "++= left requires a list, got {}", + left.static_type() + ))); + }; + let Value::Object(right_obj) = right else { + return Err(VmError::native(format!( + "++= right requires a list, got {}", + right.static_type() + ))); + }; + let Object::List(left_cell) = left_obj.as_ref() else { + return Err(VmError::native(format!( + "++= left requires a list, got {}", + left.static_type() + ))); + }; + let Object::List(right_cell) = right_obj.as_ref() else { + return Err(VmError::native(format!( + "++= right requires a list, got {}", + right.static_type() + ))); + }; + + if Rc::ptr_eq(left_obj, right_obj) { + left_cell.borrow_mut().extend_from_within(..); + } else if Rc::strong_count(right_obj) == 1 { + left_cell.borrow_mut().append( + &mut right_cell + .try_borrow_mut() + .expect("Failed to borrow_mut right in `++=`"), + ); + } else { + left_cell + .borrow_mut() + .extend_from_slice(&right_cell.borrow()); + } + Ok(left.clone()) + })), + }); + env.declare_global_fn(native); + } +} diff --git a/ndc_stdlib/src/math.rs b/ndc_stdlib/src/math.rs index 97b97a4f..b2377dc0 100644 --- a/ndc_stdlib/src/math.rs +++ b/ndc_stdlib/src/math.rs @@ -1,60 +1,17 
@@ use factorial::Factorial; -use ndc_interpreter::environment::Environment; -use ndc_interpreter::num::{BinaryOperatorError, Number}; -use ndc_interpreter::sequence::Sequence; -use ndc_interpreter::value::Value; +use ndc_core::num::{BinaryOperatorError, Number}; use ndc_macros::export_module; +use ndc_vm::value::{Object, SeqValue, Value}; use num::ToPrimitive; use std::ops::{Add, Mul}; -trait FallibleSum { - fn try_sum(&mut self) -> Result; -} - -impl FallibleSum for C -where - C: Iterator, - T: std::borrow::Borrow, -{ - fn try_sum(&mut self) -> Result { - self.try_fold(Number::from(0), |acc, cur| match cur.borrow() { - Value::Number(n) => acc.add(n), - value => Err(BinaryOperatorError::new(format!( - "cannot sum {} and number", - value.static_type() - ))), - }) - } -} - -trait FallibleProduct { - fn try_product(&mut self) -> Result; -} - -impl FallibleProduct for C -where - C: Iterator, - T: std::borrow::Borrow, -{ - fn try_product(&mut self) -> Result { - self.try_fold(Number::from(1), |acc, cur| match cur.borrow() { - Value::Number(n) => acc.mul(n), - value => Err(BinaryOperatorError::new(format!( - "cannot multiply {} and number", - value.static_type() - ))), - }) - } -} - #[export_module] mod inner { use std::ops::Sub; - use super::FallibleSum; use anyhow::Context; - use ndc_interpreter::int::Int; - use ndc_interpreter::num::Number; + use ndc_core::int::Int; + use ndc_core::num::Number; use num::{BigInt, BigRational, BigUint, Integer, complex::Complex64}; /// Returns the sign of a number. 
@@ -85,38 +42,37 @@ mod inner { r.denom().clone() } - pub fn sum(seq: &Sequence) -> anyhow::Result { - match seq { - Sequence::String(_s) => Err(BinaryOperatorError::new( - "string cannot be summed".to_string(), - )), - Sequence::List(list) => list.borrow().iter().try_sum(), - Sequence::Tuple(tup) => tup.iter().try_sum(), - Sequence::Map(map, _) => map.borrow().keys().try_sum(), - Sequence::Iterator(iter) => iter.borrow_mut().try_sum(), - Sequence::MaxHeap(h) => h.borrow().iter().map(|v| &v.0).try_sum(), - Sequence::MinHeap(h) => h.borrow().iter().map(|v| &v.0.0).try_sum(), - Sequence::Deque(d) => d.borrow().iter().try_sum(), + /// Returns the sum of all elements in a sequence. + pub fn sum(seq: SeqValue) -> anyhow::Result { + if matches!(&seq, Value::Object(o) if matches!(o.as_ref(), Object::String(_))) { + anyhow::bail!("string cannot be summed"); } - .context("type error while multiplying sequence") + seq.try_into_iter() + .ok_or_else(|| anyhow::anyhow!("cannot sum non-sequence"))? + .try_fold(Number::from(0), |acc, val| { + let n = val + .to_number() + .ok_or_else(|| anyhow::anyhow!("cannot sum {}", val.static_type()))?; + acc.add(&n).map_err(|e| anyhow::anyhow!("{e}")) + }) } - pub fn product(seq: &Sequence) -> anyhow::Result { - match seq { - Sequence::String(_s) => Err(BinaryOperatorError::new( - "string cannot be multiplied".to_string(), - )), - Sequence::List(list) => list.borrow().iter().try_product(), - Sequence::Tuple(tup) => tup.iter().try_product(), - Sequence::Map(map, _) => map.borrow().keys().try_product(), - Sequence::Iterator(iter) => iter.borrow_mut().try_product(), - Sequence::MaxHeap(h) => h.borrow().iter().map(|v| &v.0).try_product(), - Sequence::MinHeap(h) => h.borrow().iter().map(|v| &v.0.0).try_product(), - Sequence::Deque(d) => d.borrow().iter().try_product(), + /// Returns the product of all elements in a sequence. 
+ pub fn product(seq: SeqValue) -> anyhow::Result { + if matches!(&seq, Value::Object(o) if matches!(o.as_ref(), Object::String(_))) { + anyhow::bail!("string cannot be multiplied"); } - .context("type error while multiplying sequence") + seq.try_into_iter() + .ok_or_else(|| anyhow::anyhow!("cannot multiply non-sequence"))? + .try_fold(Number::from(1), |acc, val| { + let n = val + .to_number() + .ok_or_else(|| anyhow::anyhow!("cannot multiply {}", val.static_type()))?; + acc.mul(&n).map_err(|e| anyhow::anyhow!("{e}")) + }) } + /// Returns the factorial of a non-negative integer. pub fn factorial(a: &BigInt) -> anyhow::Result { let num = BigUint::try_from(a).context("cannot compute the factorial of a negative number")?; @@ -124,59 +80,58 @@ mod inner { Ok(num.factorial().into()) } + /// Returns the greatest common divisor of two integers. pub fn gcd(a: &BigInt, b: &BigInt) -> BigInt { a.gcd(b) } + /// Returns the least common multiple of two integers. pub fn lcm(a: &BigInt, b: &BigInt) -> BigInt { a.lcm(b) } + /// Returns the smallest integer greater than or equal to the number. pub fn ceil(number: &Number) -> Number { number.ceil() } + /// Rounds the number to the nearest integer, with ties rounding away from zero. pub fn round(number: &Number) -> Number { number.round() } + /// Returns the largest integer less than or equal to the number. pub fn floor(number: &Number) -> Number { number.floor() } + /// Returns the absolute value of a number. pub fn abs(number: &Number) -> Number { number.abs() } + /// Returns the absolute difference between two numbers. 
pub fn abs_diff(left: &Number, right: &Number) -> Result { Ok(left.sub(right)?.abs()) } - pub fn float(value: &Value) -> anyhow::Result { - match value { - Value::Number(Number::Int(Int::BigInt(i))) => i - .to_f64() - .ok_or_else(|| anyhow::anyhow!("failed to convert int to float (overflow?)")), - Value::Number(Number::Int(Int::Int64(i))) => i - .to_f64() - .ok_or_else(|| anyhow::anyhow!("failed to convert int to float (overflow?)")), - Value::Number(Number::Rational(r)) => r + /// Converts a value to a floating-point number. + pub fn float(value: Value) -> anyhow::Result { + match &value { + Value::Bool(b) => Ok(if *b { 1.0 } else { 0.0 }), + Value::Object(obj) => match obj.as_ref() { + Object::String(s) => Ok(s.borrow().parse::()?), + _ => value.to_f64().ok_or_else(|| { + anyhow::anyhow!("cannot convert {} to float", value.static_type()) + }), + }, + _ => value .to_f64() - .ok_or_else(|| anyhow::anyhow!("failed to convert rational to float (overflow?)")), - Value::Number(Number::Float(f)) => Ok(*f), - Value::Bool(true) => Ok(1.0), - Value::Bool(false) => Ok(0.0), - Value::Sequence(Sequence::String(string)) => { - let string = string.borrow(); - Ok(string.parse::()?) - } - value => Err(anyhow::anyhow!( - "cannot convert {} to float", - value.static_type() - )), + .ok_or_else(|| anyhow::anyhow!("cannot convert {} to float", value.static_type())), } } + /// Computes the four-quadrant arctangent of `y` and `x` in radians. 
pub fn atan2(y: f64, x: f64) -> f64 { y.atan2(x) } @@ -189,308 +144,445 @@ mod inner { /// - Rational numbers are rounded down /// - `true` is converted to `1`, and `false` to `0` /// - Strings are parsed as decimal integers; other representations result in an error - pub fn int(value: &Value) -> anyhow::Result { - match value { - Value::Number(number) => Ok(number.to_int_lossy()?), - Value::Bool(true) => Ok(Number::from(1)), - Value::Bool(false) => Ok(Number::from(0)), - Value::Sequence(Sequence::String(string)) => { - let string = string.borrow(); - let bi = string.parse::()?; - Ok(Number::Int(Int::BigInt(bi).simplified())) - } - value => Err(anyhow::anyhow!( - "cannot convert {} to int", - value.static_type() - )), + pub fn int(value: Value) -> anyhow::Result { + match &value { + Value::Bool(b) => Ok(Number::from(if *b { 1i32 } else { 0i32 })), + Value::Object(obj) => match obj.as_ref() { + Object::String(s) => { + let bi = s.borrow().parse::()?; + Ok(Number::Int(Int::BigInt(bi).simplified())) + } + _ => value + .to_number() + .ok_or_else(|| { + anyhow::anyhow!("cannot convert {} to int", value.static_type()) + })? + .to_int_lossy() + .map_err(|e| anyhow::anyhow!("{e}")), + }, + _ => value + .to_number() + .ok_or_else(|| anyhow::anyhow!("cannot convert {} to int", value.static_type()))? 
+ .to_int_lossy() + .map_err(|e| anyhow::anyhow!("{e}")), } } } pub mod f64 { - use super::{Environment, Number, ToPrimitive, f64}; - use ndc_interpreter::function::{ - FunctionBody, FunctionBuilder, FunctionCarrier, Parameter, StaticType, TypeSignature, - }; - use ndc_interpreter::num::BinaryOperatorError; - use ndc_interpreter::value::Value; + use super::{Number, ToPrimitive, f64}; + use ndc_core::StaticType; + use ndc_core::num::BinaryOperatorError; + use ndc_vm::error::VmError; + use ndc_vm::value::{NativeFunc, NativeFunction, Value}; use std::cmp::Ordering; use std::ops::Not; + use std::rc::Rc; - pub fn register(env: &mut Environment) { + pub fn register(env: &mut ndc_core::FunctionRegistry>) { macro_rules! implement_binary_operator_on_num { - ($operator:literal,$method:expr) => { - env.declare_global_fn( - FunctionBuilder::default() - .name($operator.to_string()) - .body(FunctionBody::NumericBinaryOp { body: $method }) - .build() - .expect("must be valid"), - ); + ($operator:literal,$method:expr,$docs:literal) => { + env.declare_global_fn(Rc::new(NativeFunction { + name: $operator.to_string(), + documentation: Some($docs.to_string()), + static_type: StaticType::Function { + parameters: Some(vec![StaticType::Number, StaticType::Number]), + return_type: Box::new(StaticType::Number), + }, + func: NativeFunc::Simple(Box::new(|args| match args { + [left, right] => { + let l = left.to_number().ok_or_else(|| { + VmError::native(format!( + "expected number, got {}", + left.static_type() + )) + })?; + let r = right.to_number().ok_or_else(|| { + VmError::native(format!( + "expected number, got {}", + right.static_type() + )) + })?; + $method(l, r) + .map(Value::from_number) + .map_err(|e: BinaryOperatorError| VmError::native(e.to_string())) + } + _ => Err(VmError::native(format!( + "expected 2 arguments, got {}", + args.len() + ))), + })), + })); }; } - implement_binary_operator_on_num!("-", std::ops::Sub::sub); - implement_binary_operator_on_num!("+", 
std::ops::Add::add); - implement_binary_operator_on_num!("*", std::ops::Mul::mul); - implement_binary_operator_on_num!("/", std::ops::Div::div); - implement_binary_operator_on_num!("\\", Number::floor_div); - implement_binary_operator_on_num!("^", Number::pow); - implement_binary_operator_on_num!("%", std::ops::Rem::rem); - implement_binary_operator_on_num!("%%", Number::checked_rem_euclid); - - env.declare_global_fn( - FunctionBuilder::default() - .body(FunctionBody::NumericUnaryOp { - body: std::ops::Neg::neg, - }) - .name("-".to_string()) - .build() - .expect("must succeed"), + implement_binary_operator_on_num!("-", std::ops::Sub::sub, "Subtracts two numbers."); + implement_binary_operator_on_num!("+", std::ops::Add::add, "Adds two numbers."); + implement_binary_operator_on_num!("*", std::ops::Mul::mul, "Multiplies two numbers."); + implement_binary_operator_on_num!("/", std::ops::Div::div, "Divides two numbers."); + implement_binary_operator_on_num!( + "\\", + Number::floor_div, + "Integer (floor) division of two numbers." + ); + implement_binary_operator_on_num!( + "^", + Number::pow, + "Raises the first number to the power of the second." + ); + implement_binary_operator_on_num!( + "%", + std::ops::Rem::rem, + "Returns the remainder of dividing two numbers." ); + implement_binary_operator_on_num!( + "%%", + Number::checked_rem_euclid, + "Returns the Euclidean remainder of dividing two numbers. The result is always non-negative." 
+ ); + + env.declare_global_fn(Rc::new(NativeFunction { + name: "-".to_string(), + documentation: Some("Negates a number.".to_string()), + static_type: StaticType::Function { + parameters: Some(vec![StaticType::Number]), + return_type: Box::new(StaticType::Number), + }, + func: NativeFunc::Simple(Box::new(|args| match args { + [v] => v + .to_number() + .map(std::ops::Neg::neg) + .map(Value::from_number) + .ok_or_else(|| { + VmError::native(format!("expected number, got {}", v.static_type())) + }), + _ => Err(VmError::native(format!( + "expected 1 argument, got {}", + args.len() + ))), + })), + })); macro_rules! impl_cmp { - ($operator:literal,$expected:pat) => { - env.declare_global_fn( - FunctionBuilder::default() - .name($operator.to_string()) - .body(FunctionBody::GenericFunction { - type_signature: TypeSignature::Exact(vec![ - Parameter::new("left", StaticType::Any), - Parameter::new("right", StaticType::Any), - ]), - function: |values, _env| match values { - [left, right] => match left.partial_cmp(&right) { - Some($expected) => Ok(Value::Bool(true)), - Some(_) => Ok(Value::Bool(false)), - None => Err(anyhow::anyhow!("cannot compare {} and {}",left.static_type(),right.static_type()).into()), - }, - _ => unreachable!("the type checker should never invoke this function if the argument count does not match") - }, - return_type: StaticType::Bool, - }) - .build() - .expect("must succeed") - ); + ($operator:literal,$expected:pat,$docs:literal) => { + env.declare_global_fn(Rc::new(NativeFunction { + name: $operator.to_string(), + documentation: Some($docs.to_string()), + static_type: StaticType::Function { + parameters: Some(vec![StaticType::Any, StaticType::Any]), + return_type: Box::new(StaticType::Bool), + }, + func: NativeFunc::Simple(Box::new(|args| match args { + [left, right] => match left.partial_cmp(right) { + Some($expected) => Ok(Value::Bool(true)), + Some(_) => Ok(Value::Bool(false)), + None => Err(VmError::native(format!( + "cannot compare {} and {}", + 
left.static_type(), + right.static_type() + ))), + }, + _ => Err(VmError::native(format!( + "expected 2 arguments, got {}", + args.len() + ))), + })), + })); }; } - impl_cmp!(">", Ordering::Greater); - impl_cmp!(">=", Ordering::Greater | Ordering::Equal); - impl_cmp!("<", Ordering::Less); - impl_cmp!("<=", Ordering::Less | Ordering::Equal); - - env.declare_global_fn( - FunctionBuilder::default() - .name("==".to_string()) - .body(FunctionBody::GenericFunction { - type_signature: TypeSignature::Exact(vec![ - Parameter::new("left", StaticType::Any), - Parameter::new("right", StaticType::Any), - ]), - function: |values, _env| match values { - [left, right] => Ok(Value::Bool(left == right)), - _ => unreachable!("the type checker should never invoke this function if the argument count does not match") - }, - return_type: StaticType::Bool, - }) - .build() - .expect("must succeed") + impl_cmp!( + ">", + Ordering::Greater, + "Returns true if the left value is greater than the right." ); - - env.declare_global_fn( - FunctionBuilder::default() - .name("!=".to_string()) - .body(FunctionBody::GenericFunction { - type_signature: TypeSignature::Exact(vec![ - Parameter::new("left", StaticType::Any), - Parameter::new("right", StaticType::Any), - ]), - function: |values, _env| match values { - [left, right] => Ok(Value::Bool(left != right)), - _ => unreachable!("the type checker should never invoke this function if the argument count does not match") - }, - return_type: StaticType::Bool, - }) - .build() - .expect("must succeed") + impl_cmp!( + ">=", + Ordering::Greater | Ordering::Equal, + "Returns true if the left value is greater than or equal to the right." 
); - - env.declare_global_fn( - FunctionBuilder::default() - .name("<=>".to_string()) - .body(FunctionBody::GenericFunction { - type_signature: TypeSignature::Exact(vec![ - Parameter::new("left", StaticType::Any), - Parameter::new("right", StaticType::Any), - ]), - function: |values, _env| match values { - [left, right] => match left.partial_cmp(&right) { - Some(Ordering::Equal) => Ok(Value::from(0)), - Some(Ordering::Less) => Ok(Value::from(-1)), - Some(Ordering::Greater) => Ok(Value::from(1)), - None => Err(anyhow::anyhow!("cannot compare {} and {}",left.static_type(),right.static_type()).into()), - }, - _ => unreachable!("the type checker should never invoke this function if the argument count does not match") - }, - return_type: StaticType::Int, - }) - .build() - .expect("must succeed") + impl_cmp!( + "<", + Ordering::Less, + "Returns true if the left value is less than the right." ); - - env.declare_global_fn( - FunctionBuilder::default() - .name(">=<".to_string()) - .body(FunctionBody::GenericFunction { - type_signature: TypeSignature::Exact(vec![ - Parameter::new("left", StaticType::Any), - Parameter::new("right", StaticType::Any), - ]), - function: |values, _env| match values { - [left, right] => match left.partial_cmp(&right) { - Some(Ordering::Equal) => Ok(Value::from(0)), - Some(Ordering::Less) => Ok(Value::from(1)), - Some(Ordering::Greater) => Ok(Value::from(-1)), - None => Err(anyhow::anyhow!("cannot compare {} and {}",left.static_type(),right.static_type()).into()), - }, - _ => unreachable!("the type checker should never invoke this function if the argument count does not match") - }, - return_type: StaticType::Int, - }) - .build() - .expect("must succeed") + impl_cmp!( + "<=", + Ordering::Less | Ordering::Equal, + "Returns true if the left value is less than or equal to the right." 
); + env.declare_global_fn(Rc::new(NativeFunction { + name: "==".to_string(), + documentation: Some("Returns true if two values are equal.".to_string()), + static_type: StaticType::Function { + parameters: Some(vec![StaticType::Any, StaticType::Any]), + return_type: Box::new(StaticType::Bool), + }, + func: NativeFunc::Simple(Box::new(|args| match args { + [left, right] => Ok(Value::Bool(left == right)), + _ => Err(VmError::native(format!( + "expected 2 arguments, got {}", + args.len() + ))), + })), + })); + + env.declare_global_fn(Rc::new(NativeFunction { + name: "!=".to_string(), + documentation: Some("Returns true if two values are not equal.".to_string()), + static_type: StaticType::Function { + parameters: Some(vec![StaticType::Any, StaticType::Any]), + return_type: Box::new(StaticType::Bool), + }, + func: NativeFunc::Simple(Box::new(|args| match args { + [left, right] => Ok(Value::Bool(left != right)), + _ => Err(VmError::native(format!( + "expected 2 arguments, got {}", + args.len() + ))), + })), + })); + + env.declare_global_fn(Rc::new(NativeFunction { + name: "<=>".to_string(), + documentation: Some("Three-way comparison (spaceship operator). Returns -1 if left < right, 0 if equal, 1 if left > right.".to_string()), + static_type: StaticType::Function { + parameters: Some(vec![StaticType::Any, StaticType::Any]), + return_type: Box::new(StaticType::Int), + }, + func: NativeFunc::Simple(Box::new(|args| match args { + [left, right] => match left.partial_cmp(right) { + Some(Ordering::Equal) => Ok(Value::Int(0)), + Some(Ordering::Less) => Ok(Value::Int(-1)), + Some(Ordering::Greater) => Ok(Value::Int(1)), + None => Err(VmError::native(format!( + "cannot compare {} and {}", + left.static_type(), + right.static_type() + ))), + }, + _ => Err(VmError::native(format!( + "expected 2 arguments, got {}", + args.len() + ))), + })), + })); + + env.declare_global_fn(Rc::new(NativeFunction { + name: ">=<".to_string(), + documentation: Some("Reverse three-way comparison. 
Returns 1 if left < right, 0 if equal, -1 if left > right.".to_string()), + static_type: StaticType::Function { + parameters: Some(vec![StaticType::Any, StaticType::Any]), + return_type: Box::new(StaticType::Int), + }, + func: NativeFunc::Simple(Box::new(|args| match args { + [left, right] => match left.partial_cmp(right) { + Some(Ordering::Equal) => Ok(Value::Int(0)), + Some(Ordering::Less) => Ok(Value::Int(1)), + Some(Ordering::Greater) => Ok(Value::Int(-1)), + None => Err(VmError::native(format!( + "cannot compare {} and {}", + left.static_type(), + right.static_type() + ))), + }, + _ => Err(VmError::native(format!( + "expected 2 arguments, got {}", + args.len() + ))), + })), + })); + macro_rules! impl_bitop { - ($operator:literal,$operation:expr) => { - env.declare_global_fn( - FunctionBuilder::default() - .name($operator.to_string()) - .body(FunctionBody::GenericFunction { - type_signature: TypeSignature::Exact(vec![ - Parameter::new("left", StaticType::Bool), - Parameter::new("right", StaticType::Bool), - ]), - function: |values, _env| match values { - [Value::Bool(left), Value::Bool(right)] => Ok(Value::Bool($operation(*left, *right))), - _ => unreachable!("the type checker should never invoke this function if the argument count does not match") - }, - return_type: StaticType::Bool, - }) - .build() - .expect("must succeed") - ); - env.declare_global_fn( - FunctionBuilder::default() - .name($operator.to_string()) - .body(FunctionBody::GenericFunction { - type_signature: TypeSignature::Exact(vec![ - Parameter::new("left", StaticType::Int), - Parameter::new("right", StaticType::Int), - ]), - function: |values, _env| match values { - // TODO: remove this clone - [Value::Number(Number::Int(left)), Value::Number(Number::Int(right))] => Ok(Value::Number(Number::Int($operation(left.clone(), right.clone())))), - _ => unreachable!("the type checker should never invoke this function if the argument count does not match") - }, - return_type: StaticType::Int, - }) - 
.build() - .expect("must succeed"), - ); + ($operator:literal,$operation:expr,$docs_bool:literal,$docs_int:literal) => { + env.declare_global_fn(Rc::new(NativeFunction { + name: $operator.to_string(), + documentation: Some($docs_bool.to_string()), + static_type: StaticType::Function { + parameters: Some(vec![StaticType::Bool, StaticType::Bool]), + return_type: Box::new(StaticType::Bool), + }, + func: NativeFunc::Simple(Box::new(|args| match args { + [Value::Bool(l), Value::Bool(r)] => Ok(Value::Bool($operation(*l, *r))), + _ => Err(VmError::native(format!( + "expected 2 bool arguments, got {}", + args.len() + ))), + })), + })); + env.declare_global_fn(Rc::new(NativeFunction { + name: $operator.to_string(), + documentation: Some($docs_int.to_string()), + static_type: StaticType::Function { + parameters: Some(vec![StaticType::Int, StaticType::Int]), + return_type: Box::new(StaticType::Int), + }, + func: NativeFunc::Simple(Box::new(|args| match args { + [left, right] => { + let l = left.to_int().ok_or_else(|| { + VmError::native(format!("expected int, got {}", left.static_type())) + })?; + let r = right.to_int().ok_or_else(|| { + VmError::native(format!( + "expected int, got {}", + right.static_type() + )) + })?; + Ok(Value::from_int($operation(l, r))) + } + _ => Err(VmError::native(format!( + "expected 2 arguments, got {}", + args.len() + ))), + })), + })); }; } - impl_bitop!("&", std::ops::BitAnd::bitand); - impl_bitop!("|", std::ops::BitOr::bitor); - impl_bitop!("~", std::ops::BitXor::bitxor); - - env.declare_global_fn( - FunctionBuilder::default() - .body(FunctionBody::NumericUnaryOp { body: |x| x.not() }) - .name("~".to_string()) - .build() - .expect("must succeed"), + impl_bitop!( + "&", + std::ops::BitAnd::bitand, + "Logical AND of two booleans.", + "Bitwise AND of two integers." ); + impl_bitop!( + "|", + std::ops::BitOr::bitor, + "Logical OR of two booleans.", + "Bitwise OR of two integers." 
+ ); + impl_bitop!( + "~", + std::ops::BitXor::bitxor, + "Logical XOR of two booleans.", + "Bitwise XOR of two integers." + ); + + env.declare_global_fn(Rc::new(NativeFunction { + name: "~".to_string(), + documentation: Some("Bitwise NOT of a number.".to_string()), + static_type: StaticType::Function { + parameters: Some(vec![StaticType::Number]), + return_type: Box::new(StaticType::Number), + }, + func: NativeFunc::Simple(Box::new(|args| match args { + [v] => v + .to_number() + .map(Not::not) + .map(Value::from_number) + .ok_or_else(|| { + VmError::native(format!("expected number, got {}", v.static_type())) + }), + _ => Err(VmError::native(format!( + "expected 1 argument, got {}", + args.len() + ))), + })), + })); for ident in ["!", "not"] { - env.declare_global_fn( - FunctionBuilder::default() - .body(FunctionBody::GenericFunction { - type_signature: TypeSignature::Exact(vec![Parameter::new("value", StaticType::Bool)]), - function: |values, _env| match values { - [Value::Bool(b)] => Ok(Value::Bool(b.not())), - _ => unreachable!("the type checker should never invoke this function if the argument count does not match"), - }, - return_type: StaticType::Bool, - }) - .name(ident.to_string()) - .build() - .expect("must succeed") - ); + env.declare_global_fn(Rc::new(NativeFunction { + name: ident.to_string(), + documentation: Some( + "Logical negation. 
Returns the opposite boolean value.".to_string(), + ), + static_type: StaticType::Function { + parameters: Some(vec![StaticType::Bool]), + return_type: Box::new(StaticType::Bool), + }, + func: NativeFunc::Simple(Box::new(|args| match args { + [Value::Bool(b)] => Ok(Value::Bool(b.not())), + _ => Err(VmError::native(format!( + "expected 1 bool argument, got {}", + args.len() + ))), + })), + })); } - env.declare_global_fn( - FunctionBuilder::default() - .name(">>".to_string()) - .body(FunctionBody::GenericFunction { - type_signature: TypeSignature::Exact(vec![ - Parameter::new("left", StaticType::Int), - Parameter::new("right", StaticType::Int), - ]), - function: |values, _env| match values { - [Value::Number(Number::Int(left)), Value::Number(Number::Int(right))] => left.clone().checked_shr(right.clone()) - .ok_or_else(|| FunctionCarrier::IntoEvaluationError(Box::new(BinaryOperatorError::new("cannot apply >> operator to operands".to_string())))) // TODO: improve error message - .map(|x| Value::Number(Number::Int(x))), - _ => unreachable!("the type checker should never invoke this function if the argument count does not match") - }, - return_type: StaticType::Int, - }) - .build() - .expect("must succeed") - ); - - env.declare_global_fn( - FunctionBuilder::default() - .name("<<".to_string()) - .body(FunctionBody::GenericFunction { - type_signature: TypeSignature::Exact(vec![ - Parameter::new("left", StaticType::Int), - Parameter::new("right", StaticType::Int), - ]), - function: |values, _env| match values { - [Value::Number(Number::Int(left)), Value::Number(Number::Int(right))] => left.clone().checked_shl(right.clone()) - .ok_or_else(|| FunctionCarrier::IntoEvaluationError(Box::new(BinaryOperatorError::new("cannot apply << operator to operands".to_string())))) // TODO: improve error message - .map(|x| Value::Number(Number::Int(x))), - _ => unreachable!("the type checker should never invoke this function if the argument count does not match") - }, - return_type: 
StaticType::Int, - }) - .build() - .expect("must succeed") - ); + env.declare_global_fn(Rc::new(NativeFunction { + name: ">>".to_string(), + documentation: Some("Right bit shift.".to_string()), + static_type: StaticType::Function { + parameters: Some(vec![StaticType::Int, StaticType::Int]), + return_type: Box::new(StaticType::Int), + }, + func: NativeFunc::Simple(Box::new(|args| match args { + [left, right] => { + let l = left.to_int().ok_or_else(|| { + VmError::native(format!("expected int, got {}", left.static_type())) + })?; + let r = right.to_int().ok_or_else(|| { + VmError::native(format!("expected int, got {}", right.static_type())) + })?; + l.checked_shr(r).map(Value::from_int).ok_or_else(|| { + VmError::native("cannot apply >> operator to operands".to_string()) + }) + } + _ => Err(VmError::native(format!( + "expected 2 arguments, got {}", + args.len() + ))), + })), + })); + + env.declare_global_fn(Rc::new(NativeFunction { + name: "<<".to_string(), + documentation: Some("Left bit shift.".to_string()), + static_type: StaticType::Function { + parameters: Some(vec![StaticType::Int, StaticType::Int]), + return_type: Box::new(StaticType::Int), + }, + func: NativeFunc::Simple(Box::new(|args| match args { + [left, right] => { + let l = left.to_int().ok_or_else(|| { + VmError::native(format!("expected int, got {}", left.static_type())) + })?; + let r = right.to_int().ok_or_else(|| { + VmError::native(format!("expected int, got {}", right.static_type())) + })?; + l.checked_shl(r).map(Value::from_int).ok_or_else(|| { + VmError::native("cannot apply << operator to operands".to_string()) + }) + } + _ => Err(VmError::native(format!( + "expected 2 arguments, got {}", + args.len() + ))), + })), + })); macro_rules! 
delegate_to_f64 { ($method:ident,$docs:literal) => { - let function = FunctionBuilder::default() - .body( - ndc_interpreter::function::FunctionBody::NumericUnaryOp { - body: |num: Number| match num { + env.declare_global_fn(Rc::new(NativeFunction { + name: stringify!($method).to_string(), + documentation: Some($docs.to_string()), + static_type: StaticType::Function { + parameters: Some(vec![StaticType::Number]), + return_type: Box::new(StaticType::Number), + }, + func: NativeFunc::Simple(Box::new(|args| match args { + [v] => v + .to_number() + .map(|num| match num { Number::Int(i) => Number::Float(f64::from(i).$method()), Number::Float(f) => Number::Float(f.$method()), Number::Rational(r) => { Number::Float(r.to_f64().unwrap_or(f64::NAN).$method()) } Number::Complex(c) => Number::Complex(c.$method()), - }, - }, - ) - .name(stringify!($method).to_string()) - .documentation(String::from($docs)) - .build() - .expect("expected delegate_to_f64 to always create function object correctly"); - env.declare_global_fn(function); + }) + .map(Value::from_number) + .ok_or_else(|| { + VmError::native(format!("expected number, got {}", v.static_type())) + }), + _ => Err(VmError::native(format!( + "expected 1 argument, got {}", + args.len() + ))), + })), + })); }; } @@ -506,7 +598,7 @@ pub mod f64 { delegate_to_f64!(asinh, "Inverse hyperbolic sine function."); delegate_to_f64!( atan, - "Computes the arctangent of a number. Return value is in radians in the range [-pi/2, pi/2];" + "Computes the arctangent of a number. Return value is in radians in the range [-pi/2, pi/2]." 
); delegate_to_f64!(atanh, "Inverse hyperbolic tangent function."); delegate_to_f64!(cbrt, "Returns the cube root of a number."); diff --git a/ndc_stdlib/src/rand.rs b/ndc_stdlib/src/rand.rs index 34cc4a76..c0ca4c43 100644 --- a/ndc_stdlib/src/rand.rs +++ b/ndc_stdlib/src/rand.rs @@ -6,40 +6,40 @@ use rand::distr::uniform::SampleUniform; use rand::seq::SliceRandom; use tap::Tap; +use ndc_vm::value::{SeqValue, Value}; + pub fn random_n( lower: N, upper: N, ) -> anyhow::Result { let mut rng = rand::rng(); - let side: Uniform = Uniform::new(lower, upper).context(format!( - "Lower bound ({lower}) cannot be greater than upper bound ({upper})." - ))?; + let side: Uniform = Uniform::new(lower, upper).with_context(|| { + format!("Lower bound ({lower}) cannot be greater than upper bound ({upper}).") + })?; Ok(rng.sample(side)) } #[export_module] mod inner { use itertools::Itertools; - use ndc_interpreter::iterator::mut_seq_to_iterator; - use ndc_interpreter::num::Number; - use ndc_interpreter::sequence::Sequence; - use ndc_interpreter::value::Value; + use ndc_core::num::Number; /// Randomly shuffles the elements of the list in place. - pub fn shuffle(list: &mut [Value]) { + pub fn shuffle(list: &mut Vec) { list.shuffle(&mut rand::rng()); } /// Returns a copy of the input sequence converted to a list with the elements shuffled in random order. /// /// Note: this currently does consume iterators - #[function(return_type = Vec)] - pub fn shuffled(list: &mut Sequence) -> Value { - Value::list( - mut_seq_to_iterator(list) + #[function(return_type = Vec<_>)] + pub fn shuffled(list: SeqValue) -> anyhow::Result { + Ok(Value::list( + list.try_into_iter() + .ok_or_else(|| anyhow::anyhow!("shuffled requires a sequence"))? 
.collect_vec() .tap_mut(|v| v.shuffle(&mut rand::rng())), - ) + )) } #[function(name = "randf")] diff --git a/ndc_stdlib/src/regex.rs b/ndc_stdlib/src/regex.rs index a2670f49..d5d14059 100644 --- a/ndc_stdlib/src/regex.rs +++ b/ndc_stdlib/src/regex.rs @@ -1,19 +1,24 @@ -use ndc_interpreter::value::Value; use once_cell::sync::Lazy; use regex::Regex; -#[ndc_macros::export_module] -mod inner { +use ndc_macros::export_module; +use ndc_vm::value::Value; +#[export_module] +mod inner { /// Extracts all signed integers from the given string. #[function(return_type = Vec)] pub fn nums(haystack: &str) -> Value { static RE: Lazy = Lazy::new(|| Regex::new(r"-?\d+").unwrap()); - Value::collect_list(RE.captures_iter(haystack).filter_map(|cap| { - let (full, []) = cap.extract(); - full.parse::().ok() - })) + Value::list( + RE.captures_iter(haystack) + .filter_map(|cap| { + let (full, []) = cap.extract(); + full.parse::().ok().map(Value::Int) + }) + .collect(), + ) } /// Extracts all unsigned integers from the given string. @@ -21,10 +26,14 @@ mod inner { pub fn unsigned_nums(haystack: &str) -> Value { static RE: Lazy = Lazy::new(|| Regex::new(r"\d+").unwrap()); - Value::collect_list(RE.captures_iter(haystack).filter_map(|cap| { - let (full, []) = cap.extract(); - full.parse::().ok() - })) + Value::list( + RE.captures_iter(haystack) + .filter_map(|cap| { + let (full, []) = cap.extract(); + full.parse::().ok().map(Value::Int) + }) + .collect(), + ) } /// Returns `true` if the string matches the given regular expression. 
@@ -44,7 +53,7 @@ mod inner { Value::list( captures .iter() - .filter_map(|x| x.map(|x| Value::from(x.as_str()))) + .filter_map(|x| x.map(|x| Value::string(x.as_str()))) .collect::>(), ) }) @@ -59,12 +68,12 @@ mod inner { let r = Regex::new(regex)?; let Some(captures) = r.captures(haystack) else { - return Ok(Value::empty_list()); + return Ok(Value::list(vec![])); }; let list = captures .iter() - .filter_map(|x| x.map(|x| Value::from(x.as_str()))) + .filter_map(|x| x.map(|x| Value::string(x.as_str()))) .collect::>(); Ok(Value::list(list)) diff --git a/ndc_stdlib/src/sequence.rs b/ndc_stdlib/src/sequence.rs index aefc9b70..923a9683 100644 --- a/ndc_stdlib/src/sequence.rs +++ b/ndc_stdlib/src/sequence.rs @@ -1,59 +1,12 @@ #![allow(clippy::ptr_arg)] use anyhow::anyhow; -use itertools::Itertools; -use ndc_interpreter::iterator::{MutableValueIntoIterator, mut_seq_to_iterator}; -use ndc_interpreter::sequence::Sequence; -use ndc_interpreter::{ - compare::FallibleOrd, - {evaluate::EvaluationResult, function::Callable, value::Value}, -}; +use ndc_core::compare::FallibleOrd; use ndc_macros::export_module; +use ndc_vm::VmCallable; +use ndc_vm::value::{Object, SeqValue, Value}; +use ndc_vm::{CombinationsIter, TakeIter}; use std::cmp::Ordering; -use std::rc::Rc; - -trait TryCompare { - type Error; - fn try_min(&mut self) -> Result; - fn try_max(&mut self) -> Result; -} - -impl TryCompare for C -where - C: Iterator, - T: FallibleOrd, - T: Into, -{ - type Error = anyhow::Error; - - fn try_min(&mut self) -> Result { - self.try_fold(None::, |a, b| match a { - None => Ok(Some(b)), - Some(a) => a.try_cmp(&b).map(|o| match o { - Ordering::Greater => Some(b), - Ordering::Equal | Ordering::Less => Some(a), - }), - }) - .and_then(|x| { - x.map(Into::into) - .ok_or_else(|| anyhow::anyhow!("empty input to min")) - }) - } - - fn try_max(&mut self) -> Result { - self.try_fold(None::, |a, b| match a { - None => Ok(Some(b)), - Some(a) => a.try_cmp(&b).map(|o| match o { - Ordering::Less => 
Some(b), - Ordering::Equal | Ordering::Greater => Some(a), - }), - }) - .and_then(|x| { - x.map(Into::into) - .ok_or_else(|| anyhow::anyhow!("empty input to max")) - }) - } -} fn try_sort_by( v: &mut [Value], @@ -77,47 +30,127 @@ fn try_sort_by( Ok(()) } +fn vm_try_max(mut iter: impl Iterator) -> anyhow::Result { + iter.try_fold(None::, |acc, b| match acc { + None => Ok(Some(b)), + Some(a) => a + .try_cmp(&b) + .map_err(|e: String| anyhow!(e)) + .map(|o| Some(if o == Ordering::Less { b } else { a })), + }) + .and_then(|x: Option| x.ok_or_else(|| anyhow!("empty input to max"))) +} + +fn vm_try_min(mut iter: impl Iterator) -> anyhow::Result { + iter.try_fold(None::, |acc, b| match acc { + None => Ok(Some(b)), + Some(a) => a + .try_cmp(&b) + .map_err(|e: String| anyhow!(e)) + .map(|o| Some(if o == Ordering::Greater { b } else { a })), + }) + .and_then(|x: Option| x.ok_or_else(|| anyhow!("empty input to min"))) +} + #[export_module] mod inner { - use ndc_interpreter::iterator::{Repeat, ValueIterator}; - use ndc_interpreter::{function::FunctionCarrier, iterator::mut_value_to_iterator}; - use std::cell::RefCell; - + use super::{try_sort_by, vm_try_max, vm_try_min}; + use itertools::Itertools; + use ndc_core::compare::FallibleOrd; + use ndc_vm::VmCallable; + use std::cmp::Ordering; + use std::rc::Rc; + + /// Returns `true` if the element is contained in the sequence. #[function(name = "in")] - pub fn op_contains(elem: &Value, seq: &Sequence) -> bool { - seq.contains(elem) + pub fn op_contains(elem: Value, seq: SeqValue) -> bool { + match &seq { + Value::Object(obj) => match obj.as_ref() { + Object::String(s) => match &elem { + Value::Object(e) => match e.as_ref() { + Object::String(needle) => { + if Rc::ptr_eq(s, needle) { + return true; + } + s.borrow().contains(needle.borrow().as_str()) + } + _ => false, + }, + _ => false, + }, + Object::List(l) => l.borrow().contains(&elem), + Object::Tuple(t) => t.contains(&elem), + Object::Map { entries, .. 
} => entries.borrow().contains_key(&elem), + Object::Deque(d) => d.borrow().contains(&elem), + Object::MinHeap(h) => h.borrow().iter().any(|v| v.0.0 == elem), + Object::MaxHeap(h) => h.borrow().iter().any(|v| v.0 == elem), + Object::Iterator(i) => { + { + let iter = i.borrow(); + if let Some((start, end, inclusive)) = iter.range_bounds() { + let Value::Int(n) = elem else { return false }; + return if inclusive { + n >= start && n <= end + } else { + n >= start && n < end + }; + } + if let Some(start) = iter.unbounded_range_start() { + let Value::Int(n) = elem else { return false }; + return n >= start; + } + } + // Finite non-range iterator — linear scan + loop { + match i.borrow_mut().next() { + Some(v) if v == elem => return true, + Some(_) => {} + None => return false, + } + } + } + _ => false, + }, + _ => false, + } } /// Returns the highest element in the sequence. - pub fn max(seq: &Sequence) -> anyhow::Result { + pub fn max(seq: SeqValue) -> anyhow::Result { match seq { - Sequence::String(s) => s - .try_borrow()? - .chars() - .max() - .ok_or_else(|| anyhow::anyhow!("empty input to max")) - .map(|v| Value::from(String::from(v))), - Sequence::List(l) => l.try_borrow()?.iter().try_max(), - Sequence::Tuple(l) => l.iter().try_max(), - Sequence::Map(map, _) => map.borrow().keys().try_max(), - Sequence::Iterator(iter) => iter.borrow_mut().try_max(), - Sequence::MaxHeap(h) => h - .borrow() - .peek() - .map(|hv| hv.0.clone()) - .ok_or_else(|| anyhow::anyhow!("empty input to max")), - // I think this is always going to be O(n) - Sequence::MinHeap(_) => Err(anyhow::anyhow!("not supported for MinHeap")), - Sequence::Deque(d) => d.try_borrow()?.iter().try_max(), + Value::Object(obj) => match Rc::unwrap_or_clone(obj) { + Object::String(s) => { + let chars: Vec = s + .borrow() + .chars() + .map(|c| Value::string(c.to_string())) + .collect(); + vm_try_max(chars.into_iter()) + } + Object::Map { entries, .. 
} => vm_try_max(entries.into_inner().into_keys()), + Object::MaxHeap(h) => h + .borrow() + .peek() + .map(|v| v.0.clone()) + .ok_or_else(|| anyhow!("empty input to max")), + Object::MinHeap(_) => Err(anyhow!("max is not supported for MinHeap")), + obj => vm_try_max( + Value::Object(Rc::new(obj)) + .try_into_iter() + .ok_or_else(|| anyhow!("cannot find max of non-sequence"))?, + ), + }, + _ => Err(anyhow!("cannot find max of non-sequence")), } } + /// Returns the element for which the key function returns the highest value. - pub fn max_by_key(seq: &mut Sequence, func: &Callable<'_>) -> EvaluationResult { + pub fn max_by_key(seq: SeqValue, func: &mut VmCallable<'_>) -> anyhow::Result { by_key(seq, func, Ordering::Greater) } /// Returns the element for which the key function returns the lowest value. - pub fn min_by_key(seq: &mut Sequence, func: &Callable<'_>) -> EvaluationResult { + pub fn min_by_key(seq: SeqValue, func: &mut VmCallable<'_>) -> anyhow::Result { by_key(seq, func, Ordering::Less) } @@ -126,7 +159,7 @@ mod inner { /// The comparator function takes two elements and returns a number. A positive result means the /// first argument is greater than the second, a negative result means the first argument is /// less than the second, and zero means they are equal. - pub fn max_by(seq: &mut Sequence, comp: &Callable<'_>) -> EvaluationResult { + pub fn max_by(seq: SeqValue, comp: &mut VmCallable<'_>) -> anyhow::Result { by_comp(seq, comp, Ordering::Greater) } @@ -135,56 +168,57 @@ mod inner { /// The comparator function takes two elements and returns a number. A positive result means the /// first argument is greater than the second, a negative result means the first argument is /// less than the second, and zero means they are equal. 
- pub fn min_by(seq: &mut Sequence, comp: &Callable<'_>) -> EvaluationResult { + pub fn min_by(seq: SeqValue, comp: &mut VmCallable<'_>) -> anyhow::Result { by_comp(seq, comp, Ordering::Less) } /// Returns the lowest element in the sequence. - pub fn min(seq: &Sequence) -> anyhow::Result { + pub fn min(seq: SeqValue) -> anyhow::Result { match seq { - Sequence::String(s) => s - .try_borrow()? - .chars() - .min() - .ok_or_else(|| anyhow::anyhow!("empty input to min")) - .map(|v| Value::from(String::from(v))), - Sequence::List(l) => l.try_borrow()?.iter().try_min(), - Sequence::Tuple(l) => l.iter().try_min(), - Sequence::Map(map, _) => map.borrow().keys().try_min(), - Sequence::Iterator(iter) => iter.borrow_mut().try_min(), - // I think this is always going to be O(n) - Sequence::MaxHeap(_) => Err(anyhow::anyhow!("not supported for MaxHeap")), - Sequence::MinHeap(h) => h - .borrow() - .peek() - .map(|hv| hv.0.0.clone()) - .ok_or_else(|| anyhow::anyhow!("empty input to max")), - Sequence::Deque(d) => d.try_borrow()?.iter().try_min(), + Value::Object(obj) => match Rc::unwrap_or_clone(obj) { + Object::String(s) => { + let chars: Vec = s + .borrow() + .chars() + .map(|c| Value::string(c.to_string())) + .collect(); + vm_try_min(chars.into_iter()) + } + Object::Map { entries, .. } => vm_try_min(entries.into_inner().into_keys()), + Object::MinHeap(h) => h + .borrow() + .peek() + .map(|v| v.0.0.clone()) + .ok_or_else(|| anyhow!("empty input to min")), + Object::MaxHeap(_) => Err(anyhow!("min is not supported for MaxHeap")), + obj => vm_try_min( + Value::Object(Rc::new(obj)) + .try_into_iter() + .ok_or_else(|| anyhow!("cannot find min of non-sequence"))?, + ), + }, + _ => Err(anyhow!("cannot find min of non-sequence")), } } - /// Sorts the input sequence in place. - /// - /// This function only works for strings and lists and will throw errors otherwise. 
- pub fn sort(seq: &mut Sequence) -> anyhow::Result<()> { - match seq { - Sequence::String(str) => { - let r = &mut *str.borrow_mut(); - *r = r.chars().sorted().collect::(); - } - Sequence::List(list) => { - let mut m = list.borrow_mut(); - try_sort_by(&mut m, Value::try_cmp)?; - } - Sequence::Tuple(_) => return Err(anyhow!("tuple cannot be sorted in place")), - Sequence::Map(_, _) => return Err(anyhow!("map cannot be sorted in place")), - Sequence::Iterator(_) => return Err(anyhow!("iterator cannot be sorted in place")), - Sequence::MaxHeap(_) | Sequence::MinHeap(_) => { - return Err(anyhow!("heap is already sorted")); + /// Sorts the list in place. + #[function(return_type = ())] + pub fn sort(list: &mut Vec) -> anyhow::Result<()> { + let mut err: Option = None; + list.sort_by(|a, b| match a.partial_cmp(b) { + Some(ord) => ord, + None => { + err = Some(format!( + "cannot compare {} and {}", + a.static_type(), + b.static_type() + )); + Ordering::Equal } - Sequence::Deque(_) => return Err(anyhow!("deque cannot be sorted in place")), + }); + if let Some(e) = err { + return Err(anyhow::anyhow!(e)); } - Ok(()) } @@ -197,20 +231,42 @@ mod inner { /// /// This function only works for strings and lists and will throw errors otherwise. #[function(return_type = ())] - pub fn sort_by(list: &mut Vec, comp: &Callable<'_>) -> EvaluationResult { - try_sort_by::(list, |left, right| { - let ret = comp.call(&mut [left.clone(), right.clone()])?; - - Ok(ret.try_cmp(&Value::from(0))?) 
- })?; - Ok(Value::unit()) + pub fn sort_by(list: &mut Vec, comp: &mut VmCallable<'_>) -> anyhow::Result<()> { + let mut err: Option = None; + list.sort_by(|left, right| { + if err.is_some() { + return Ordering::Equal; + } + match comp.call(vec![left.clone(), right.clone()]) { + Ok(ret) => match ret.cmp_to_zero() { + Ok(ord) => ord, + Err(e) => { + err = Some(e); + Ordering::Equal + } + }, + Err(e) => { + err = Some(e.message); + Ordering::Equal + } + } + }); + if let Some(e) = err { + return Err(anyhow::anyhow!(e)); + } + Ok(()) } /// Returns a sorted copy of the input sequence as a list. - #[function(return_type = Vec)] - pub fn sorted(seq: &mut Sequence) -> anyhow::Result { - let mut list = mut_seq_to_iterator(seq).collect::>(); - try_sort_by(&mut list, Value::try_cmp)?; + #[function(return_type = Vec<_>)] + pub fn sorted(seq: SeqValue) -> anyhow::Result { + let mut list: Vec = seq + .try_into_iter() + .ok_or_else(|| anyhow!("sorted requires a sequence"))? + .collect(); + try_sort_by(&mut list, |a, b| { + a.try_cmp(b).map_err(|e: String| anyhow!(e)) + })?; Ok(Value::list(list)) } @@ -220,27 +276,56 @@ mod inner { /// - for values lower than `0` the first argument is smaller than the second argument /// - for values higher than `0` the first argument is greater than the second argument /// - for values equal to `0` the first argument is equal to the second argument - #[function(return_type = Vec)] - pub fn sorted_by(seq: &mut Sequence, comp: &Callable<'_>) -> EvaluationResult { - let mut list = mut_seq_to_iterator(seq).collect::>(); - try_sort_by::(&mut list, |left, right| { - let ret = comp.call(&mut [left.clone(), right.clone()])?; - - Ok(ret.try_cmp(&Value::from(0))?) - })?; + pub fn sorted_by(seq: SeqValue, comp: &mut VmCallable<'_>) -> anyhow::Result { + let mut list: Vec = seq + .try_into_iter() + .ok_or_else(|| anyhow!("sorted_by requires a sequence"))? 
+ .collect(); + let mut err: Option = None; + list.sort_by(|left, right| { + if err.is_some() { + return Ordering::Equal; + } + match comp.call(vec![left.clone(), right.clone()]) { + Ok(ret) => match ret.cmp_to_zero() { + Ok(ord) => ord, + Err(e) => { + err = Some(e); + Ordering::Equal + } + }, + Err(e) => { + err = Some(e.message); + Ordering::Equal + } + } + }); + if let Some(e) = err { + return Err(anyhow::anyhow!(e)); + } Ok(Value::list(list)) } /// Returns the length of a string in bytes. - pub fn byte_len(str: &str) -> usize { - str.len() + pub fn byte_len(str: &str) -> i64 { + str.len() as i64 } /// Returns the length of the sequence, for strings this returns the number of UTF-8 characters. - pub fn len(seq: &Sequence) -> anyhow::Result { - match seq.length() { - Some(n) => Ok(n), - None => Err(anyhow!( + pub fn len(seq: SeqValue) -> anyhow::Result { + match &seq { + Value::Object(obj) => match obj.as_ref() { + Object::List(l) => Ok(l.borrow().len() as i64), + Object::Tuple(t) => Ok(t.len() as i64), + Object::Deque(d) => Ok(d.borrow().len() as i64), + Object::String(s) => Ok(s.borrow().chars().count() as i64), + Object::Map { entries, .. } => Ok(entries.borrow().len() as i64), + _ => Err(anyhow!( + "cannot determine the length of {}", + seq.static_type() + )), + }, + _ => Err(anyhow!( "cannot determine the length of {}", seq.static_type() )), @@ -248,363 +333,322 @@ mod inner { } /// Enumerates the given sequence returning a list of tuples where the first element of the tuple is the index of the element in the input sequence. 
- - #[function(return_type = Vec<(i64, Value)>)] - pub fn enumerate(seq: &mut Sequence) -> Value { - match seq { - Sequence::String(s) => Value::list( - s.borrow() - .chars() - .enumerate() - .map(|(index, char)| Value::tuple(vec![Value::from(index), Value::from(char)])) - .collect::>(), - ), - Sequence::Map(map, _) => Value::list( - map.borrow() - .iter() - .enumerate() - .map(|(index, (key, value))| { - Value::Sequence(Sequence::Tuple(Rc::new(vec![ - Value::from(index), - Value::Sequence(Sequence::Tuple(Rc::new(vec![ - Value::clone(key), - Value::clone(value), - ]))), - ]))) - }) - .collect::>(), - ), - // TODO: This entire branch is so cringe, why are we even trying to use iterators if we do shit like this - Sequence::Iterator(rc) => { - let mut iter = rc.borrow_mut(); - let mut out = Vec::new(); - for (idx, value) in iter.by_ref().enumerate() { - out.push(Value::Sequence(Sequence::Tuple(Rc::new(vec![ - Value::from(idx), - value, - ])))); - } - - Value::list(out) - } - seq => Value::list( - mut_seq_to_iterator(seq) - .enumerate() - .map(|(idx, value)| Value::tuple(vec![Value::from(idx), value])) - .collect::>(), - ), - } + pub fn enumerate(seq: SeqValue) -> anyhow::Result { + Ok(Value::list( + seq.try_into_iter() + .ok_or_else(|| anyhow!("enumerate requires a sequence"))? + .enumerate() + .map(|(i, v)| Value::tuple(vec![Value::Int(i as i64), v])) + .collect(), + )) } /// Reduces/folds the given sequence using the given combining function and a custom initial value. - #[function(return_type = Vec<_>)] - pub fn fold(seq: &mut Sequence, initial: Value, function: &Callable<'_>) -> EvaluationResult { - fold_iterator(mut_seq_to_iterator(seq), initial, function) + pub fn fold( + seq: SeqValue, + initial: Value, + function: &mut VmCallable<'_>, + ) -> anyhow::Result { + fold_iterator( + seq.try_into_iter() + .ok_or_else(|| anyhow!("fold requires a sequence"))?, + initial, + function, + ) } /// Reduces/folds the given sequence using the given combining function. 
- #[function(return_type = Value)] - pub fn reduce(seq: &mut Sequence, function: &Callable<'_>) -> EvaluationResult { - let mut iterator = mut_seq_to_iterator(seq); + pub fn reduce(seq: SeqValue, function: &mut VmCallable<'_>) -> anyhow::Result { + let mut iterator = seq + .try_into_iter() + .ok_or_else(|| anyhow!("reduce requires a sequence"))?; let fst = iterator .next() .ok_or_else(|| anyhow!("first argument to reduce must not be empty"))?; - fold_iterator(iterator, fst, function) } /// Filters the given sequence using the `predicate`. - #[function(return_type = Vec<_>)] - pub fn filter(seq: &mut Sequence, predicate: &Callable<'_>) -> EvaluationResult { - let iterator = mut_seq_to_iterator(seq); + pub fn filter(seq: SeqValue, predicate: &mut VmCallable<'_>) -> anyhow::Result { let mut out = Vec::new(); - for element in iterator { - out.push(element); - let last_idx = out.len() - 1; - let result = predicate.call(&mut out[last_idx..])?; - match result { - Value::Bool(true) => {} - Value::Bool(false) => { - out.pop(); - } - _ => return Err(anyhow!("return value of predicate must be a boolean").into()), + for element in seq + .try_into_iter() + .ok_or_else(|| anyhow!("filter requires a sequence"))? + { + match predicate + .call(vec![element.clone()]) + .map_err(|e| anyhow!(e))? + { + Value::Bool(true) => out.push(element), + Value::Bool(false) => {} + _ => return Err(anyhow!("return value of predicate must be a boolean")), } } - Ok(Value::list(out)) } /// Returns the number of elements in the input sequence for which the given `predicate` returns `true`. 
- #[function(return_type = i64)] - pub fn count(seq: &mut Sequence, predicate: &Callable<'_>) -> EvaluationResult { - let iterator = mut_seq_to_iterator(seq); - let mut out = 0; - for element in iterator { - let result = predicate.call(&mut [element])?; - match result { - Value::Bool(true) => { - out += 1; - } + pub fn count(seq: SeqValue, predicate: &mut VmCallable<'_>) -> anyhow::Result { + let mut out = 0i64; + for element in seq + .try_into_iter() + .ok_or_else(|| anyhow!("count requires a sequence"))? + { + match predicate.call(vec![element]).map_err(|e| anyhow!(e))? { + Value::Bool(true) => out += 1, Value::Bool(false) => {} - _ => return Err(anyhow!("return value of predicate must be a boolean").into()), + _ => return Err(anyhow!("return value of predicate must be a boolean")), } } - - Ok(Value::number(out)) + Ok(Value::Int(out)) } /// Returns the value of the first element for which the `predicate` is true for the given input sequence. - #[function(return_type = Value)] - pub fn find(seq: &mut Sequence, predicate: &Callable<'_>) -> EvaluationResult { - let iterator = mut_seq_to_iterator(seq); - for element in iterator { - let result = predicate.call(&mut [element.clone()])?; - match result { + pub fn find(seq: SeqValue, predicate: &mut VmCallable<'_>) -> anyhow::Result { + for element in seq + .try_into_iter() + .ok_or_else(|| anyhow!("find requires a sequence"))? + { + match predicate + .call(vec![element.clone()]) + .map_err(|e| anyhow!(e))? + { Value::Bool(true) => return Ok(element), Value::Bool(false) => {} - _ => return Err(anyhow!("return value of predicate must be a boolean").into()), + _ => return Err(anyhow!("return value of predicate must be a boolean")), } } - - Err(anyhow!("find did not find anything").into()) + Err(anyhow!("find did not find anything")) } /// Returns the first index of the element for which the `predicate` is true in the input sequence. 
- #[function(return_type = usize)] - pub fn locate(seq: &mut Sequence, predicate: &Callable<'_>) -> EvaluationResult { - let iterator = mut_seq_to_iterator(seq); - for (idx, element) in iterator.enumerate() { - let result = predicate.call(&mut [element])?; - match result { - Value::Bool(true) => return Ok(Value::from(idx)), + pub fn locate(seq: SeqValue, predicate: &mut VmCallable<'_>) -> anyhow::Result { + for (idx, element) in seq + .try_into_iter() + .ok_or_else(|| anyhow!("locate requires a sequence"))? + .enumerate() + { + match predicate.call(vec![element]).map_err(|e| anyhow!(e))? { + Value::Bool(true) => return Ok(Value::Int(idx as i64)), Value::Bool(false) => {} - _ => return Err(anyhow!("return value of predicate must be a boolean").into()), + _ => return Err(anyhow!("return value of predicate must be a boolean")), } } - - Err(anyhow!("locate did not find anything").into()) + Err(anyhow!("locate did not find anything")) } /// Returns the first index of the element or produces an error - #[function(name = "locate", return_type = usize)] - pub fn locate_element(seq: &mut Sequence, element: &Value) -> EvaluationResult { - let iterator = mut_seq_to_iterator(seq); - for (idx, el) in iterator.enumerate() { - if &el == element { - return Ok(Value::from(idx)); + #[function(name = "locate")] + pub fn locate_element(seq: SeqValue, element: Value) -> anyhow::Result { + for (idx, el) in seq + .try_into_iter() + .ok_or_else(|| anyhow!("locate requires a sequence"))? + .enumerate() + { + if el == element { + return Ok(Value::Int(idx as i64)); } } - - Err(anyhow!("locate did not find anything").into()) + Err(anyhow!("locate did not find anything")) } /// Returns `true` if the `predicate` is true for none of the elements in `seq`. - #[function(return_type = bool)] - pub fn none(seq: &mut Sequence, function: &Callable<'_>) -> EvaluationResult { - for item in mut_seq_to_iterator(seq) { - match function.call(&mut [item])? 
{ + pub fn none(seq: SeqValue, function: &mut VmCallable<'_>) -> anyhow::Result { + for item in seq + .try_into_iter() + .ok_or_else(|| anyhow!("none requires a sequence"))? + { + match function.call(vec![item]).map_err(|e| anyhow!(e))? { Value::Bool(true) => return Ok(Value::Bool(false)), Value::Bool(false) => {} v => { - return Err(anyhow!(format!( + return Err(anyhow!( "invalid return type, predicate returned {}", v.static_type() - )) - .into()); + )); } } } - Ok(Value::Bool(true)) } + /// Returns `true` if the `predicate` is true for all the elements in `seq`. - #[function(return_type = bool)] - pub fn all(seq: &mut Sequence, function: &Callable<'_>) -> EvaluationResult { - for item in mut_seq_to_iterator(seq) { - match function.call(&mut [item])? { + pub fn all(seq: SeqValue, function: &mut VmCallable<'_>) -> anyhow::Result { + for item in seq + .try_into_iter() + .ok_or_else(|| anyhow!("all requires a sequence"))? + { + match function.call(vec![item]).map_err(|e| anyhow!(e))? { Value::Bool(true) => {} Value::Bool(false) => return Ok(Value::Bool(false)), v => { - return Err(anyhow!(format!( + return Err(anyhow!( "invalid return type, predicate returned {}", v.static_type() - )) - .into()); + )); } } } - Ok(Value::Bool(true)) } /// Returns `true` if the `predicate` is true for any of the elements in `seq`. - #[function(return_type = bool)] - pub fn any(seq: &mut Sequence, predicate: &Callable<'_>) -> EvaluationResult { - for item in mut_seq_to_iterator(seq) { - match predicate.call(&mut [item])? { + pub fn any(seq: SeqValue, predicate: &mut VmCallable<'_>) -> anyhow::Result { + for item in seq + .try_into_iter() + .ok_or_else(|| anyhow!("any requires a sequence"))? + { + match predicate.call(vec![item]).map_err(|e| anyhow!(e))? 
{ Value::Bool(true) => return Ok(Value::Bool(true)), Value::Bool(false) => {} v => { - return Err(anyhow!(format!( + return Err(anyhow!( "invalid return type, predicate returned {}", v.static_type() - )) - .into()); + )); } } } - Ok(Value::Bool(false)) } /// Applies the function to each element in a sequence returning the result as a list. - #[function(return_type = Vec<_>)] - pub fn map(seq: &mut Sequence, function: &Callable<'_>) -> EvaluationResult { - let iterator = mut_seq_to_iterator(seq); + pub fn map(seq: SeqValue, function: &mut VmCallable<'_>) -> anyhow::Result { let mut out = Vec::new(); - - for item in iterator { - out.push(function.call(&mut [item])?); + for item in seq + .try_into_iter() + .ok_or_else(|| anyhow!("map requires a sequence"))? + { + out.push(function.call(vec![item]).map_err(|e| anyhow!(e))?); } - Ok(Value::list(out)) } /// Applies a function to each item in a sequence, flattens the resulting sequences, and returns a single combined sequence. - #[function(return_type = Vec<_>)] - pub fn flat_map(seq: &mut Sequence, function: &Callable<'_>) -> EvaluationResult { - // let iterator = ; + pub fn flat_map(seq: SeqValue, function: &mut VmCallable<'_>) -> anyhow::Result { let mut out = Vec::new(); - - for item in mut_seq_to_iterator(seq) { - let fnout = function.call(&mut [item])?; - match fnout { - Value::Sequence(mut inner_seq) => { - out.extend(mut_seq_to_iterator(&mut inner_seq)); - } - _ => { - return Err( - anyhow!("callable argument to flat_map must return a sequence").into(), - ); - } - } + for item in seq + .try_into_iter() + .ok_or_else(|| anyhow!("flat_map requires a sequence"))? + { + let result = function.call(vec![item]).map_err(|e| anyhow!(e))?; + let inner = result + .try_into_iter() + .ok_or_else(|| anyhow!("callable argument to flat_map must return a sequence"))?; + out.extend(inner); } - Ok(Value::list(out)) } /// Returns the first element of the sequence or the `default` value otherwise. 
- pub fn first_or(seq: &mut Sequence, default: Value) -> Value { - let mut iterator = mut_seq_to_iterator(seq); - if let Some(item) = iterator.next() { - item - } else { - default - } + pub fn first_or(seq: SeqValue, default: Value) -> Value { + seq.try_into_iter() + .and_then(|mut i| i.next()) + .unwrap_or(default) } /// Returns the first element of the sequence or the return value of the given function. - pub fn first_or_else(seq: &mut Sequence, default: &Callable<'_>) -> EvaluationResult { - let mut iterator = mut_seq_to_iterator(seq); - Ok(if let Some(item) = iterator.next() { - item - } else { - default.call(&mut [])? - }) + pub fn first_or_else(seq: SeqValue, default: &mut VmCallable<'_>) -> anyhow::Result { + if let Some(item) = seq.try_into_iter().and_then(|mut i| i.next()) { + return Ok(item); + } + default.call(vec![]).map_err(|e| anyhow!(e)) } - /// Returns the `k` sized combinations of the given sequence `seq` as a list of tuples. - #[function(return_type = Vec<_>)] - pub fn combinations(seq: &mut Sequence, k: usize) -> Value { - Value::list( - mut_seq_to_iterator(seq) - .combinations(k) - .map(Value::tuple) - .collect::>(), - ) + /// Returns the `k` sized combinations of the given sequence `seq` as a lazy iterator of tuples. + pub fn combinations(seq: SeqValue, k: i64) -> anyhow::Result { + let k = k as usize; + let iter = CombinationsIter::new(seq, k) + .ok_or_else(|| anyhow!("combinations requires a sequence"))?; + Ok(Value::iterator(iter.into_shared())) + } + + /// Returns a lazy iterator yielding the first `n` elements of `seq`. + #[function(return_type = Iterator)] + pub fn take(seq: SeqValue, n: usize) -> anyhow::Result { + let iter = TakeIter::new(seq, n).ok_or_else(|| anyhow!("take requires a sequence"))?; + Ok(Value::iterator(iter.into_shared())) } /// Returns the `k` sized permutations of the given sequence `seq` as a list of tuples. 
- #[function(return_type = Vec<_>)] - pub fn permutations(seq: &mut Sequence, k: usize) -> Value { - Value::list( - mut_seq_to_iterator(seq) + pub fn permutations(seq: SeqValue, k: i64) -> anyhow::Result { + let k = k as usize; + Ok(Value::list( + seq.try_into_iter() + .ok_or_else(|| anyhow!("permutations requires a sequence"))? .permutations(k) .map(Value::tuple) .collect::>(), - ) + )) } - /// Returns al prefixes of a sequence, each as a list. - #[function(return_type = Vec<_>)] - pub fn prefixes(seq: &mut Sequence) -> Value { - // Special case for string which is more efficient and doesn't produce lists of characters - if let Sequence::String(string) = &seq { - return Value::list( - string - .borrow() - .chars() - .scan(String::new(), |acc, item| { - // Item must be string!! - acc.push(item); - Some(Value::string(acc.clone())) - }) - .collect::>(), - ); + /// Returns all prefixes of a sequence, each as a list. + pub fn prefixes(seq: SeqValue) -> anyhow::Result { + // Special case for String — produce string prefixes instead of lists of chars. + if let Value::Object(ref obj) = seq { + if let Object::String(s) = obj.as_ref() { + return Ok(Value::list( + s.borrow() + .chars() + .scan(String::new(), |acc, c| { + acc.push(c); + Some(Value::string(acc.clone())) + }) + .collect(), + )); + } } - - let iterator = mut_seq_to_iterator(seq); - - Value::list( - iterator + Ok(Value::list( + seq.try_into_iter() + .ok_or_else(|| anyhow!("prefixes requires a sequence"))? .scan(Vec::new(), |acc, item| { acc.push(item); Some(Value::list(acc.clone())) }) - .collect::>(), - ) + .collect(), + )) } /// Returns all suffixes of a sequence, each as a list; for strings, returns all trailing substrings. 
- #[function(return_type = Vec<_>)] - pub fn suffixes(seq: &mut Sequence) -> Value { - // Special case for string which is more efficient and doesn't produce lists of characters - if let Sequence::String(string) = &seq { - return Value::list( - string - .borrow() - .char_indices() - .map(|(idx, _)| Value::string(&string.borrow()[idx..])) - .collect::>(), - ); + pub fn suffixes(seq: SeqValue) -> anyhow::Result { + // Special case for String — produce string suffixes instead of lists of chars. + if let Value::Object(ref obj) = seq { + if let Object::String(s) = obj.as_ref() { + let borrowed = s.borrow(); + return Ok(Value::list( + borrowed + .char_indices() + .map(|(i, _)| Value::string(borrowed[i..].to_string())) + .collect(), + )); + } } - - let iterator = mut_seq_to_iterator(seq); - let out = iterator.collect::>(); - - Value::list( + let out: Vec = seq + .try_into_iter() + .ok_or_else(|| anyhow!("suffixes requires a sequence"))? + .collect(); + Ok(Value::list( (0..out.len()) .map(|i| Value::list(out[i..].to_vec())) - .collect::>(), - ) + .collect(), + )) } /// Transposes a sequence of sequences, turning rows into columns, and returns the result as a list of lists. - // TODO: right now transposed always produces a list, it probably should produce whatever the input type was (if possible) - // TODO: this might not be the expected result for sets (since iterators over sets yield tuples) - #[function(return_type = Vec<_>)] - pub fn transposed(seq: &mut Sequence) -> EvaluationResult { - let mut main = mut_seq_to_iterator(seq).collect::>(); - let mut iterators = Vec::new(); - for iter in &mut main { - iterators.push(mut_value_to_iterator(iter)?); + pub fn transposed(seq: SeqValue) -> anyhow::Result { + let main: Vec = seq + .try_into_iter() + .ok_or_else(|| anyhow!("transposed requires a sequence"))? 
+ .collect(); + let mut iterators: Vec>> = Vec::new(); + for elem in main { + iterators.push(Box::new(elem.try_into_iter().ok_or_else(|| { + anyhow!("elements of transposed sequence must be iterable") + })?)); } let mut out = Vec::new(); loop { - let row = iterators - .iter_mut() - .filter_map(Iterator::next) - .collect::>(); + let row: Vec = iterators.iter_mut().filter_map(|i| i.next()).collect(); if row.is_empty() { return Ok(Value::list(out)); } @@ -613,83 +657,83 @@ mod inner { } /// Return a list of all windows, wrapping back to the first elements when the window would otherwise exceed the length of source list, producing tuples of size 2. - #[function(return_type = Vec<(Value, Value)>)] - pub fn circular_tuple_windows(seq: &mut Sequence) -> Value { - // TODO: this implementation probably clones a bit more than it needs to, but it's better tol - // have something than nothing - Value::list( - mut_seq_to_iterator(seq) + pub fn circular_tuple_windows(seq: SeqValue) -> anyhow::Result { + Ok(Value::list( + seq.try_into_iter() + .ok_or_else(|| anyhow!("circular_tuple_windows requires a sequence"))? .collect::>() .iter() .circular_tuple_windows::<(_, _)>() .map(|(a, b)| Value::tuple(vec![a.clone(), b.clone()])) .collect::>(), - ) + )) } /// Returns a list of all size-2 windows in `seq`. - #[function(return_type = Vec<(Value, Value)>)] - pub fn pairwise(seq: &mut Sequence) -> Value { - Value::list( - mut_seq_to_iterator(seq) + pub fn pairwise(seq: SeqValue) -> anyhow::Result { + Ok(Value::list( + seq.try_into_iter() + .ok_or_else(|| anyhow!("pairwise requires a sequence"))? .collect::>() .windows(2) - .map(Value::tuple) + .map(|w| Value::list(w.to_vec())) .collect::>(), - ) + )) } /// Applies a function to each pair of consecutive elements in a sequence and returns the results as a list. 
#[function(name = "pairwise")] - #[function(return_type = Vec<(Value, Value)>)] - pub fn pairwise_map(seq: &mut Sequence, function: &Callable<'_>) -> EvaluationResult { - let main = mut_seq_to_iterator(seq).collect::>(); - - let mut out = Vec::with_capacity(main.len() - 1); + pub fn pairwise_map(seq: SeqValue, function: &mut VmCallable<'_>) -> anyhow::Result { + let main: Vec = seq + .try_into_iter() + .ok_or_else(|| anyhow!("pairwise requires a sequence"))? + .collect(); + let mut out = Vec::with_capacity(main.len().saturating_sub(1)); for (a, b) in main.into_iter().tuple_windows() { - out.push(function.call(&mut [a, b])?); + out.push(function.call(vec![a, b]).map_err(|e| anyhow!(e))?); } - Ok(Value::list(out)) } /// Returns a list of all contiguous windows of `length` size. The windows overlap. If the `seq` is shorter than size, the iterator returns no values. - #[function(return_type = Vec>)] - pub fn windows(seq: &mut Sequence, length: usize) -> Value { - Value::list( - mut_seq_to_iterator(seq) + pub fn windows(seq: SeqValue, length: i64) -> anyhow::Result { + let length = length as usize; + Ok(Value::list( + seq.try_into_iter() + .ok_or_else(|| anyhow!("windows requires a sequence"))? .collect::>() .windows(length) - .map(Value::list) + .map(|w| Value::list(w.to_vec())) .collect::>(), - ) + )) } /// Return a list that represents the powerset of the elements of `seq`. /// /// The powerset of a set contains all subsets including the empty set and the full input set. A powerset has length `2^n` where `n` is the length of the input set. /// Each list produced by this function represents a subset of the elements in the source sequence. - #[function(return_type = Vec>)] - pub fn subsequences(seq: &mut Sequence) -> Value { - Value::list( - mut_seq_to_iterator(seq) + pub fn subsequences(seq: SeqValue) -> anyhow::Result { + Ok(Value::list( + seq.try_into_iter() + .ok_or_else(|| anyhow!("subsequences requires a sequence"))? 
.powerset() .map(Value::list) .collect::>(), - ) + )) } /// Return a list that represents the powerset of the elements of `seq` that are exactly `length` long. #[function(name = "subsequences")] - #[function(return_type = Vec>)] - pub fn subsequences_len(seq: &mut Sequence, length: usize) -> Value { - Value::list( - mut_seq_to_iterator(seq) + pub fn subsequences_len(seq: SeqValue, length: i64) -> anyhow::Result { + let length = length as usize; + Ok(Value::list( + seq.try_into_iter() + .ok_or_else(|| anyhow!("subsequences requires a sequence"))? .powerset() .filter(|x| x.len() == length) .map(Value::list) .collect::>(), - ) + )) } /// Computes the Cartesian product of multiple iterables, returning a list of all possible combinations where each combination contains one element from each iterable. @@ -718,181 +762,165 @@ mod inner { /// [3,"c",false] /// ] /// ``` - #[function(return_type = Vec>)] - pub fn multi_cartesian_product(seq: &mut Sequence) -> anyhow::Result { + pub fn multi_cartesian_product(seq: SeqValue) -> anyhow::Result { let mut iterators = Vec::new(); - - for mut value in mut_seq_to_iterator(seq) { - let iter = mut_value_to_iterator(&mut value)?.collect_vec().into_iter(); - iterators.push(iter); + for elem in seq + .try_into_iter() + .ok_or_else(|| anyhow!("multi_cartesian_product requires a sequence"))? + { + let inner: Vec = elem + .try_into_iter() + .ok_or_else(|| anyhow!("elements of sequence must be iterable"))? + .collect_vec(); + iterators.push(inner.into_iter()); } - - let out = iterators - .into_iter() - .multi_cartesian_product() - .map(Value::list) - .collect_vec(); - - Ok(Value::list(out)) + Ok(Value::list( + iterators + .into_iter() + .multi_cartesian_product() + .map(Value::list) + .collect_vec(), + )) } /// Split the input sequence into evenly sized chunks. If the input length of the sequence /// is not dividable by the chunk_size the last chunk will contain fewer elements. 
- #[function(return_type = Vec>)] - pub fn chunks(seq: &mut Sequence, chunk_size: usize) -> anyhow::Result { + pub fn chunks(seq: SeqValue, chunk_size: usize) -> anyhow::Result { if chunk_size == 0 { return Err(anyhow!("chunk size must be non-zero")); } - - let iter = mut_seq_to_iterator(seq); - Ok(Value::list( - iter.chunks(chunk_size) + seq.try_into_iter() + .ok_or_else(|| anyhow!("chunks requires a sequence"))? + .chunks(chunk_size) .into_iter() .map(|chunk| Value::list(chunk.collect_vec())) .collect_vec(), )) } + /// Returns an infinite iterator that repeats the given value. #[function(return_type = Iterator)] pub fn repeat(value: Value) -> Value { - Value::Sequence(Sequence::Iterator(Rc::new(RefCell::new( - ValueIterator::Repeat(Repeat { - value, - cur: 0, - limit: None, - }), + Value::iterator(Rc::new(std::cell::RefCell::new(ndc_vm::RepeatIter::new( + value, )))) } + /// Returns an iterator that repeats the given value `times` times. #[function(name = "repeat", return_type = Iterator)] pub fn repeat_times(value: Value, times: usize) -> Value { - Value::Sequence(Sequence::Iterator(Rc::new(RefCell::new( - ValueIterator::Repeat(Repeat { - value, - cur: 0, - limit: Some(times), - }), - )))) + Value::iterator(Rc::new(std::cell::RefCell::new( + ndc_vm::RepeatIter::new_limited(value, times), + ))) } } -fn by_key(seq: &mut Sequence, func: &Callable<'_>, better: Ordering) -> EvaluationResult { - let mut best_value = None; +fn by_key(seq: Value, func: &mut VmCallable<'_>, better: Ordering) -> anyhow::Result { + let mut best_value: Option = None; let mut best_key: Option = None; - - for value in mut_seq_to_iterator(seq) { - let new_key = func.call(&mut [value.clone()])?; + for value in seq + .try_into_iter() + .ok_or_else(|| anyhow!("sequence is required"))? + { + let new_key = func.call(vec![value.clone()]).map_err(|e| anyhow!(e))?; let is_better = match &best_key { None => true, - Some(current_best) => new_key.try_cmp(current_best)? 
== better, + Some(current_best) => { + new_key + .try_cmp(current_best) + .map_err(|e: String| anyhow!(e))? + == better + } }; if is_better { best_key = Some(new_key); best_value = Some(value); } } - - best_value.ok_or_else(|| anyhow::anyhow!("sequence was empty").into()) + best_value.ok_or_else(|| anyhow!("sequence was empty")) } -fn by_comp(seq: &mut Sequence, comp: &Callable<'_>, better: Ordering) -> EvaluationResult { +fn by_comp(seq: Value, comp: &mut VmCallable<'_>, better: Ordering) -> anyhow::Result { let mut best: Option = None; - - for value in mut_seq_to_iterator(seq) { + for value in seq + .try_into_iter() + .ok_or_else(|| anyhow!("sequence is required"))? + { let is_better = match &best { None => true, Some(current) => { - let result = comp.call(&mut [value.clone(), current.clone()])?; - result.try_cmp(&Value::from(0))? == better + let result = comp + .call(vec![value.clone(), current.clone()]) + .map_err(|e| anyhow!(e))?; + result.cmp_to_zero().map_err(|e| anyhow!(e))? == better } }; if is_better { best = Some(value); } } - - best.ok_or_else(|| anyhow::anyhow!("sequence was empty").into()) + best.ok_or_else(|| anyhow!("sequence was empty")) } fn fold_iterator( - iterator: MutableValueIntoIterator<'_>, + iter: impl Iterator, initial: Value, - function: &Callable<'_>, -) -> EvaluationResult { + function: &mut VmCallable<'_>, +) -> anyhow::Result { let mut acc = initial; - for item in iterator { - acc = function.call(&mut [acc, item])?; + for item in iter { + acc = function.call(vec![acc, item]).map_err(|e| anyhow!(e))?; } - Ok(acc) } pub mod extra { - use anyhow::anyhow; - use itertools::izip; - - use ndc_interpreter::function::{FunctionBuilder, StaticType}; - use ndc_interpreter::{ - environment::Environment, function::FunctionBody, iterator::mut_value_to_iterator, - value::Value, - }; - - pub fn register(env: &mut Environment) { - env.declare_global_fn( - FunctionBuilder::default() - .name("zip".to_string()) - .documentation("Combines multiple 
sequences (or iterables) into a single sequence of tuples, where the ith tuple contains the ith element from each input sequence.\n\nIf the input sequences are of different lengths, the resulting sequence is truncated to the length of the shortest input.".to_string()) - .body(FunctionBody::generic( - ndc_interpreter::function::TypeSignature::Variadic, - StaticType::List(Box::new(StaticType::Tuple(vec![StaticType::Any, StaticType::Any]))), - |args, _env| match args { - [_] => { - Err(anyhow!("zip must be called with 2 or more arguments").into()) - } - [a, b] => { - let a = mut_value_to_iterator(a)?; - let b = mut_value_to_iterator(b)?; - let out = a - .zip(b) - .map(|(a, b)| Value::tuple(vec![a, b])) - .collect::>(); - Ok(Value::list(out)) - } - [a, b, c] => { - let a = mut_value_to_iterator(a)?; - let b = mut_value_to_iterator(b)?; - let c = mut_value_to_iterator(c)?; - let mut out = Vec::new(); - for (a, b, c) in izip!(a, b, c) { - out.push(Value::tuple(vec![a, b, c])); - } - Ok(Value::list(out)) - } - values => { - // HOLY HEAP ALLOCATION BATMAN! 
- // This branch can probably lose some heap allocations if I had 50 more IQ points - let mut lists = Vec::with_capacity(values.len()); - for value in values.iter_mut() { - lists.push(mut_value_to_iterator(value)?.collect::>()); - } - let out_len = lists.iter().map(Vec::len).min().unwrap(); - let mut out = Vec::with_capacity(out_len); - - for idx in 0..out_len { - let mut tup = Vec::with_capacity(lists.len()); - for (list_idx, _) in lists.iter().enumerate() { - tup.insert(list_idx, lists[list_idx][idx].clone()); - } - out.insert(idx, Value::tuple(tup)); - } - - Ok(Value::list(out)) - } + use ndc_core::{FunctionRegistry, StaticType}; + use ndc_vm::error::VmError; + use ndc_vm::value::{NativeFunc, NativeFunction, Object, Value}; + use std::rc::Rc; + + pub fn register(env: &mut FunctionRegistry>) { + env.declare_global_fn(Rc::new(NativeFunction { + name: "zip".to_string(), + documentation: Some("Zips two or more sequences together into a list of tuples. The result length is the minimum length of the inputs.".to_string()), + static_type: StaticType::Function { + parameters: None, + return_type: Box::new(StaticType::List(Box::new(StaticType::Tuple(vec![ + StaticType::Any, + StaticType::Any, + ])))), + }, + func: NativeFunc::Simple(Box::new(|args| { + if args.len() < 2 { + return Err(VmError::native( + "zip must be called with 2 or more arguments".to_string(), + )); + } + let iters: Option>> = args + .iter() + .map(|arg| match arg { + Value::Object(obj) => match obj.as_ref() { + Object::List(l) => Some(l.borrow().clone()), + Object::Tuple(t) => Some(t.to_vec()), + _ => None, }, - )) - .build() - .expect("function definitions must be valid"), - ); + _ => None, + }) + .collect(); + let iters = + iters.ok_or_else(|| VmError::native("zip requires sequences".to_string()))?; + let min_len = iters.iter().map(|v| v.len()).min().unwrap_or(0); + let result: Vec = (0..min_len) + .map(|i| { + Value::Object(Rc::new(Object::Tuple( + iters.iter().map(|v| v[i].clone()).collect(), + ))) 
+ }) + .collect(); + Ok(Value::Object(Rc::new(Object::list(result)))) + })), + })); } } diff --git a/ndc_stdlib/src/serde.rs b/ndc_stdlib/src/serde.rs index 19e6b9f5..2b2783b4 100644 --- a/ndc_stdlib/src/serde.rs +++ b/ndc_stdlib/src/serde.rs @@ -1,53 +1,44 @@ -use ndc_macros::export_module; -use std::rc::Rc; -use std::{cell::RefCell, str::FromStr}; - use anyhow::Context; -use ndc_interpreter::hash_map::HashMap; -use ndc_interpreter::sequence::Sequence; -use ndc_interpreter::value::Value; -use num::BigInt; +use ndc_core::hash_map::HashMap; +use ndc_macros::export_module; +use ndc_vm::value::{Object, Value}; use num::ToPrimitive; use serde_json::{Map, Number, Value as JsonValue, json}; +use std::rc::Rc; fn value_to_json(value: Value) -> Result { match value { - Value::Option(Some(value)) => value_to_json(*value), - Value::Option(None) => Ok(JsonValue::Null), - Value::Number(number) => match number { - ndc_interpreter::num::Number::Int(int) => match int { - ndc_interpreter::int::Int::Int64(i) => Ok(json!(i)), - ndc_interpreter::int::Int::BigInt(big_int) => { - Number::from_str(&big_int.to_string()) - .map(JsonValue::Number) - .context("Cannot convert bigint to string") - } - }, - ndc_interpreter::num::Number::Float(f) => Ok(json!(f)), - ndc_interpreter::num::Number::Rational(ratio) => Ok(json!(ratio.to_f64())), - ndc_interpreter::num::Number::Complex(complex) => Ok(json!(format!("{complex}"))), - }, + Value::None => Ok(JsonValue::Null), Value::Bool(b) => Ok(json!(b)), - Value::Sequence(s) => match s { - Sequence::String(s) => Ok(json!(&*s.borrow())), - Sequence::List(values) => Ok(JsonValue::Array( - values - .borrow() + Value::Int(i) => Ok(json!(i)), + Value::Float(f) => Ok(json!(f)), + Value::Object(obj) => match obj.as_ref() { + Object::Some(inner) => value_to_json(inner.clone()), + Object::BigInt(big_int) => { + use std::str::FromStr; + Number::from_str(&big_int.to_string()) + .map(JsonValue::Number) + .context("Cannot convert bigint to JSON number") + } + 
Object::Rational(ratio) => Ok(json!(ratio.to_f64())), + Object::Complex(complex) => Ok(json!(format!("{complex}"))), + Object::String(s) => Ok(json!(&*s.borrow())), + Object::List(v) => Ok(JsonValue::Array( + v.borrow() .iter() .map(|v| value_to_json(v.clone())) .collect::, _>>()?, )), - Sequence::Tuple(values) => match values.len() { + Object::Tuple(v) => match v.len() { 0 => Ok(JsonValue::Null), _ => Ok(JsonValue::Array( - values - .iter() + v.iter() .map(|v| value_to_json(v.clone())) .collect::, _>>()?, )), }, - Sequence::Map(values, _) => Ok(JsonValue::Object( - values + Object::Map { entries, .. } => Ok(JsonValue::Object( + entries .borrow() .iter() .map(|(key, value)| { @@ -55,34 +46,36 @@ fn value_to_json(value: Value) -> Result { }) .collect::, _>>()?, )), - Sequence::Iterator(i) => { - let mut i = i.borrow_mut(); + Object::Iterator(i) => { let mut out = Vec::new(); - for value in i.by_ref() { - out.push(value_to_json(value)?); + let mut iter = i.borrow_mut(); + while let Some(v) = iter.next() { + out.push(value_to_json(v)?); } Ok(JsonValue::Array(out)) } - Sequence::MaxHeap(h) => Ok(JsonValue::Array( + Object::MaxHeap(h) => Ok(JsonValue::Array( h.borrow() .iter() - .map(|h| value_to_json(h.0.clone())) + .map(|v| value_to_json(v.0.clone())) .collect::, _>>()?, )), - Sequence::MinHeap(h) => Ok(JsonValue::Array( + Object::MinHeap(h) => Ok(JsonValue::Array( h.borrow() .iter() - .map(|h| value_to_json(h.0.0.clone())) + .map(|v| value_to_json(v.0.0.clone())) .collect::, _>>()?, )), - Sequence::Deque(d) => Ok(JsonValue::Array( + Object::Deque(d) => Ok(JsonValue::Array( d.borrow() .iter() .map(|v| value_to_json(v.clone())) .collect::, _>>()?, )), + Object::Function(_) | Object::OverloadSet(_) => { + Err(anyhow::anyhow!("Unable to serialize function")) + } }, - Value::Function(_) => Err(anyhow::anyhow!("Unable to serialize function")), } } @@ -90,34 +83,32 @@ fn json_to_value(value: JsonValue) -> Result { Ok(match value { JsonValue::Null => Value::unit(), 
JsonValue::Bool(b) => Value::Bool(b), - JsonValue::Number(n) => n.as_str().parse::().map(Value::from).or_else(|_| { - n.as_f64() - .map(Value::from) - .context("Cannot parse number as int or float") - })?, + JsonValue::Number(n) => { + if let Some(i) = n.as_i64() { + Value::Int(i) + } else if let Some(f) = n.as_f64() { + Value::Float(f) + } else { + return Err(anyhow::anyhow!("Cannot parse JSON number")); + } + } JsonValue::String(s) => Value::string(s), JsonValue::Array(a) => Value::list( a.into_iter() .map(json_to_value) .collect::, _>>()?, ), - JsonValue::Object(o) => Value::Sequence(Sequence::Map( - Rc::new(RefCell::new( - o.into_iter() - .map(|(key, value)| { - json_to_value(value).map(|value| (Value::string(key), value)) - }) - .collect::, _>>()?, - )), + JsonValue::Object(o) => Value::Object(Rc::new(Object::map( + o.into_iter() + .map(|(key, value)| json_to_value(value).map(|value| (Value::string(key), value))) + .collect::, _>>()?, None, - )), + ))), }) } #[export_module] mod inner { - use ndc_interpreter::value::Value; - /// Converts a JSON string to a value pub fn json_decode(input: &str) -> anyhow::Result { let json: JsonValue = serde_json::from_str(input)?; diff --git a/ndc_stdlib/src/string.rs b/ndc_stdlib/src/string.rs index ce26a1b3..6e2c061c 100644 --- a/ndc_stdlib/src/string.rs +++ b/ndc_stdlib/src/string.rs @@ -1,15 +1,18 @@ use ndc_macros::export_module; +use ndc_vm::value::{SeqValue, Value}; -use ndc_interpreter::iterator::mut_seq_to_iterator; -use ndc_interpreter::sequence::{Sequence, StringRepr}; -use ndc_interpreter::value::Value; +use std::cell::RefCell; use std::rc::Rc; +type StringRepr = Rc>; + use anyhow::{Context, anyhow}; -use std::fmt::Write; -pub fn join_to_string(list: &mut Sequence, sep: &str) -> anyhow::Result { - let mut iter = mut_seq_to_iterator(list); +pub fn join_to_string(list: SeqValue, sep: &str) -> anyhow::Result { + use std::fmt::Write; + let mut iter = list + .try_into_iter() + .ok_or_else(|| anyhow!("join requires a 
sequence"))?; match iter.next() { None => Ok(String::new()), Some(first) => { @@ -26,24 +29,26 @@ pub fn join_to_string(list: &mut Sequence, sep: &str) -> anyhow::Result #[export_module] mod inner { - /// The string concat operator + /// Concatenates two values into a string. #[function(name = "<>")] - pub fn op_string_concat(left: &Value, right: &Value) -> String { + pub fn op_string_concat(left: Value, right: Value) -> String { format!("{left}{right}") } + /// Appends the right string to the left string in place. #[function(name = "++=")] - pub fn op_list_concat(left: &mut StringRepr, right: &mut StringRepr) { + pub fn op_list_concat(left: &mut StringRepr, right: &mut StringRepr) -> Value { if Rc::ptr_eq(left, right) { let new = right.borrow().repeat(2).clone(); *left.borrow_mut() = new; } else { left.borrow_mut().push_str(&right.borrow()) } + Value::from_string_rc(Rc::clone(left)) } /// Returns the provided value as a string - pub fn string(value: &Value) -> String { + pub fn string(value: Value) -> String { format!("{value}") } @@ -100,54 +105,54 @@ mod inner { string.push_str(value); } - // TODO: should we optimize something here? + /// Concatenates two strings into a new string. #[function(name = "++")] pub fn concat(left: &str, right: &str) -> String { format!("{left}{right}") } /// Joins elements of the sequence into a single string using `sep` as the separator. - pub fn join(list: &mut Sequence, sep: &str) -> anyhow::Result { + pub fn join(list: SeqValue, sep: &str) -> anyhow::Result { join_to_string(list, sep) } /// Splits the string into paragraphs, using blank lines as separators. #[function(return_type = Vec)] pub fn paragraphs(string: &str) -> Value { - Value::collect_list(string.split("\n\n").map(ToString::to_string)) + Value::list(string.split("\n\n").map(Value::string).collect()) } /// Joins paragraphs into a single string, inserting blank lines between them. 
- pub fn unparagraphs(list: &mut Sequence) -> anyhow::Result { + pub fn unparagraphs(list: SeqValue) -> anyhow::Result { join_to_string(list, "\n\n") } /// Splits the string into lines, using newline characters as separators. #[function(return_type = Vec)] pub fn lines(string: &str) -> Value { - Value::collect_list(string.lines().map(ToString::to_string)) + Value::list(string.lines().map(Value::string).collect()) } /// Joins lines into a single string, inserting newline characters between them. - pub fn unlines(list: &mut Sequence) -> anyhow::Result { + pub fn unlines(list: SeqValue) -> anyhow::Result { join_to_string(list, "\n") } /// Splits the string into words, using whitespace as the separator. #[function(return_type = Vec)] pub fn words(string: &str) -> Value { - Value::collect_list(string.split_whitespace().map(ToString::to_string)) + Value::list(string.split_whitespace().map(Value::string).collect()) } /// Joins words into a single string, separating them with spaces. - pub fn unwords(list: &mut Sequence) -> anyhow::Result { + pub fn unwords(list: SeqValue) -> anyhow::Result { join_to_string(list, " ") } /// Splits the string by whitespace into a list of substrings. #[function(return_type = Vec)] pub fn split(string: &str) -> Value { - Value::collect_list(string.split_whitespace().map(ToString::to_string)) + Value::list(string.split_whitespace().map(Value::string).collect()) } /// Returns `true` if `haystack` starts with `pat`. @@ -163,13 +168,7 @@ mod inner { /// Splits the string using a given pattern as the delimiter. #[function(name = "split", return_type = Vec)] pub fn split_with_pattern(string: &str, pattern: &str) -> Value { - Value::list( - string - .split(pattern) - .map(ToString::to_string) - .map(Value::string) - .collect::>(), - ) + Value::list(string.split(pattern).map(Value::string).collect()) } /// Splits the string at the first occurrence of `pattern`, returning a tuple-like value. 
diff --git a/ndc_stdlib/src/value.rs b/ndc_stdlib/src/value.rs index 13e21bb7..747099af 100644 --- a/ndc_stdlib/src/value.rs +++ b/ndc_stdlib/src/value.rs @@ -1,131 +1,82 @@ use ndc_macros::export_module; -use std::fmt::Write; +use ndc_vm::value::{Object, Value}; #[export_module] mod inner { - use ndc_interpreter::function::Callable; - use ndc_interpreter::heap::{MaxHeap, MinHeap}; - use ndc_interpreter::sequence::Sequence; - use ndc_interpreter::value::Value; - use std::cell::RefCell; use std::rc::Rc; - /// Returns the documentation as a string for a given function in Andy C++. - /// - /// This function takes a function as its argument and returns a string containing its documentation. - pub fn docs(func: &Callable<'_>) -> anyhow::Result { - let mut buf = String::new(); - - let sig = func.function.type_signature(); - let fun = &func.function; - - if fun.name().is_empty() { - write!(buf, "fn({sig})")?; - } else { - write!(buf, "fn {}({sig})", fun.name())?; - } - - if !fun.short_documentation().is_empty() { - writeln!(buf, " -> {}", fun.short_documentation())?; - } else { - writeln!(buf)?; + /// Returns the documentation string for a function, or an empty string if none is available. + pub fn docs(value: Value) -> anyhow::Result { + match &value { + Value::Object(obj) => match obj.as_ref() { + Object::Function(f) => Ok(f.documentation().unwrap_or("").to_string()), + _ => Err(anyhow::anyhow!( + "docs requires a function, got {}", + value.static_type() + )), + }, + _ => Err(anyhow::anyhow!( + "docs requires a function, got {}", + value.static_type() + )), } - - buf.pop(); // Remove last newline - - Ok(buf) } - /// Returns the reference count for the value, if the value is not reference counted it will return 0 + /// Returns the reference count for the value, if the value is not reference counted it will return 0. 
/// /// Note: this function does increase the ref count by 1 pub fn ref_count(value: Value) -> usize { match value { - Value::Option(_) | Value::Number(_) | Value::Bool(_) => 0, - Value::Sequence(seq) => match seq { - Sequence::String(rc) => Rc::strong_count(&rc), - Sequence::List(rc) => Rc::strong_count(&rc), - Sequence::Tuple(rc) => Rc::strong_count(&rc), - Sequence::Map(rc, _) => Rc::strong_count(&rc), - Sequence::Iterator(rc) => Rc::strong_count(&rc), - Sequence::MaxHeap(rc) => Rc::strong_count(&rc), - Sequence::MinHeap(rc) => Rc::strong_count(&rc), - Sequence::Deque(rc) => Rc::strong_count(&rc), - }, - Value::Function(r) => Rc::strong_count(&r), + Value::Object(rc) => Rc::strong_count(&rc), + _ => 0, } } /// Creates a new instance of `Some` - #[function(name = "Some", return_type = Option)] // <-- fake type constructor + #[function(name = "Some", return_type = Option)] pub fn some(value: Value) -> Value { - Value::Option(Some(Box::new(value))) + Value::Object(Rc::new(Object::Some(value))) } /// Creates a new instance of `None` #[function(return_type = Option<_>)] pub fn none() -> Value { - Value::Option(None) + Value::None } - /// Returns true if the argument is Some - pub fn is_some(value: &Value) -> bool { - matches!(value, Value::Option(Some(_))) + /// Returns true if the argument is `Some`. + pub fn is_some(value: Value) -> bool { + matches!( + value, + Value::Object(ref obj) + if matches!(obj.as_ref(), Object::Some(_)) + ) } - /// Returns true if the argument is None - pub fn is_none(value: &Value) -> bool { - matches!(value, Value::Option(None)) + /// Returns true if the argument is `None`. 
+ pub fn is_none(value: Value) -> bool { + matches!(value, Value::None) } /// Extracts the value from an Option or errors if it's either None or a non-Option type - /// - /// Note: this function should take an Option as parameter - // TODO: the type of value should be `Option` but the macro crate probably doesn't support that yet pub fn unwrap(value: Value) -> anyhow::Result { match value { - Value::Option(Some(val)) => Ok(*val), - Value::Option(None) => Err(anyhow::anyhow!("option was none")), - _ => Err(anyhow::anyhow!( - "incorrect argument to unwrap (temporary error)" - )), + Value::Object(obj) => match obj.as_ref() { + Object::Some(inner) => Ok(inner.clone()), + _ => Err(anyhow::anyhow!("incorrect argument to unwrap")), + }, + Value::None => Err(anyhow::anyhow!("option was none")), + _ => Err(anyhow::anyhow!("incorrect argument to unwrap")), } } /// Returns a shallow copy of the given value. - pub fn clone(value: &Value) -> Value { - match value { - Value::Option(o) => Value::Option(o.clone()), - number @ Value::Number(_) => number.clone(), - Value::Bool(b) => Value::Bool(*b), - Value::Sequence(Sequence::String(string)) => Value::string(string.borrow().to_owned()), - Value::Sequence(Sequence::List(list)) => Value::list(list.borrow().to_owned()), - Value::Sequence(Sequence::Map(map, default)) => Value::Sequence(Sequence::Map( - Rc::new(RefCell::new(map.borrow().clone())), - default.to_owned(), - )), - Value::Sequence(Sequence::Tuple(tuple)) => { - Value::Sequence(Sequence::Tuple(tuple.clone())) - } - Value::Sequence(Sequence::Iterator(iterator)) => { - Value::Sequence(Sequence::Iterator(iterator.clone())) - } - Value::Sequence(Sequence::MaxHeap(heap)) => Value::Sequence(Sequence::MaxHeap( - Rc::new(RefCell::new(MaxHeap::from_heap(heap.borrow().to_owned()))), - )), - Value::Sequence(Sequence::MinHeap(heap)) => Value::Sequence(Sequence::MinHeap( - Rc::new(RefCell::new(MinHeap::from_heap(heap.borrow().to_owned()))), - )), - Value::Sequence(Sequence::Deque(deque)) 
=> Value::Sequence(Sequence::Deque(Rc::new( - RefCell::new(deque.borrow().to_owned()), - ))), - // TODO: for function should deepcopy have some special behavior - Value::Function(f) => Value::Function(f.clone()), - } + pub fn clone(value: Value) -> Value { + value.shallow_clone() } /// Returns a deep copy of the given value, duplicating all nested structures. - pub fn deepcopy(value: &Value) -> Value { - value.deepcopy() + pub fn deepcopy(value: Value) -> Value { + value.deep_copy() } } diff --git a/ndc_vm/Cargo.toml b/ndc_vm/Cargo.toml new file mode 100644 index 00000000..da351e2a --- /dev/null +++ b/ndc_vm/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "ndc_vm" +edition.workspace = true +version.workspace = true + +[features] +vm-trace = [] + +[dependencies] +thiserror.workspace = true +ndc_parser.workspace = true +ndc_lexer.workspace = true +ndc_core.workspace = true +num.workspace = true +ordered-float.workspace = true diff --git a/ndc_vm/src/chunk.rs b/ndc_vm/src/chunk.rs new file mode 100644 index 00000000..9e04d9dd --- /dev/null +++ b/ndc_vm/src/chunk.rs @@ -0,0 +1,231 @@ +use crate::Value; +use ndc_lexer::Span; +use ndc_parser::CaptureSource; +use std::rc::Rc; + +/// A single bytecode instruction. +/// +/// ## Stack effects +/// +/// Each instruction documents its net stack effect as `[before → after]`. +/// Variable-size operands use `n` for the instruction's argument. 
+/// +/// | Instruction | Stack effect | Notes | +/// |---------------|-----------------------------------------|--------------------------------------------| +/// | `Constant` | `[… → … value]` | +1 | +/// | `Pop` | `[… value → …]` | −1 | +/// | `GetLocal` | `[… → … value]` | +1 (copies from slot) | +/// | `GetUpvalue` | `[… → … value]` | +1 (reads upvalue cell) | +/// | `GetGlobal` | `[… → … value]` | +1 (copies from globals) | +/// | `SetLocal` | `[… value → …]` | −1 (pops, writes to slot†) | +/// | `SetUpvalue` | `[… value → …]` | −1 (pops, writes to upvalue cell) | +/// | `Call` | `[… callee a1…an → … result]` | −n (pops callee + args, pushes result) | +/// | `Return` | `[… retval → …]` | pops retval, truncates frame, pushes retval| +/// | `Halt` | `[…]` | terminates execution | +/// | `Jump` | `[…]` | 0 (unconditional jump) | +/// | `JumpIfTrue` | `[… bool → … bool]` | 0 (peeks, jumps if true) | +/// | `JumpIfFalse` | `[… bool → … bool]` | 0 (peeks, jumps if false) | +/// | `MakeList` | `[… v1…vn → … list]` | −(n−1) | +/// | `MakeTuple` | `[… v1…vn → … tuple]` | −(n−1) | +/// | `MakeMap` | `[… k1 v1…kn vn (default?) → … map]` | −(2n−1) or −2n if has_default | +/// | `MakeRange` | `[… start (end?) 
→ … iter]` | −1 if bounded, 0 if unbounded | +/// | `Closure` | `[… → … closure]` | +1 | +/// | `GetIterator` | `[… value → … iter]` | 0 (pops value, pushes iterator) | +/// | `IterNext` | `[… iter → … iter value]` or jump | +1 if has next, 0 + jump if exhausted | +/// | `ListPush` | `[… value → …]` | −1 (pops value, mutates list in slot) | +/// | `MapInsert` | `[… key value → …]` | −2 (pops both, mutates map in slot) | +/// | `Unpack` | `[… compound → … v1…vn]` | +(n−1) (pops 1, pushes n) | +/// | `CloseUpvalue`| `[…]` | 0 (closes upvalue cells, no stack change) | +/// | `Memoize` | `[… fn → … memoized_fn]` | 0 (pops and pushes) | +/// +/// † `SetLocal` for a **declaration** (slot == stack top) is effectively a no-op on the +/// stack: it pops then immediately pushes to extend. For a **reassignment** (slot < top) +/// it truly shrinks the stack by 1. +// NOTE: OpCode cannot be Copy because the Closure variant holds Rc<[CaptureSource]>. +// The dispatch loop accesses opcodes by reference to avoid cloning the 32-byte enum on +// every iteration; see Vm::run_to_depth. +#[derive(Clone, PartialEq, Eq)] +pub enum OpCode { + /// Pops callee and `n` arguments, pushes result. `[… callee a1…an → … result]` + Call(usize), + /// Pops top of stack. `[… value → …]` + Pop, + /// Unconditional jump. `[…] → […]` + Jump(isize), + /// Peeks top; jumps if true. `[… bool → … bool]` + JumpIfTrue(isize), + /// Peeks top; jumps if false. `[… bool → … bool]` + JumpIfFalse(isize), + /// Pushes a constant. `[… → … value]` + Constant(usize), + /// Copies local slot onto stack. `[… → … value]` + GetLocal(usize), + /// Reads upvalue cell onto stack. `[… → … value]` + GetUpvalue(usize), + /// Pops top and writes to local slot. `[… value → …]` + SetLocal(usize), + /// Pops top and writes to upvalue cell. `[… value → …]` + SetUpvalue(usize), + /// Copies global slot onto stack. `[… → … value]` + GetGlobal(usize), + /// Pops `n` values, pushes a list. 
`[… v1…vn → … list]` + MakeList(usize), + /// Pops `n` values, pushes a tuple. `[… v1…vn → … tuple]` + MakeTuple(usize), + /// Pops `2n` values (+ optional default), pushes a map. + MakeMap { pairs: usize, has_default: bool }, + /// Pushes a closure, capturing values from locals/upvalues. `[… → … closure]` + Closure { + constant_idx: usize, + values: Rc<[CaptureSource]>, + }, + /// Pops value, pushes iterator. No-op if already an iterator. `[… value → … iter]` + GetIterator, + /// Peeks iterator; pushes next value or jumps if exhausted. `[… iter → … iter value]` + IterNext(isize), + /// Pops value, appends to list at local slot. `[… value → …]` + ListPush(usize), + /// Pops value then key, inserts into map at local slot. `[… key value → …]` + MapInsert(usize), + /// Pops start (and end if bounded), pushes range iterator. + MakeRange { inclusive: bool, bounded: bool }, + /// Pops a compound value, pushes `n` elements. `[… compound → … v1…vn]` + Unpack(usize), + /// Terminates execution. + Halt, + /// Returns from function call. Pops return value, truncates frame, pushes return value. + Return, + /// Closes open upvalues at or above `frame_pointer + slot`. No stack change. + CloseUpvalue(usize), + /// Pops function, pushes memoized wrapper. 
`[… fn → … memoized_fn]` + Memoize, +} + +impl std::fmt::Debug for OpCode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Call(n) => write!(f, "Call({n})"), + Self::Pop => write!(f, "Pop"), + Self::Jump(n) => write!(f, "Jump({n})"), + Self::JumpIfTrue(n) => write!(f, "JumpIfTrue({n})"), + Self::JumpIfFalse(n) => write!(f, "JumpIfFalse({n})"), + Self::Constant(n) => write!(f, "Constant({n})"), + Self::GetLocal(n) => write!(f, "GetLocal({n})"), + Self::SetLocal(n) => write!(f, "SetLocal({n})"), + Self::GetGlobal(n) => write!(f, "GetGlobal({n})"), + Self::GetUpvalue(n) => write!(f, "GetUpvalue({n})"), + Self::SetUpvalue(n) => write!(f, "SetUpvalue({n})"), + Self::MakeList(n) => write!(f, "MakeList({n})"), + Self::MakeTuple(n) => write!(f, "MakeTuple({n})"), + Self::MakeMap { pairs, has_default } => { + write!(f, "MakeMap({pairs}, default={has_default})") + } + Self::Closure { + constant_idx, + values, + } => { + write!(f, "Closure({constant_idx}")?; + for cap in values.iter() { + match cap { + CaptureSource::Local(n) => write!(f, ", local({n})")?, + CaptureSource::Upvalue(n) => write!(f, ", upvalue({n})")?, + } + } + write!(f, ")") + } + Self::GetIterator => write!(f, "GetIterator"), + Self::IterNext(n) => write!(f, "IterNext({n})"), + Self::ListPush(n) => write!(f, "ListPush({n})"), + Self::MapInsert(n) => write!(f, "MapInsert({n})"), + Self::MakeRange { inclusive, bounded } => { + write!(f, "MakeRange(inclusive={inclusive}, bounded={bounded})") + } + Self::Halt => write!(f, "Halt"), + Self::Return => write!(f, "Return"), + Self::Unpack(n) => write!(f, "Unpack({n})"), + Self::CloseUpvalue(n) => write!(f, "CloseUpvalue({n})"), + Self::Memoize => write!(f, "Memoize"), + } + } +} + +/// A chunk of bytecode along with the constants it references. 
+#[derive(Default, Clone)] +pub struct Chunk { + constants: Vec, + code: Vec, + spans: Vec, +} + +impl Chunk { + pub fn len(&self) -> usize { + self.code.len() + } + + pub fn add_constant(&mut self, value: Value) -> usize { + self.constants.push(value); + self.constants.len() - 1 + } + + pub fn write(&mut self, op: OpCode, span: Span) -> usize { + self.code.push(op); + self.spans.push(span); + self.code.len() - 1 + } + + pub fn patch_jump(&mut self, op_idx: usize) { + let offset = + isize::try_from(self.code.len() - op_idx - 1).expect("jump too large to patch"); + match self.code.get_mut(op_idx) { + Some( + OpCode::JumpIfFalse(n) + | OpCode::JumpIfTrue(n) + | OpCode::Jump(n) + | OpCode::IterNext(n), + ) => *n = offset, + _ => panic!("expected a patchable jump instruction at index {op_idx}"), + } + } + + /// Emits a `Jump` that goes back to `target` (a previously recorded chunk offset). + pub fn write_jump_back(&mut self, target: usize, span: Span) -> usize { + let offset = + -isize::try_from(self.len() - target + 1).expect("loop too large to jump back"); + self.write(OpCode::Jump(offset), span) + } + + pub fn is_empty(&self) -> bool { + self.code.is_empty() + } + #[inline(always)] + pub fn opcode(&self, idx: usize) -> &OpCode { + &self.code[idx] + } + + pub fn opcodes(&self) -> &[OpCode] { + &self.code + } + + pub fn constant(&self, idx: usize) -> &Value { + &self.constants[idx] + } + + pub fn span(&self, ip: usize) -> Span { + self.spans[ip] + } + + /// Iterates opcodes as `(index, opcode, constant_value)` where `constant_value` + /// is `Some` for `Constant(idx)` and `Closure(idx)` opcodes. + pub fn iter(&self) -> impl Iterator)> { + self.code.iter().enumerate().map(|(i, op)| { + let val = match op { + OpCode::Constant(idx) + | OpCode::Closure { + constant_idx: idx, .. 
+ } => Some(&self.constants[*idx]), + _ => None, + }; + (i, op, val) + }) + } +} diff --git a/ndc_vm/src/compiler.rs b/ndc_vm/src/compiler.rs new file mode 100644 index 00000000..60306860 --- /dev/null +++ b/ndc_vm/src/compiler.rs @@ -0,0 +1,921 @@ +use crate::chunk::{Chunk, OpCode}; +use crate::value::{CompiledFunction, Function}; +use crate::{Object, Value}; +use ndc_core::{StaticType, TypeSignature}; +use ndc_lexer::Span; +use ndc_parser::{ + Binding, CaptureSource, Expression, ExpressionLocation, ForBody, ForIteration, LogicalOperator, + Lvalue, ResolvedVar, +}; +use std::rc::Rc; + +#[derive(Default, Clone)] +pub struct Compiler { + chunk: Chunk, + num_locals: usize, + loop_stack: Vec, + allow_return: bool, +} + +impl Compiler { + pub fn compile( + expressions: impl Iterator, + ) -> Result { + Ok(Self::compile_resumable(expressions)?.0) + } + + /// Compile expressions and return both the finished function and a + /// checkpoint that can be passed to `resume` to append more code later. + /// The checkpoint is the compiler state *before* the `Halt` instruction, + /// so `resume` can extend the bytecode without re-running old instructions. + pub fn compile_resumable( + expressions: impl Iterator, + ) -> Result<(CompiledFunction, Self), CompileError> { + let mut compiler = Self::default(); + for expr_loc in expressions { + compiler.compile_expr(expr_loc)?; + } + compiler.finish() + } + + /// Resume from a checkpoint produced by `compile_resumable` or a previous + /// `resume` call. Compiles `new_expressions` starting where the checkpoint + /// left off, returning the extended function and a new checkpoint. + /// + /// The returned `CompiledFunction` contains all instructions (old + new), + /// so the VM can be pointed at `checkpoint.halt_ip()` to execute only the + /// new part while the stack already holds the old locals. 
+ pub fn resume( + self, + new_expressions: impl Iterator, + ) -> Result<(CompiledFunction, Self), CompileError> { + let mut compiler = self; // checkpoint has no trailing Halt + for expr_loc in new_expressions { + compiler.compile_expr(expr_loc)?; + } + compiler.finish() + } + + /// The instruction index where the trailing `Halt` was written. + /// When resuming, this is the `ip` to start from in the new function. + pub fn halt_ip(&self) -> usize { + self.chunk.len() + } + + /// Number of top-level local slots used so far. + pub fn num_locals(&self) -> usize { + self.num_locals + } + + /// Internal: clone a checkpoint (pre-Halt), write Halt, return both. + fn finish(mut self) -> Result<(CompiledFunction, Self), CompileError> { + let checkpoint = self.clone(); + self.chunk.write(OpCode::Halt, Span::new(0, 0)); + let function = CompiledFunction { + name: None, + static_type: StaticType::Function { + parameters: Some(vec![]), + return_type: Box::new(StaticType::Any), + }, + body: self.chunk, + num_locals: self.num_locals, + }; + Ok((function, checkpoint)) + } + + fn compile_expr( + &mut self, + expression_location: ExpressionLocation, + ) -> Result<(), CompileError> { + let ExpressionLocation { expression, span } = expression_location; + match expression { + Expression::BoolLiteral(b) => { + let idx = self.chunk.add_constant(Value::Bool(b)); + self.chunk.write(OpCode::Constant(idx), span); + } + Expression::StringLiteral(s) => { + let idx = self.chunk.add_constant(Value::string(s)); + self.chunk.write(OpCode::Constant(idx), span); + } + Expression::Int64Literal(i) => { + let idx = self.chunk.add_constant(Value::int(i)); + self.chunk.write(OpCode::Constant(idx), span); + } + Expression::Float64Literal(f) => { + let idx = self.chunk.add_constant(Value::float(f)); + self.chunk.write(OpCode::Constant(idx), span); + } + Expression::BigIntLiteral(i) => { + let idx = self.chunk.add_constant(Value::bigint(i)); + self.chunk.write(OpCode::Constant(idx), span); + } + 
Expression::ComplexLiteral(c) => { + let idx = self.chunk.add_constant(Value::complex(c)); + self.chunk.write(OpCode::Constant(idx), span); + } + Expression::Identifier { name, resolved } => { + if name == "None" { + let idx = self.chunk.add_constant(Value::None); + self.chunk.write(OpCode::Constant(idx), span); + } else { + self.compile_binding(resolved, span)?; + } + } + Expression::Statement(stm) => { + let needs_pop = produces_value(&stm.expression); + self.compile_expr(*stm)?; + if needs_pop { + self.chunk.write(OpCode::Pop, Span::new(0, 0)); + } + } + Expression::Logical { + left, + right, + operator, + } => { + let left_span = left.span; + self.compile_expr(*left)?; + match operator { + LogicalOperator::And => { + let end_jump = self.chunk.write(OpCode::JumpIfFalse(0), left_span); + self.chunk.write(OpCode::Pop, Span::new(0, 0)); + self.compile_expr(*right)?; + self.chunk.patch_jump(end_jump); + } + LogicalOperator::Or => { + let end_jump = self.chunk.write(OpCode::JumpIfTrue(0), left_span); + self.chunk.write(OpCode::Pop, Span::new(0, 0)); + self.compile_expr(*right)?; + self.chunk.patch_jump(end_jump); + } + } + } + Expression::VariableDeclaration { value, l_value } => { + self.compile_expr(*value)?; + self.compile_declare_lvalue(l_value, span)?; + } + Expression::Assignment { + l_value, + r_value: value, + } => match l_value { + Lvalue::Index { + value: container, + index, + resolved_set, + .. + } => { + let set_fn = resolved_set.expect("[]= must be resolved"); + self.compile_binding(set_fn, span)?; + self.compile_expr(*container)?; + self.compile_expr(*index)?; + self.compile_expr(*value)?; + self.chunk.write(OpCode::Call(3), span); + } + l_value @ Lvalue::Identifier { .. 
} => { + self.compile_expr(*value)?; + self.compile_lvalue(l_value, span)?; + let idx = self.chunk.add_constant(Value::unit()); + self.chunk.write(OpCode::Constant(idx), Span::new(0, 0)); + } + Lvalue::Sequence(seq) => { + self.compile_expr(*value)?; + self.chunk.write(OpCode::Unpack(seq.len()), span); + for l_value in seq { + self.compile_lvalue(l_value, span)?; + } + let idx = self.chunk.add_constant(Value::unit()); + self.chunk.write(OpCode::Constant(idx), Span::new(0, 0)); + } + }, + Expression::OpAssignment { + l_value, + r_value, + resolved_assign_operation, + resolved_operation, + .. + } => { + match l_value { + Lvalue::Identifier { + resolved, + span: lv_span, + .. + } => { + let var = resolved.expect("lvalue must be resolved"); + if matches!(resolved_assign_operation, Binding::Resolved(_)) { + // In-place operation (e.g. |=, &=) resolved exactly: modifies + // the value's Rc in place via sync_map_mutations in the bridge, + // so all aliases sharing the Rc see the change. We discard the + // unit return value; the variable slot already holds the + // (now-updated) shared reference. + self.compile_binding(resolved_assign_operation, span)?; + self.emit_get_var(var, lv_span); + self.compile_expr(*r_value)?; + self.chunk.write(OpCode::Call(2), span); + self.chunk.write(OpCode::Pop, span); + } else if let Binding::Dynamic(assign_candidates) = + resolved_assign_operation + { + // Assign-op exists but type was unknown at compile time (Any). + // Build a merged overload set: assign-op candidates first so they + // win for map/string/list args, then binary-op candidates as + // fallback for numeric args. Assign-ops return lhs so SET_VAR + // stores a meaningful value; sync_map_mutations propagates in-place + // changes to VM Rcs via the bridge. 
+ let binary_candidates = match resolved_operation { + Binding::Dynamic(c) => c, + Binding::Resolved(v) => vec![v], + Binding::None => vec![], + }; + let merged: Vec<_> = assign_candidates + .into_iter() + .chain(binary_candidates) + .collect(); + self.compile_binding(Binding::Dynamic(merged), span)?; + self.emit_get_var(var, lv_span); + self.compile_expr(*r_value)?; + self.chunk.write(OpCode::Call(2), span); + self.emit_set_var(var, lv_span); + } else { + // No exact in-place op: call the regular operation and store result. + self.compile_binding(resolved_operation, span)?; + self.emit_get_var(var, lv_span); + self.compile_expr(*r_value)?; + self.chunk.write(OpCode::Call(2), span); + self.emit_set_var(var, lv_span); + } + } + Lvalue::Index { + value, + index, + resolved_get, + resolved_set, + } => { + // let getter = ; + let container_span = value.span; + let index_span = index.span; + + let tmp_container = self.num_locals; + let tmp_index = self.num_locals + 1; + self.num_locals += 2; + + self.compile_expr(*value)?; + self.chunk + .write(OpCode::SetLocal(tmp_container), container_span); + self.compile_expr(*index)?; + self.chunk.write(OpCode::SetLocal(tmp_index), index_span); + + self.compile_binding( + resolved_set.expect("[]= must be resolved"), + container_span.merge(index_span), + )?; + self.chunk + .write(OpCode::GetLocal(tmp_container), container_span); + self.chunk.write(OpCode::GetLocal(tmp_index), index_span); + + self.compile_binding(resolved_operation, span)?; + self.compile_binding( + resolved_get.expect("[] must be resolved"), + index_span, + )?; + self.chunk + .write(OpCode::GetLocal(tmp_container), container_span); + self.chunk.write(OpCode::GetLocal(tmp_index), index_span); + self.chunk.write(OpCode::Call(2), span); // [](container, index) → current_value + self.compile_expr(*r_value)?; + self.chunk.write(OpCode::Call(2), span); // op(current_value, r_value) → new_value + self.chunk.write(OpCode::Call(3), span); // []=(container, index, new_value) 
+ self.chunk.write(OpCode::Pop, span); // discard []= result; common code below pushes unit + } + Lvalue::Sequence(_) => { + return Err(CompileError::lvalue_required_to_be_single_identifier(span)); + } + } + let idx = self.chunk.add_constant(Value::unit()); + self.chunk.write(OpCode::Constant(idx), span); + } + Expression::FunctionDeclaration { + name, + resolved_name, + body, + type_signature, + return_type, + captures, + pure, + .. + } => { + self.compile_function_decl( + name, + resolved_name, + *body, + type_signature, + return_type, + captures, + pure, + span, + )?; + } + Expression::Grouping(statements) => { + self.compile_expr(*statements)?; + } + Expression::Block { statements } => { + self.compile_block(statements, span)?; + } + Expression::If { + condition, + on_true, + on_false, + } => { + self.compile_if(*condition, *on_true, on_false.map(|e| *e), span)?; + } + Expression::While { + expression: condition, + loop_body, + } => { + self.compile_while(*condition, *loop_body, span)?; + } + Expression::For { iterations, body } => { + self.compile_for(iterations, *body, span)?; + } + Expression::Call { + function, + arguments, + } => { + let function_span = function.span; + self.compile_expr(*function)?; + + let argument_count = arguments.len(); + for argument in arguments { + self.compile_expr(argument)?; + } + + self.chunk + .write(OpCode::Call(argument_count), function_span); + } + Expression::Tuple { values } => { + let size = values.len(); + for expression in values { + self.compile_expr(expression)?; + } + self.chunk.write(OpCode::MakeTuple(size), span); + } + Expression::List { values } => { + let size = values.len(); + for expression in values { + self.compile_expr(expression)?; + } + self.chunk.write(OpCode::MakeList(size), span); + } + Expression::Map { values, default } => { + let pairs = values.len(); + let has_default = default.is_some(); + for (key, value) in values { + self.compile_expr(key)?; + match value { + Some(v) => self.compile_expr(v)?, 
+ None => { + let idx = self.chunk.add_constant(Value::unit()); + self.chunk.write(OpCode::Constant(idx), Span::new(0, 0)); + } + } + } + if let Some(default) = default { + self.compile_expr(*default)?; + } + self.chunk + .write(OpCode::MakeMap { pairs, has_default }, span); + } + Expression::Return { value } => { + if !self.allow_return { + return Err(CompileError::return_outside_function(span)); + } + self.compile_expr(*value)?; + self.chunk.write(OpCode::Return, span); + } + Expression::Break => { + let idx = self.chunk.write(OpCode::Jump(0), span); // will be backpatched + self.current_loop_context_mut() + .ok_or(CompileError::unexpected_break(span))? + .break_instructions + .push(idx); + } + Expression::Continue => { + self.chunk.write_jump_back( + self.current_loop_context() + .ok_or(CompileError::unexpected_continue(span))? + .start, + span, + ); + } + range @ (Expression::RangeInclusive { .. } | Expression::RangeExclusive { .. }) => { + let (inclusive, start, end) = match range { + Expression::RangeInclusive { start, end } => (true, start, end), + Expression::RangeExclusive { start, end } => (false, start, end), + _ => unreachable!(), + }; + let start = start.expect("unbounded range start not yet supported"); + self.compile_expr(*start)?; + let bounded = end.is_some(); + if let Some(end) = end { + self.compile_expr(*end)?; + } + self.chunk + .write(OpCode::MakeRange { inclusive, bounded }, span); + } + } + + Ok(()) + } + + fn compile_lvalue(&mut self, l_value: Lvalue, span: Span) -> Result<(), CompileError> { + match l_value { + Lvalue::Identifier { + resolved, + span: lv_span, + .. + } => { + self.emit_set_var(resolved.expect("identifiers must be resolved"), lv_span); + } + Lvalue::Index { + value, + index, + resolved_set, + .. + } => { + // Value to store is on top of stack. We need to: + // 1. Save it to a temp slot + // 2. Compile container and index + // 3. Get the value back + // 4. Call []= function + // 5. 
Pop the return value + + let tmp_value = self.num_locals; + self.num_locals += 1; + self.chunk.write(OpCode::SetLocal(tmp_value), span); + + self.compile_binding(resolved_set.expect("[]= must be resolved"), span)?; + self.compile_expr(*value)?; + self.compile_expr(*index)?; + self.chunk.write(OpCode::GetLocal(tmp_value), span); + self.chunk.write(OpCode::Call(3), span); + self.chunk.write(OpCode::Pop, Span::new(0, 0)); + } + Lvalue::Sequence(seq) => { + self.chunk.write(OpCode::Unpack(seq.len()), span); + for lv in seq { + self.compile_lvalue(lv, span)?; + } + } + } + + Ok(()) + } + + fn compile_declare_lvalue(&mut self, l_value: Lvalue, span: Span) -> Result<(), CompileError> { + match l_value { + Lvalue::Identifier { resolved, .. } => { + let slot = match resolved.expect("declaration lvalue must be resolved") { + ResolvedVar::Local { slot } => slot, + _ => unreachable!("declaration lvalue must be a local"), + }; + self.chunk.write(OpCode::SetLocal(slot), span); + self.num_locals = self.num_locals.max(slot + 1); + } + Lvalue::Index { .. 
} => unreachable!("cannot declare into index"), + Lvalue::Sequence(seq) => { + self.chunk.write(OpCode::Unpack(seq.len()), span); + for lv in seq { + self.compile_declare_lvalue(lv, span)?; + } + } + } + Ok(()) + } + + fn compile_binding(&mut self, resolved: Binding, span: Span) -> Result<(), CompileError> { + match resolved { + Binding::None => return Err(CompileError::unresolved_binding(span)), + Binding::Resolved(var) => self.emit_get_var(var, span), + Binding::Dynamic(candidates) => { + let idx = self + .chunk + .add_constant(Value::Object(Rc::new(Object::OverloadSet(candidates)))); + self.chunk.write(OpCode::Constant(idx), span); + } + } + + Ok(()) + } + + fn emit_get_var(&mut self, var: ResolvedVar, span: Span) { + match var { + ResolvedVar::Local { slot } => self.chunk.write(OpCode::GetLocal(slot), span), + ResolvedVar::Upvalue { slot } => self.chunk.write(OpCode::GetUpvalue(slot), span), + ResolvedVar::Global { slot } => self.chunk.write(OpCode::GetGlobal(slot), span), + }; + } + + fn emit_set_var(&mut self, var: ResolvedVar, span: Span) { + match var { + ResolvedVar::Local { slot } => self.chunk.write(OpCode::SetLocal(slot), span), + ResolvedVar::Upvalue { slot } => self.chunk.write(OpCode::SetUpvalue(slot), span), + ResolvedVar::Global { .. 
} => unreachable!("globals are native, never assigned"), + }; + } + fn compile_block( + &mut self, + statements: Vec, + _span: Span, + ) -> Result<(), CompileError> { + if statements.is_empty() { + let idx = self.chunk.add_constant(Value::unit()); + // Synthetic unit from empty block has no meaningful source + self.chunk.write(OpCode::Constant(idx), Span::new(0, 0)); + } else { + let last = statements.len() - 1; + for (i, stmt) in statements.into_iter().enumerate() { + let is_last_expr = i == last && produces_value(&stmt.expression); + self.compile_expr(stmt)?; + if i == last && !is_last_expr { + let idx = self.chunk.add_constant(Value::unit()); + // Synthetic unit when last statement doesn't produce value + self.chunk.write(OpCode::Constant(idx), Span::new(0, 0)); + } + } + } + + Ok(()) + } + + fn compile_if( + &mut self, + condition: ExpressionLocation, + on_true: ExpressionLocation, + on_false: Option, + _span: Span, + ) -> Result<(), CompileError> { + let condition_span = condition.span; + self.compile_expr(condition)?; + let conditional_jump_idx = self.chunk.write(OpCode::JumpIfFalse(0), condition_span); + self.chunk.write(OpCode::Pop, Span::new(0, 0)); + self.compile_expr(on_true)?; + if let Some(on_false) = on_false { + let jump_to_end = self.chunk.write(OpCode::Jump(0), Span::new(0, 0)); + self.chunk.patch_jump(conditional_jump_idx); + self.chunk.write(OpCode::Pop, Span::new(0, 0)); + self.compile_expr(on_false)?; + self.chunk.patch_jump(jump_to_end); + } else { + // No else branch — push unit so the if-expression always produces a value. 
+ let jump_to_end = self.chunk.write(OpCode::Jump(0), Span::new(0, 0)); + self.chunk.patch_jump(conditional_jump_idx); + self.chunk.write(OpCode::Pop, Span::new(0, 0)); + let idx = self.chunk.add_constant(Value::unit()); + self.chunk.write(OpCode::Constant(idx), Span::new(0, 0)); + self.chunk.patch_jump(jump_to_end); + } + + Ok(()) + } + + fn compile_while( + &mut self, + condition: ExpressionLocation, + loop_body: ExpressionLocation, + _span: Span, + ) -> Result<(), CompileError> { + let condition_span = condition.span; + let loop_start = self.new_loop_context(); + self.compile_expr(condition)?; + let conditional_jump_idx = self.chunk.write(OpCode::JumpIfFalse(0), condition_span); + self.chunk.write(OpCode::Pop, Span::new(0, 0)); + self.compile_expr(loop_body)?; + self.chunk.write(OpCode::Pop, Span::new(0, 0)); + self.chunk.write_jump_back(loop_start, Span::new(0, 0)); + self.chunk.patch_jump(conditional_jump_idx); + self.chunk.write(OpCode::Pop, Span::new(0, 0)); + let break_instructions = + std::mem::take(&mut self.current_loop_context_mut().unwrap().break_instructions); + for instruction in break_instructions { + self.chunk.patch_jump(instruction) + } + self.end_loop_context(); + Ok(()) + } + + #[allow(clippy::too_many_arguments)] + fn compile_function_decl( + &mut self, + name: Option, + resolved_name: Option, + body: ExpressionLocation, + type_signature: TypeSignature, + return_type: Option, + captures: Vec, + pure: bool, + span: Span, + ) -> Result<(), CompileError> { + let num_params = match &type_signature { + TypeSignature::Exact(params) => params.len(), + TypeSignature::Variadic => 0, + }; + let return_type = return_type.unwrap_or_default(); + let static_type = StaticType::Function { + parameters: match &type_signature { + TypeSignature::Variadic => None, + TypeSignature::Exact(types) => { + Some(types.iter().map(|x| x.type_name.clone()).collect()) + } + }, + return_type: Box::new(return_type.clone()), + }; + let mut fn_compiler = Self { + num_locals: 
num_params, + allow_return: true, + ..Default::default() + }; + fn_compiler.compile_expr(body)?; + fn_compiler.chunk.write(OpCode::Return, Span::new(0, 0)); + + let compiled = CompiledFunction { + name, + static_type, + body: fn_compiler.chunk, + num_locals: fn_compiler.num_locals, + }; + let idx = self + .chunk + .add_constant(Value::function(Function::Compiled(Rc::new(compiled)))); + + if !captures.is_empty() { + self.chunk.write( + OpCode::Closure { + constant_idx: idx, + values: captures.into(), + }, + span, + ); + } else { + self.chunk.write(OpCode::Constant(idx), span); + } + + // For `pure fn`, wrap the function in a memoization cache. The cache + // is allocated fresh each time the declaration is evaluated, so each + // closure instance has its own independent cache. + if pure { + self.chunk.write(OpCode::Memoize, span); + } + + match resolved_name { + Some(ResolvedVar::Local { slot }) => { + self.chunk.write(OpCode::SetLocal(slot), span); + self.num_locals = self.num_locals.max(slot + 1); + } + Some(ResolvedVar::Upvalue { .. } | ResolvedVar::Global { .. }) => { + unreachable!("the analyser never assigns a declaration to a non-local binding") + } + None => {} + } + + Ok(()) + } + + fn compile_for( + &mut self, + iterations: Vec, + body: ForBody, + span: Span, + ) -> Result<(), CompileError> { + match body { + ForBody::Block(block) => { + self.compile_for_iterations(&iterations, span, &mut |this| { + // The body is always a block, which always pushes exactly one value. + // Discard it — the loop itself produces no value. 
+ this.compile_expr(block.clone())?; + this.chunk.write(OpCode::Pop, span); + Ok(()) + })?; + Ok(()) + } + ForBody::List { + expr, + accumulator_slot, + } => { + let tmp_list = accumulator_slot + .expect("list accumulator slot must be assigned by the analyser"); + self.num_locals = self.num_locals.max(tmp_list + 1); + self.chunk.write(OpCode::MakeList(0), span); + self.chunk.write(OpCode::SetLocal(tmp_list), span); + self.compile_for_iterations(&iterations, span, &mut |this| { + this.compile_expr(expr.clone())?; + this.chunk.write(OpCode::ListPush(tmp_list), span); + Ok(()) + })?; + self.chunk.write(OpCode::GetLocal(tmp_list), span); + Ok(()) + } + ForBody::Map { + key, + value, + default, + accumulator_slot, + } => { + let tmp_map = accumulator_slot + .expect("map accumulator slot must be assigned by the analyser"); + self.num_locals = self.num_locals.max(tmp_map + 1); + let has_default = default.is_some(); + if let Some(default) = default { + self.compile_expr(*default)?; + } + self.chunk.write( + OpCode::MakeMap { + pairs: 0, + has_default, + }, + span, + ); + self.chunk.write(OpCode::SetLocal(tmp_map), span); + self.compile_for_iterations(&iterations, span, &mut |this| { + this.compile_expr(key.clone())?; + if let Some(value) = value.clone() { + this.compile_expr(value)?; + } else { + let idx = this.chunk.add_constant(Value::unit()); + this.chunk.write(OpCode::Constant(idx), Span::new(0, 0)); + } + this.chunk.write(OpCode::MapInsert(tmp_map), span); + Ok(()) + })?; + self.chunk.write(OpCode::GetLocal(tmp_map), span); + Ok(()) + } + } + } + + /// Shared loop scaffolding for `compile_for_block`, `compile_for_list`, and + /// `compile_for_map`. Handles iteration and guard clauses; calls `compile_leaf` + /// for the innermost body once all iterations are peeled off. 
+ fn compile_for_iterations( + &mut self, + iterations: &[ForIteration], + span: Span, + compile_leaf: &mut dyn FnMut(&mut Self) -> Result<(), CompileError>, + ) -> Result<(), CompileError> { + let Some((first, rest)) = iterations.split_first() else { + return compile_leaf(self); + }; + + match first { + ForIteration::Iteration { l_value, sequence } => { + self.compile_expr(sequence.clone())?; + self.chunk.write(OpCode::GetIterator, sequence.span); + + let loop_start = self.new_loop_context(); + let iter_next = self.chunk.write(OpCode::IterNext(0), span); + self.compile_declare_lvalue(l_value.clone(), span)?; + + self.compile_for_iterations(rest, span, compile_leaf)?; + + // Close upvalues for the loop variable so each iteration's closures + // get their own frozen copy rather than sharing a mutable slot. + if let Some(slot) = min_lvalue_slot(l_value) { + self.chunk.write(OpCode::CloseUpvalue(slot), span); + } + + self.chunk.write_jump_back(loop_start, span); + + // Both IterNext-done and break jump to the iterator Pop + self.chunk.patch_jump(iter_next); + let break_instructions = std::mem::take( + &mut self.current_loop_context_mut().unwrap().break_instructions, + ); + for instruction in break_instructions { + self.chunk.patch_jump(instruction); + } + self.end_loop_context(); + + // Pop the iterator + self.chunk.write(OpCode::Pop, Span::new(0, 0)); + } + ForIteration::Guard(condition) => { + self.compile_expr(condition.clone())?; + let skip_jump = self.chunk.write(OpCode::JumpIfFalse(0), span); + self.chunk.write(OpCode::Pop, Span::new(0, 0)); + self.compile_for_iterations(rest, span, compile_leaf)?; + let end_jump = self.chunk.write(OpCode::Jump(0), span); + self.chunk.patch_jump(skip_jump); + self.chunk.write(OpCode::Pop, Span::new(0, 0)); + self.chunk.patch_jump(end_jump); + } + } + + Ok(()) + } + + fn new_loop_context(&mut self) -> usize { + let start = self.chunk.len(); + self.loop_stack.push(LoopContext { + start, + break_instructions: Vec::new(), + }); + 
start + } + + fn current_loop_context(&self) -> Option<&LoopContext> { + self.loop_stack.last() + } + + fn current_loop_context_mut(&mut self) -> Option<&mut LoopContext> { + self.loop_stack.last_mut() + } + + fn end_loop_context(&mut self) { + self.loop_stack + .pop() + .expect("expected there to be a loop context to pop"); + } +} + +#[derive(Clone)] +struct LoopContext { + start: usize, + break_instructions: Vec, +} + +/// Returns the minimum local slot referenced by an lvalue, used to determine +/// which upvalues to close at the end of a loop iteration. +fn min_lvalue_slot(lv: &Lvalue) -> Option { + match lv { + Lvalue::Identifier { + resolved: Some(ResolvedVar::Local { slot }), + .. + } => Some(*slot), + Lvalue::Sequence(seq) => seq.iter().filter_map(min_lvalue_slot).min(), + _ => None, + } +} + +fn produces_value(expr: &Expression) -> bool { + match expr { + Expression::Statement(_) + | Expression::VariableDeclaration { .. } + | Expression::FunctionDeclaration { + resolved_name: Some(_), + .. + } + | Expression::While { .. } + | Expression::Break + | Expression::Continue + | Expression::Return { .. } => false, + Expression::For { body, .. } => { + matches!(**body, ForBody::List { .. } | ForBody::Map { .. 
}) + } + _ => true, + } +} + +#[derive(thiserror::Error, Debug)] +#[error("{text}")] +pub struct CompileError { + text: String, + span: Span, +} + +impl CompileError { + fn unresolved_binding(span: Span) -> Self { + Self { + text: "encountered unresolved binding during compilation, this is probably an internal error".to_string(), + span, + } + } + + fn unexpected_break(span: Span) -> Self { + Self { + text: "unexpected break statement outside of loop".to_string(), + span, + } + } + fn unexpected_continue(span: Span) -> Self { + Self { + text: "unexpected continue statement outside of loop".to_string(), + span, + } + } + + fn return_outside_function(span: Span) -> Self { + Self { + text: "unexpected return statement outside of function body".to_string(), + span, + } + } + + fn lvalue_required_to_be_single_identifier(span: Span) -> Self { + Self { + text: "This lvalue is required to be a single identifier".to_string(), + span, + } + } + + pub fn span(&self) -> Span { + self.span + } +} diff --git a/ndc_vm/src/disassemble.rs b/ndc_vm/src/disassemble.rs new file mode 100644 index 00000000..e9dc8e13 --- /dev/null +++ b/ndc_vm/src/disassemble.rs @@ -0,0 +1,54 @@ +use crate::chunk::OpCode; +use crate::value::{CompiledFunction, Value}; +use std::fmt::Write; +use std::rc::Rc; + +pub fn disassemble(function: &CompiledFunction, source: Option<&str>) -> String { + let mut out = String::new(); + disassemble_function(function, source, &mut out); + out +} + +fn disassemble_function(function: &CompiledFunction, source: Option<&str>, out: &mut String) { + let name = function.name.as_deref().unwrap_or("