From 4e76f1990e4c89fc6c97b91a17c00693b5ca2695 Mon Sep 17 00:00:00 2001 From: mat ess Date: Sat, 5 Aug 2023 23:17:42 -0400 Subject: [PATCH] Start working on typechecking --- Cargo.lock | 28 ++++++++++----- Cargo.toml | 7 ++-- _tests/golden/parse.mul | 3 ++ _tests/goldentests.rs | 7 ++++ docs/ROADMAP.md | 3 +- flake.lock | 74 +++++++++++++++++++++++++--------------- flake.nix | 9 ++--- src/builtins.rs | 60 ++++++++++++++++++++++++++------ src/dictionary.rs | 6 ++++ src/evaluate.rs | 49 ++++++++++++-------------- src/lib.rs | 2 ++ src/parse/parser.lalrpop | 26 +++++++++----- src/syntax.rs | 40 ++++++++++++++++++++-- src/typecheck.rs | 20 +++++++++++ 14 files changed, 239 insertions(+), 95 deletions(-) create mode 100644 _tests/golden/parse.mul create mode 100644 _tests/goldentests.rs create mode 100644 src/dictionary.rs create mode 100644 src/typecheck.rs diff --git a/Cargo.lock b/Cargo.lock index 51cfb67..703a9b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -392,9 +392,21 @@ dependencies = [ [[package]] name = "regex" -version = "1.8.1" +version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af83e617f331cc6ae2da5443c602dfa5af81e517212d9d611a5b3ba1777b5370" +checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7b6d6190b7594385f61bd3911cd1be99dfddcfc365a4160cc2ab5bff4aed294" dependencies = [ "aho-corasick", "memchr", @@ -403,9 +415,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.7.1" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c" +checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" [[package]] name = "rustix" @@ -482,18 +494,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.43" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a35fc5b8971143ca348fa6df4f024d4d55264f3468c71ad1c2f365b0a4d58c42" +checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.43" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f" +checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index f895764..81f6e99 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,8 +7,11 @@ edition = "2021" lalrpop = "0.20.0" [dependencies] -anyhow = "1.0.72" +anyhow = "1" lalrpop-util = { version = "0.20.0", features = ["lexer"] } pico-args = { version = "0.5.0", features = ["eq-separator", "short-space-opt", "combined-flags"] } regex = "1" -thiserror = "1.0.43" +thiserror = "1" + +# [dev-dependencies] +# goldentests = "1" diff --git a/_tests/golden/parse.mul b/_tests/golden/parse.mul new file mode 100644 index 0000000..d091d78 --- /dev/null +++ b/_tests/golden/parse.mul @@ -0,0 +1,3 @@ +fn main() { + +} diff --git a/_tests/goldentests.rs b/_tests/goldentests.rs new file mode 100644 index 0000000..9ba0839 --- /dev/null +++ b/_tests/goldentests.rs @@ -0,0 +1,7 @@ +use goldentests::{TestConfig, TestResult}; + +#[test] +fn run_golden_tests() -> TestResult<()> { + let config = TestConfig::new("target/debug/mul", "tests/golden", "// ")?; + config.run_tests() +} diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md index 1af86f2..9401144 100644 --- a/docs/ROADMAP.md +++ b/docs/ROADMAP.md @@ -3,10 +3,11 @@ ## stages - [x] parser - [ ] lossless syntax trees (rowan + ungrammar) + - [ ] formatter (pretty-printer) - [ ] typechecker - [x] interpreter +- [ ] virtual machine - [ ] code generator -- [ ] formatter (pretty-printer) ## features - [ ] primitives diff --git a/flake.lock b/flake.lock index aa059d7..deab36c 100644 --- a/flake.lock +++ b/flake.lock @@ -10,11 +10,11 @@ "rust-overlay": "rust-overlay" }, "locked": { - "lastModified": 1684468982, - "narHash": "sha256-EoC1N5sFdmjuAP3UOkyQujSOT6EdcXTnRw8hPjJkEgc=", + "lastModified": 1688772518, + "narHash": "sha256-ol7gZxwvgLnxNSZwFTDJJ49xVY5teaSvF7lzlo3YQfM=", "owner": "ipetkov", "repo": "crane", - "rev": "99de890b6ef4b4aab031582125b6056b792a4a30", + "rev": "8b08e96c9af8c6e3a2b69af5a7fa168750fcf88e", "type": "github" }, "original": { @@ -60,11 +60,11 @@ "nixpkgs-lib": "nixpkgs-lib" }, "locked": { - "lastModified": 1683560683, - "narHash": "sha256-XAygPMN5Xnk/W2c1aW0jyEa6lfMDZWlQgiNtmHXytPc=", + "lastModified": 1688466019, + "narHash": "sha256-VeM2akYrBYMsb4W/MmBo1zmaMfgbL4cH3Pu8PGyIwJ0=", "owner": "hercules-ci", "repo": "flake-parts", - "rev": "006c75898cf814ef9497252b022e91c946ba8e17", + "rev": "8e8d955c22df93dbe24f19ea04f47a74adbdc5ec", "type": "github" }, "original": { @@ -78,11 +78,11 @@ "systems": "systems" }, "locked": { - "lastModified": 1681202837, - "narHash": "sha256-H+Rh19JDwRtpVPAWp64F+rlEtxUWBAQW28eAi3SRSzg=", + "lastModified": 1687709756, + "narHash": "sha256-Y5wKlQSkgEK2weWdOu4J3riRd+kV/VCgHsqLNTTWQ/0=", "owner": "numtide", "repo": "flake-utils", - "rev": "cfacdce06f30d2b68473a46042957675eebb3401", + "rev": "dbabf0ca0c0c4bce6ea5eaf65af5cb694d2082c7", "type": "github" }, "original": { @@ -92,12 +92,15 @@ } }, "flake-utils_2": { + "inputs": { + "systems": "systems_2" + }, "locked": { - "lastModified": 1667395993, - "narHash": "sha256-nuEHfE/LcWyuSWnS8t12N1wc105Qtau+/OdUAjtQ0rA=", + "lastModified": 1685518550, + "narHash": "sha256-o2d0KcvaXzTrPRIo0kOLV0/QXHhDQ5DTi+OxcjO8xqY=", "owner": "numtide", "repo": "flake-utils", - "rev": "5aed5285a952e0b949eb3ba02c12fa4fcfef535f", + "rev": "a1720a10a6cfe8234c0e93907ffe81be440f4cef", "type": "github" }, "original": { @@ -129,11 +132,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1684570954, - "narHash": "sha256-FX5y4Sm87RWwfu9PI71XFvuRpZLowh00FQpIJ1WfXqE=", + "lastModified": 1690640159, + "narHash": "sha256-5DZUYnkeMOsVb/eqPYb9zns5YsnQXRJRC8Xx/nPMcno=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "3005f20ce0aaa58169cdee57c8aa12e5f1b6e1b3", + "rev": "e6ab46982debeab9831236869539a507f670a129", "type": "github" }, "original": { @@ -146,11 +149,11 @@ "nixpkgs-lib": { "locked": { "dir": "lib", - "lastModified": 1682879489, - "narHash": "sha256-sASwo8gBt7JDnOOstnps90K1wxmVfyhsTPPNTGBPjjg=", + "lastModified": 1688049487, + "narHash": "sha256-100g4iaKC9MalDjUW9iN6Jl/OocTDtXdeAj7pEGIRh4=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "da45bf6ec7bbcc5d1e14d3795c025199f28e0de0", + "rev": "4bc72cae107788bf3f24f30db2e2f685c9298dc9", "type": "github" }, "original": { @@ -163,16 +166,16 @@ }, "nixpkgs-stable": { "locked": { - "lastModified": 1678872516, - "narHash": "sha256-/E1YwtMtFAu2KUQKV/1+KFuReYPANM2Rzehk84VxVoc=", + "lastModified": 1685801374, + "narHash": "sha256-otaSUoFEMM+LjBI1XL/xGB5ao6IwnZOXc47qhIgJe8U=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "9b8e5abb18324c7fe9f07cb100c3cd4a29cda8b8", + "rev": "c37ca420157f4abc31e26f436c1145f8951ff373", "type": "github" }, "original": { "owner": "NixOS", - "ref": "nixos-22.11", + "ref": "nixos-23.05", "repo": "nixpkgs", "type": "github" } @@ -188,11 +191,11 @@ "nixpkgs-stable": "nixpkgs-stable" }, "locked": { - "lastModified": 1684195081, - "narHash": "sha256-IKnQUSBhQTChFERxW2AzuauVpY1HRgeVzAjNMAA4B6I=", + "lastModified": 1690743255, + "narHash": "sha256-dsJzQsyJGWCym1+LMyj2rbYmvjYmzeOrk7ypPrSFOPo=", "owner": "cachix", "repo": "pre-commit-hooks.nix", - "rev": "96eabec58248ed8f4b0ad59e7ce9398018684fdc", + "rev": "fcbf4705d98398d084e6cb1c826a0b90a91d22d7", "type": "github" }, "original": { @@ -221,11 +224,11 @@ ] }, "locked": { - "lastModified": 1683080331, - "narHash": "sha256-nGDvJ1DAxZIwdn6ww8IFwzoHb2rqBP4wv/65Wt5vflk=", + "lastModified": 1688351637, + "narHash": "sha256-CLTufJ29VxNOIZ8UTg0lepsn3X03AmopmaLTTeHDCL4=", "owner": "oxalica", "repo": "rust-overlay", - "rev": "d59c3fa0cba8336e115b376c2d9e91053aa59e56", + "rev": "f9b92316727af9e6c7fee4a761242f7f46880329", "type": "github" }, "original": { @@ -248,6 +251,21 @@ "repo": "default", "type": "github" } + }, + "systems_2": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } } }, "root": "root", diff --git a/flake.nix b/flake.nix index 765e3fb..4e1c3e5 100644 --- a/flake.nix +++ b/flake.nix @@ -31,7 +31,7 @@ pre-commit.settings.hooks = { rustfmt.enable = true; clippy.enable = true; - cargo-check.enable = true; + # cargo-check.enable = true; }; checks = { inherit package; }; packages.default = package; @@ -55,7 +55,7 @@ buildInputs = builtins.attrValues { inherit (pkgs) rust-analyzer - # rustfmt + rustfmt clippy # profiling cargo-flamegraph @@ -65,11 +65,6 @@ cargo-edit cargo-license ; - rustfmt = pkgs.rustfmt.overrideAttrs (old: { - preFixup = pkgs.lib.optionalString pkgs.stdenv.isDarwin '' - install_name_tool -add_rpath "${pkgs.rustc}/lib" "$out/bin/rustfmt" - ''; - }); }; }; }; diff --git a/src/builtins.rs b/src/builtins.rs index 82ef8f8..dafa320 100644 --- a/src/builtins.rs +++ b/src/builtins.rs @@ -2,7 +2,9 @@ use std::fmt::Write; -use crate::evaluate::{Dictionary, Value}; +use crate::dictionary::Dictionary; +use crate::evaluate::Value; +use crate::syntax::Type; /// Builtin functions. #[derive(Debug, Copy, Clone)] @@ -17,6 +19,7 @@ pub enum Builtin { LessThan, } +#[track_caller] fn unwrap_int_value(op_name: &'static str) -> impl Fn(Value) -> i64 { move |value| match value { Value::Integer(i) => i, @@ -25,15 +28,14 @@ fn unwrap_int_value(op_name: &'static str) -> impl Fn(Value) -> i64 { } macro_rules! int_op { - ($arguments:expr, $op:tt, $op_name:expr) => { - Value::Integer( - $arguments - .into_iter() - .map(unwrap_int_value($op_name)) - .reduce(|acc, v| acc $op v) - .unwrap(), - ) - }; + ($arguments:expr, $op:tt, $op_name:expr) => {{ + let i = $arguments + .into_iter() + .map(unwrap_int_value($op_name)) + .reduce(|acc, v| acc $op v) + .unwrap(); + Value::Integer(i) + }}; } macro_rules! int_cmp { @@ -46,6 +48,28 @@ macro_rules! int_cmp { }}; } +fn int_type() -> Type { + Type::Constructor("Integer".to_string()) +} + +fn bool_type() -> Type { + Type::Constructor("Boolean".to_string()) +} + +fn int_op_type() -> Type { + Type::Function { + parameters: vec![int_type(), int_type()], + result: Box::new(int_type()), + } +} + +fn int_cmp_type() -> Type { + Type::Function { + parameters: vec![int_type(), int_type()], + result: Box::new(bool_type()), + } +} + impl Builtin { /// A mapping from runtime names to builtin sentinels. pub fn dictionary() -> Dictionary { @@ -63,6 +87,22 @@ impl Builtin { .into() } + /// A mapping from runtime names to builtin types. + pub fn type_dictionary() -> Dictionary { + [ + ("_print", Type::Print), + ("_add", int_op_type()), + ("_sub", int_op_type()), + ("_mul", int_op_type()), + ("_div", int_op_type()), + ("_equals", Type::Equals), + ("_greaterthan", int_cmp_type()), + ("_lessthan", int_cmp_type()), + ] + .map(|(name, ty)| (name.to_string(), ty)) + .into() + } + /// Minimum # of required arguments for a builtin. pub fn min_parameters(&self) -> usize { match self { diff --git a/src/dictionary.rs b/src/dictionary.rs new file mode 100644 index 0000000..26463b7 --- /dev/null +++ b/src/dictionary.rs @@ -0,0 +1,6 @@ +use std::collections::HashMap; + +use crate::syntax::Name; + +/// Mapping of names to some T. +pub type Dictionary = HashMap; diff --git a/src/evaluate.rs b/src/evaluate.rs index 4b1ea8d..874ce56 100644 --- a/src/evaluate.rs +++ b/src/evaluate.rs @@ -1,16 +1,13 @@ //! Tree-walk interpreter for syntax. -use std::collections::HashMap; use std::fmt::Display; use thiserror::Error; use crate::builtins::Builtin; +use crate::dictionary::Dictionary; use crate::syntax::{Block, Expression, InteractiveEntry, Item, Name, Program}; -/// Mapping of names to some T. -pub type Dictionary = HashMap; - /// Runtime values. #[derive(Debug, Clone)] pub enum Value { @@ -58,6 +55,17 @@ pub struct Interpreter { environment: Dictionary, } +macro_rules! bind { + ($interpreter:expr, $name:expr, $term:expr, to: $nested:expr) => {{ + let value = $interpreter.evaluate($term)?; + $nested.environment.insert($name, value.clone()); + Ok(value) + }}; + ($interpreter:expr, $name:expr, $term:expr) => { + bind!($interpreter, $name, $term, to: $interpreter) + }; +} + impl Interpreter { /// Create a fresh interpreter populated with builtins. pub fn new() -> Self { @@ -89,10 +97,11 @@ impl Interpreter { name, parameters, body, + .. } => { let closure = Value::Closure { environment: self.environment.clone(), - parameters: parameters.clone(), + parameters: parameters.iter().map(|p| p.name.clone()).collect(), result: Expression::Block(Box::new(body.clone())), }; self.environment.insert(name.clone(), closure.clone()); @@ -106,7 +115,7 @@ impl Interpreter { match entry { InteractiveEntry::Item(item) => self.interpret_item(item), InteractiveEntry::Binding(binding) => { - self.bind(binding.name.clone(), &binding.expression) + bind!(self, binding.name.clone(), &binding.expression) } InteractiveEntry::Expression(term) => self.evaluate(term), } @@ -120,15 +129,18 @@ impl Interpreter { .ok_or(Error::UnboundName(name.clone())) .cloned(), Expression::Block(b) => self.evaluate_block(b.as_ref()), - Expression::Lambda { parameters, result } => Ok(Value::Closure { + Expression::Lambda { + parameters, result, .. + } => Ok(Value::Closure { environment: self.environment.clone(), - parameters: parameters.clone(), + parameters: parameters.iter().map(|p| p.name.clone()).collect(), result: *result.clone(), }), Expression::Call { callee, arguments } => { let callee = self.evaluate(callee.as_ref())?; self.apply_call(&callee, arguments) } + Expression::Annotation { expression, .. } => self.evaluate(expression.as_ref()), Expression::Boolean(b) => Ok(Value::Boolean(*b)), Expression::Integer(i) => Ok(Value::Integer(*i)), Expression::Unit => Ok(Value::Unit), @@ -148,7 +160,7 @@ impl Interpreter { let mut nested = Interpreter::nested(environment.clone()); for (name, argument) in parameters.iter().zip(arguments) { // we don't want arguments to refer to each other, so use the parent interpreter - self.bind_nested(name.clone(), argument, &mut nested)?; + bind!(self, name.clone(), argument, to: nested)?; } nested.evaluate(result) } @@ -176,27 +188,10 @@ impl Interpreter { fn evaluate_block(&mut self, block: &Block) -> Result { let mut nested = self.clone(); for binding in &block.bindings { - nested.bind(binding.name.clone(), &binding.expression)?; + bind!(nested, binding.name.clone(), &binding.expression)?; } nested.evaluate(&block.result) } - - fn bind(&mut self, name: Name, term: &Expression) -> Result { - let value = self.evaluate(term)?; - self.environment.insert(name, value.clone()); - Ok(value) - } - - fn bind_nested( - &mut self, - name: Name, - term: &Expression, - nested: &mut Interpreter, - ) -> Result { - let value = self.evaluate(term)?; - nested.environment.insert(name, value.clone()); - Ok(value) - } } impl Default for Interpreter { diff --git a/src/lib.rs b/src/lib.rs index 43bfecd..18838c3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,7 @@ pub mod builtins; pub mod cli; +pub mod dictionary; pub mod evaluate; pub mod parse; pub mod syntax; +pub mod typecheck; diff --git a/src/parse/parser.lalrpop b/src/parse/parser.lalrpop index 78f3b5d..d88935e 100644 --- a/src/parse/parser.lalrpop +++ b/src/parse/parser.lalrpop @@ -5,7 +5,9 @@ use crate::syntax::{ Item, InteractiveEntry, Name, + Parameter, Program, + Type, }; grammar; @@ -18,20 +20,26 @@ pub InteractiveEntry: InteractiveEntry = { ";"? => InteractiveEntry::Expression(<>), } -Item: Item = { - "fn" "(" > ")" ";"? => Item::Fn { <> }, +Type: Type = { + Name => Type::Constructor(<>), + "fn" "(" > ")" "->" > => Type::Function { <> }, + "(" ")", } +Item: Item = { + "fn" "(" > ")" " )?> ";"? => Item::Fn { <> }, +} + +Parameter: Parameter = + )?> => Parameter { <> }; + Binding: Binding = - "let" "=" => Binding { <> }; + "let" )?> "=" => Binding { <> }; Expression: Expression = { - Simple, + "|" > "|" " )?> > => Expression::Lambda { <> }, Boxed => Expression::Block(<>), -}; - -Simple: Expression = { - "|" > "|" > => Expression::Lambda { <> }, + > ":" => Expression::Annotation { <> }, Atom, }; @@ -40,8 +48,8 @@ Atom: Expression = { > "(" > ")" => Expression::Call { <> }, Integer => Expression::Integer(<>), Boolean => Expression::Boolean(<>), - "(" ")", "(" ")" => Expression::Unit, + "(" ")", }; BlockBinding = ";"; diff --git a/src/syntax.rs b/src/syntax.rs index 312aeeb..3cec830 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -1,6 +1,6 @@ //! Syntax elements. -/// Identifiers. +/// An identifier. pub type Name = String; /// The rough equivalent of a module. @@ -14,20 +14,49 @@ pub enum InteractiveEntry { Expression(Expression), } +/// Surface syntax types. +#[derive(Debug, Clone)] +pub enum Type { + /// A type variable. + Variable(Name), + /// A function type. + Function { + /// Parameter types. + parameters: Vec, + /// The return type. + result: Box, + }, + /// A type constructor. + Constructor(Name), + /// Special typing sentinel for the print builtin. + Print, + /// Special typing sentinel for the equals builtin. + Equals, +} + /// A top level definition. #[derive(Debug, Clone)] pub enum Item { Fn { name: Name, - parameters: Vec, + parameters: Vec, + return_type: Option, body: Block, }, } +/// Function parameter with an optional type. +#[derive(Debug, Clone)] +pub struct Parameter { + pub name: Name, + pub annotation: Option, +} + /// Syntactic mapping from an identifier to an expression. #[derive(Debug, Clone)] pub struct Binding { pub name: Name, + pub annotation: Option, pub expression: Expression, } @@ -37,13 +66,18 @@ pub enum Expression { Variable(Name), Block(Box), Lambda { - parameters: Vec, + parameters: Vec, + return_type: Option, result: Box, }, Call { callee: Box, arguments: Vec, }, + Annotation { + expression: Box, + annotation: Type, + }, Boolean(bool), Integer(i64), Unit, diff --git a/src/typecheck.rs b/src/typecheck.rs new file mode 100644 index 0000000..ee9e748 --- /dev/null +++ b/src/typecheck.rs @@ -0,0 +1,20 @@ +use crate::builtins::Builtin; +use crate::dictionary::Dictionary; +use crate::syntax::{Program, Type}; + +/// Typechecker state. +struct Typechecker { + environment: Dictionary, +} + +impl Typechecker { + fn new() -> Self { + Typechecker { + environment: Builtin::type_dictionary(), + } + } + + fn check_program(&mut self, program: &Program) -> Result<(), ()> { + Ok(()) + } +}