From 3ce75cfed8fb30cee6934b9b61e357a171e700dd Mon Sep 17 00:00:00 2001 From: mat ess Date: Sat, 6 May 2023 01:14:15 -0400 Subject: [PATCH] Simple interpreter --- Cargo.lock | 7 +++ Cargo.toml | 1 + docs/ROADMAP.md | 21 +++++-- example.mul | 8 +-- src/interpreter.rs | 150 +++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + src/main.rs | 71 +++++++++++++++------ src/parser.lalrpop | 30 +++++---- src/syntax.rs | 46 ++------------ 9 files changed, 255 insertions(+), 80 deletions(-) create mode 100644 src/interpreter.rs diff --git a/Cargo.lock b/Cargo.lock index e0c941d..47a383f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "anyhow" +version = "1.0.71" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8" + [[package]] name = "ascii-canvas" version = "3.0.0" @@ -272,6 +278,7 @@ checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" name = "mul" version = "0.1.0" dependencies = [ + "anyhow", "lalrpop", "lalrpop-util", "regex", diff --git a/Cargo.toml b/Cargo.toml index d707db7..a7ec04e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,5 +7,6 @@ edition = "2021" lalrpop = "0.19.9" [dependencies] +anyhow = "1.0.71" lalrpop-util = { version = "0.19.9", features = ["lexer"] } regex = "1" diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md index 33d2b08..1ea9a13 100644 --- a/docs/ROADMAP.md +++ b/docs/ROADMAP.md @@ -1,17 +1,28 @@ # implementation roadmap -- [ ] parser - - [ ] uniform function call syntax +## stages +- [x] parser - [ ] lossless syntax trees (rowan + ungrammar) - [ ] typechecker -- [ ] interpreter +- [x] interpreter - [ ] code generator +- [ ] formatter (pretty-printer) + +## features +- [ ] primitives + - [x] booleans + - [x] integers + - [ ] floating point numbers + - [ ] text - [ ] functions - - [ ] lambdas / closures + - [x] lambdas / closures - [ ] generic functions + - [ ] uniform function call syntax + - [ ] operators - [ ] algebraic data types - [ ] product types - [ ] sum types - [ ] variant types - [ ] pattern matching -- [ ] trait / interface system +- [ ] traits + - [ ] operator overloading diff --git a/example.mul b/example.mul index 35f901b..cb2f735 100644 --- a/example.mul +++ b/example.mul @@ -1,9 +1,9 @@ -main = || { +let main = || { let result = sum_equals(1, 2, 3); print(result) -} +}; -sum_equals = |x, y, expected| { +let sum_equals = |x, y, expected| { let sum = add(x, y); equals(sum, expected) -} +}; diff --git a/src/interpreter.rs b/src/interpreter.rs new file mode 100644 index 0000000..9691e62 --- /dev/null +++ b/src/interpreter.rs @@ -0,0 +1,150 @@ +use std::collections::HashMap; + +use crate::syntax::{Expression, Name, Program, Statement}; + +pub type Environment = HashMap; + +#[derive(Debug, Clone)] +pub enum Value { + Boolean(bool), + Integer(i64), + Closure(Vec, Expression, Environment), +} + +pub struct Interpreter { + environment: Environment, +} + +#[derive(Debug)] +pub enum InterpreterError { + VariableNotFound(Name), + IncorrectArgumentCount(usize, usize), + NonFunctionCall(Value), + EmptyProgram, + ExecutionError(String), +} + +impl From for InterpreterError { + fn from(s: String) -> Self { + InterpreterError::ExecutionError(s) + } +} + +impl std::fmt::Display for InterpreterError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + InterpreterError::VariableNotFound(name) => write!(f, "Variable '{}' not found", name), + InterpreterError::IncorrectArgumentCount(expected, actual) => { + write!(f, "Expected {} arguments, but got {}", expected, actual) + } + InterpreterError::NonFunctionCall(value) => { + write!(f, "Function call on non-function value: {:?}", value) + } + InterpreterError::EmptyProgram => write!(f, "Program is empty"), + InterpreterError::ExecutionError(s) => write!(f, "Execution error: {}", s), + } + } +} + +type Result = std::result::Result; + +impl Interpreter { + pub fn new() -> Self { + Self { + environment: Environment::new(), + } + } + + pub fn interpret_statement(&mut self, statement: &Statement) -> Result { + match statement { + Statement::Binding(name, expression) => { + let value = self.evaluate(expression)?; + self.environment.insert(name.clone(), value.clone()); + Ok(value) + } + Statement::Expression(expression) => self.evaluate(expression), + } + } + + pub fn interpret(&mut self, program: &Program) -> Result { + let mut result = None; + + for statement in program { + result = Some(self.interpret_statement(statement)?); + } + + match result { + Some(value) => Ok(value), + None => Err(InterpreterError::EmptyProgram), + } + } + + fn evaluate(&mut self, expression: &Expression) -> Result { + match expression { + Expression::Variable(name) => self + .environment + .get(name) + .cloned() + .ok_or_else(|| InterpreterError::VariableNotFound(name.clone())), + + Expression::Boolean(value) => Ok(Value::Boolean(*value)), + + Expression::Integer(value) => Ok(Value::Integer(*value)), + + Expression::Lambda(params, body) => Ok(Value::Closure( + params.clone(), + *body.clone(), + self.environment.clone(), + )), + + Expression::Call(function, args) => { + let function_value = self.evaluate(function)?; + + match function_value { + Value::Closure(params, body, mut environment) => { + if params.len() != args.len() { + return Err(InterpreterError::IncorrectArgumentCount( + params.len(), + args.len(), + )); + } + + let mut arg_values = Vec::new(); + for arg in args { + arg_values.push(self.evaluate(arg)?); + } + + // Bind argument values to parameter names in a new environment + for (param, arg_value) in params.iter().zip(arg_values.into_iter()) { + environment.insert(param.clone(), arg_value); + } + + // Evaluate the body expression in the new environment + let mut interpreter = Interpreter { environment }; + interpreter.evaluate(&body) + } + _ => Err(InterpreterError::NonFunctionCall(function_value)), + } + } + + Expression::Block(statements, expression) => { + let new_environment = self.environment.clone(); + + for statement in statements { + self.interpret_statement(statement)?; + } + + let mut interpreter = Interpreter { + environment: new_environment, + }; + interpreter.evaluate(expression) + } + } + } +} + +impl Default for Interpreter { + fn default() -> Self { + Self::new() + } +} diff --git a/src/lib.rs b/src/lib.rs index 349ddab..bf5927c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ use lalrpop_util::lalrpop_mod; +pub mod interpreter; lalrpop_mod!(pub parser); pub mod syntax; diff --git a/src/main.rs b/src/main.rs index ff787e8..4b6dd04 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,22 +1,17 @@ +use std::env::args; +use std::fs::read_to_string; use std::io::{stdin, stdout, BufRead, Write}; +use std::path::Path; +use std::process::exit; -use mul::parser::{ExpressionParser, StatementParser}; +use mul::interpreter::Interpreter; +use mul::parser::{ProgramParser, StatementParser}; -macro_rules! parse { - ($parser:expr, $line:expr) => { - match $parser.parse($line.as_str()) { - Ok(parsed) => println!("{parsed:?}"), - Err(lalrpop_util::ParseError::UnrecognizedEOF { .. }) => break, - Err(e) => println!("Parse error: {e}"), - } - }; -} - -fn main() { +fn repl() { let mut stdin = stdin().lock(); let mut stdout = stdout().lock(); - let expression = ExpressionParser::new(); - let statement = StatementParser::new(); + let parser = StatementParser::new(); + let mut intepreter = Interpreter::new(); loop { let mut line = String::new(); print!("> "); @@ -24,10 +19,50 @@ fn main() { stdin .read_line(&mut line) .expect("Failed to read from stdin"); - if line.trim_end().ends_with(';') { - parse!(statement, line) - } else { - parse!(expression, line) + if line.trim().is_empty() { + println!(); + continue; } + if !line.trim_end().ends_with(';') { + line.push(';'); + } + let parsed = match parser.parse(&line) { + Ok(parsed) => parsed, + Err(lalrpop_util::ParseError::UnrecognizedEOF { .. }) => break, + Err(e) => { + eprintln!("{e}"); + continue; + } + }; + match intepreter.interpret_statement(&parsed) { + Ok(value) => println!("{value:?}"), + Err(e) => { + eprintln!("{e}"); + continue; + } + }; + } +} + +fn build(file: &Path) { + let content = read_to_string(file).expect("Failed to read file"); + let parser = ProgramParser::new(); + let mut interpreter = Interpreter::new(); + let program = parser.parse(content.as_str()).unwrap_or_else(|e| { + eprintln!("{e}"); + exit(1); + }); + let v = interpreter.interpret(&program).unwrap_or_else(|e| { + eprintln!("{e}"); + exit(1); + }); + println!("{v:?}") +} + +fn main() { + if let Some(file) = args().nth(1) { + build(file.as_ref()) + } else { + repl() } } diff --git a/src/parser.lalrpop b/src/parser.lalrpop index e19a959..5fdfd15 100644 --- a/src/parser.lalrpop +++ b/src/parser.lalrpop @@ -1,32 +1,40 @@ -use crate::syntax::{Expression, Name, Statement}; +use crate::syntax::{Expression, Name, Program, Statement}; grammar; +pub Program: Program = Statement*; + pub Statement: Statement = { "let" "=" ";" => Statement::Binding(<>), - ";" => Statement::Expression(<>), + ";"? => Statement::Expression(<>), + ";" => Statement::Expression(<>), }; -pub Expression: Expression = { - "|" > "|" => Expression::Lambda(params, Box::new(body)), +pub Expression = { + Simple, + Block, +}; + +Simple: Expression = { + "|" > "|" => Expression::Lambda(parameters, Box::new(body)), Atom, }; Atom: Expression = { Name => Expression::Variable(<>), - "{" "}" => Expression::Block(statements, Box::new(result)), - "(" > ")" => Expression::Call(Box::new(callee), args), + "(" > ")" => Expression::Call(Box::new(callee), arguments), Integer => Expression::Integer(<>), Boolean => Expression::Boolean(<>), "(" ")", }; -// TODO: decide on identifier syntax -Name: Name = - r"[a-zA-Z_]+" => <>.to_string(); +Block: Expression = + "{" "}" => Expression::Block(statements, Box::new(result)); -Integer: i64 = - r"[0-9]+" => <>.parse::().unwrap(); +// TODO: decide on identifier syntax +Name: Name = r"[a-zA-Z_]+" => <>.to_string(); + +Integer: i64 = r"[0-9]+" => <>.parse::().unwrap(); Boolean: bool = { "true" => true, diff --git a/src/syntax.rs b/src/syntax.rs index 53c8ee5..9f91f8b 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -1,23 +1,14 @@ -use std::fmt::Display; - pub type Name = String; -#[derive(Debug)] +pub type Program = Vec; + +#[derive(Debug, Clone)] pub enum Statement { Binding(Name, Expression), Expression(Expression), } -impl Display for Statement { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Statement::Binding(name, body) => write!(f, "{name} = {body};"), - Statement::Expression(body) => write!(f, "{body};"), - } - } -} - -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum Expression { Variable(Name), Block(Vec, Box), @@ -26,32 +17,3 @@ pub enum Expression { Boolean(bool), Integer(i64), } - -impl Display for Expression { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Expression::Variable(name) => write!(f, "{name}"), - Expression::Block(statements, result) => { - writeln!(f, "{{")?; - for statement in statements { - writeln!(f, "\t{statement}")?; - } - writeln!(f, "\t{result}")?; - writeln!(f, "}}") - } - Expression::Lambda(params, body) => write!(f, "({}) -> {body}", params.join(", ")), - Expression::Call(callee, args) => { - write!(f, "{callee}(")?; - if let Some(arg) = args.first() { - write!(f, "{arg}")?; - } - for arg in args.iter().skip(1) { - write!(f, ", {arg}")?; - } - write!(f, ")") - } - Expression::Boolean(v) => write!(f, "{v}"), - Expression::Integer(v) => write!(f, "{v}"), - } - } -}