diff options
Diffstat (limited to 'matrix-lang/src/parse.rs')
-rw-r--r-- | matrix-lang/src/parse.rs | 757 |
1 files changed, 757 insertions, 0 deletions
diff --git a/matrix-lang/src/parse.rs b/matrix-lang/src/parse.rs new file mode 100644 index 0000000..3a4c5f2 --- /dev/null +++ b/matrix-lang/src/parse.rs @@ -0,0 +1,757 @@ +use crate::prelude::*; + +use Value as V; +use ExprData as E; +use TokenData as T; + +pub struct ParserBuilder { + optimize: bool +} + +impl ParserBuilder { + pub fn new() -> Self { + Self { optimize: true } + } + + pub fn optimize(mut self, optimize: bool) -> Self { + self.optimize = optimize; + self + } + + pub fn build(self) -> Parser { + Parser { + lexer: Lexer::new(""), + optimize: self.optimize + } + } +} + +pub struct Parser { + lexer: Lexer, + optimize: bool +} + +macro_rules! expr_parser { + ($parser:ident, $pattern:pat, $fn:ident) => {{ + let mut expr = $parser.$fn()?; + let pos = expr.pos; + loop { + let tok = $parser.lexer.peek_token_nl()?; + match tok.data { + $pattern => { + $parser.lexer.next_token_nl()?; + let temp = $parser.$fn()?; + expr = (E::BinaryOp(Box::new(expr), Box::new(temp), BinaryOp::from(tok.data)), pos).into() + } + _ => break + } + } + Ok(expr) + }}; +} + +macro_rules! expr_parser_reverse { + ($parser:ident, $pattern:pat, $fn:ident, $cur:ident) => {{ + let expr = $parser.$fn()?; + let tok = $parser.lexer.peek_token_nl()?; + let pos = tok.pos; + Ok(match tok.data { + $pattern => { + $parser.lexer.next_token_nl()?; + (E::BinaryOp(Box::new(expr), Box::new($parser.$cur()?), BinaryOp::from(tok.data)), pos).into() + } + _ => expr + }) + }}; +} + +macro_rules! error { + ($($arg:tt)*) => { + exception!(PARSE_EXCEPTION, $($arg)*) + }; +} + +impl Parser { + + fn force_token(&mut self, tok: TokenData) -> Result<TokenData> { + let next = self.lexer.next_token()?; + if next.data != tok { + Err(error!("expected token '{tok}'").pos(next.pos)) + } else { + Ok(tok) + } + } + + fn force_token_nl(&mut self, tok: TokenData) -> Result<TokenData> { + let next = self.lexer.next_token_nl()?; + if next.data != tok { + Err(error!("expected token '{tok}'").pos(next.pos)) + } else { + Ok(tok) + } + } + + fn parse_fn_call(&mut self) -> Result<Vec<Expr>> { + self.force_token(T::LeftParen)?; + let mut params = Vec::new(); + loop { + let expr = match self.lexer.peek_token()?.data { + T::RightParen => { + self.lexer.next_token()?; + break + }, + _ => self.parse_expr()? + }; + params.push(expr); + let next = self.lexer.next_token()?; + match next.data { + T::Comma => continue, + T::RightParen => break, + _ => return Err(error!("unexpected token '{next}'").pos(next.pos)) + }; + } + Ok(params) + } + + fn parse_index(&mut self) -> Result<Vec<Expr>> { + self.force_token(T::LeftBrack)?; + let mut indicies = Vec::new(); + loop { + let expr = match self.lexer.peek_token()?.data { + T::RightBrack => { + self.lexer.next_token()?; + break + }, + _ => self.parse_expr()? + }; + indicies.push(expr); + let next = self.lexer.next_token()?; + match next.data { + T::SemiColon => continue, + T::RightBrack => break, + _ => return Err(error!("unexpected token '{next}'").pos(next.pos)) + }; + } + Ok(indicies) + } + + fn parse_matrix_part(&mut self) -> Result<Vec<Expr>> { + let mut part = Vec::new(); + loop { + let expr = match self.lexer.peek_token()?.data { + T::SemiColon => break, + T::RightBrack => break, + _ => self.parse_expr()? + }; + part.push(expr); + match self.lexer.peek_token()?.data { + T::Comma => { + self.lexer.next_token()?; + }, + _ => {}, + }; + } + Ok(part) + } + + fn parse_matrix(&mut self) -> Result<Expr> { + let pos = self.lexer.peek_token()?.pos; + self.force_token(T::LeftBrack)?; + let mut parts = Vec::new(); + loop { + let part = self.parse_matrix_part()?; + parts.push(part); + let next = self.lexer.next_token()?; + match next.data { + T::SemiColon => continue, + T::RightBrack => break, + _ => return Err(error!("unexpected token '{next}'").pos(next.pos)) + }; + } + if parts.len() == 1 { + Ok((E::List(parts.pop().unwrap()), pos).into()) + } else { + let codomain = parts.len(); + let domain = parts[0].len(); + for part in parts.iter() { + if part.len() != domain { + return Err(error!("matrix row domains do not match: {} != {}", domain, part.len()).pos(pos)) + } + } + let mut data = Vec::new(); + parts.reverse(); + while let Some(part) = parts.pop() { + data.extend(part); + } + Ok((E::Matrix((domain, codomain, data)), pos).into()) + } + } + + fn parse_table_key(&mut self) -> Result<Expr> { + let tok = self.lexer.next_token()?; + Ok(match tok.data { + T::LeftBrack => { + let expr = self.parse_expr()?; + self.force_token(T::RightBrack)?; + expr + }, + T::Ident(ident) => (E::Literal(V::String(ident.to_string().into())), tok.pos).into(), + T::String(string) => (E::Literal(V::String(string.to_string().into())), tok.pos).into(), + t => return Err(error!("unexpected token '{t}'").pos(tok.pos)) + }) + } + + fn parse_table(&mut self) -> Result<Expr> { + let pos = self.lexer.peek_token()?.pos; + self.force_token(T::Colon)?; + self.force_token(T::LeftBrace)?; + let mut table = Vec::new(); + if self.lexer.peek_token()?.data == T::RightBrace { + self.lexer.next_token()?; + return Ok((E::Table(table), pos).into()) + } + loop { + let key = self.parse_table_key()?; + self.force_token(T::Assign)?; + let value = self.parse_expr()?; + table.push((key, value)); + let next = self.lexer.next_token()?; + match next.data { + T::Comma => continue, + T::RightBrace => break, + _ => return Err(error!("unexpected token '{next}'").pos(next.pos)) + } + } + Ok((E::Table(table), pos).into()) + } + + fn parse_paren(&mut self) -> Result<Expr> { + self.force_token(T::LeftParen)?; + let expr = self.parse_expr()?; + self.force_token(T::RightParen)?; + Ok(expr) + } + + fn parse_params(&mut self) -> Result<(Vec<(Rc<str>, Position)>, bool)> { + let tok = self.lexer.next_token()?; + match tok.data { + T::Ident(ident) => { + let params = vec![(ident, tok.pos)]; + if self.lexer.peek_token()?.data == T::Varadic { + self.lexer.next_token()?; + return Ok((params, true)) + } else { + return Ok((params, false)) + } + } + T::LeftParen => (), + t => return Err(error!("unexpected token '{t}'").pos(tok.pos)) + } + + let mut params = Vec::new(); + let mut varadic = false; + + if self.lexer.peek_token()?.data == T::RightParen { + return Ok((params, varadic)); + } + + loop { + let ident = self.parse_ident()?; + params.push(ident); + let next = self.lexer.next_token()?; + match next.data { + T::Varadic => { + varadic = true; + self.force_token(T::RightParen)?; + break; + } + T::Comma => continue, + T::RightParen => break, + _ => return Err(error!("unexpected token '{next}'").pos(next.pos)) + } + } + + Ok((params, varadic)) + } + + fn parse_ident(&mut self) -> Result<AstName> { + let next = self.lexer.next_token()?; + if let T::Ident(ident) = next.data { + Ok((ident, next.pos)) + } else { + Err(error!("unexpected token '{next}'").pos(next.pos)) + } + } + + fn parse_wrapped_ident(&mut self) -> Result<AstName> { + if self.lexer.peek_token()?.data == T::LeftParen { + self.lexer.next_token()?; + let ident = self.parse_ident()?; + self.force_token(T::RightParen)?; + Ok(ident) + } else { + self.parse_ident() + } + } + + fn parse_ident_nl(&mut self) -> Result<AstName> { + let next = self.lexer.next_token_nl()?; + if let T::Ident(ident) = next.data { + Ok((ident, next.pos)) + } else { + Err(error!("unexpected token '{next}'").pos(next.pos)) + } + } + + fn parse_function(&mut self) -> Result<Expr> { + let pos = self.lexer.peek_token()?.pos; + self.force_token(T::Function)?; + let ident = self.parse_ident()?; + let (params, varadic) = match self.lexer.peek_token()?.data { + T::LeftBrace => (vec![], false), + _ => self.parse_params()?, + }; + let expr = self.parse_expr()?; + Ok((E::Function(ident, params, Box::new(expr), varadic), pos).into()) + } + + fn parse_lambda(&mut self) -> Result<Expr> { + let pos = self.lexer.peek_token()?.pos; + self.force_token(T::Backslash)?; + let (params, varadic) = match self.lexer.peek_token()?.data { + T::Arrow => (vec![], false), + _ => self.parse_params()?, + }; + self.force_token(T::Arrow)?; + let expr = self.parse_expr()?; + Ok((E::Lambda(params, Box::new(expr), varadic), pos).into()) + } + + fn parse_do_while(&mut self) -> Result<Expr> { + let pos = self.lexer.peek_token()?.pos; + self.force_token(T::Do)?; + let expr = Box::new(self.parse_expr()?); + self.force_token(T::While)?; + let cond = Box::new(self.parse_expr()?); + Ok((E::DoWhile(expr, cond), pos).into()) + } + + fn parse_while(&mut self) -> Result<Expr> { + let pos = self.lexer.peek_token()?.pos; + self.force_token(T::While)?; + let cond = Box::new(self.parse_expr()?); + let expr = Box::new(self.parse_expr()?); + Ok((E::While(cond, expr), pos).into()) + } + + fn parse_loop(&mut self) -> Result<Expr> { + let pos = self.lexer.peek_token()?.pos; + self.force_token(T::Loop)?; + let expr = self.parse_expr()?; + Ok((E::Loop(Box::new(expr)), pos).into()) + } + + fn parse_for(&mut self) -> Result<Expr> { + let pos = self.lexer.peek_token()?.pos; + self.force_token(T::For)?; + let name = self.parse_ident()?; + self.force_token(T::In)?; + let cond = Box::new(self.parse_expr()?); + let expr = Box::new(self.parse_expr()?); + Ok((E::For(name, cond, expr), pos).into()) + } + + fn parse_if(&mut self) -> Result<Expr> { + let pos = self.lexer.peek_token()?.pos; + self.force_token(T::If)?; + let cond = Box::new(self.parse_expr()?); + let expr = Box::new(self.parse_expr()?); + + if self.lexer.peek_token()?.data != T::Else { + return Ok((E::If(cond, expr, None), pos).into()) + } + self.lexer.next_token()?; + + if self.lexer.peek_token()?.data == T::If { + Ok((E::If(cond, expr, Some(Box::new(self.parse_if()?))), pos).into()) + } else { + Ok((E::If(cond, expr, Some(Box::new(self.parse_expr()?))), pos).into()) + } + } + + fn parse_let(&mut self) -> Result<Expr> { + let pos = self.lexer.peek_token()?.pos; + self.force_token(T::Let)?; + let ident = self.parse_ident_nl()?; + if self.lexer.peek_token_nl()?.data == T::Assign { + self.force_token_nl(T::Assign)?; + Ok((E::Let(ident, Box::new(self.parse_expr()?)), pos).into()) + } else { + Ok((E::Let(ident, Box::new((E::Literal(V::Nil), pos).into())), pos).into()) + } + } + + fn parse_const(&mut self) -> Result<Expr> { + let pos = self.lexer.peek_token()?.pos; + self.force_token(T::Const)?; + let ident = self.parse_ident_nl()?; + self.force_token(T::Assign)?; + let expr = self.parse_expr()?; + Ok((E::Const(ident, Box::new(expr)), pos).into()) + } + + fn parse_return(&mut self) -> Result<Expr> { + let pos = self.lexer.peek_token()?.pos; + self.force_token(T::Return)?; + Ok((E::Return(Box::new(self.parse_expr()?)), pos).into()) + } + + fn parse_block(&mut self) -> Result<Expr> { + let mut block = Vec::new(); + let pos = self.lexer.peek_token()?.pos; + self.force_token(T::LeftBrace)?; + loop { + let expr = match self.lexer.peek_token()?.data { + T::RightBrace => { + self.lexer.next_token()?; + break + }, + T::SemiColon => { + self.lexer.next_token()?; + continue; + } + _ => self.parse_expr()? + }; + block.push(expr); + let next = self.lexer.next_token_nl()?; + match next.data { + T::SemiColon => continue, + T::RightBrace => break, + _ => return Err(error!("expected a semicolon").pos(next.pos)) + } + } + Ok((E::Block(block), pos).into()) + } + + fn parse_try(&mut self) -> Result<Expr> { + let pos = self.lexer.peek_token()?.pos; + self.force_token(T::Try)?; + let expr = self.parse_expr()?; + self.force_token(T::Catch)?; + let ident = self.parse_wrapped_ident()?; + let catch = self.parse_expr()?; + Ok((E::Try(Box::new(expr), ident, Box::new(catch)), pos).into()) + } + + fn parse_value(&mut self) -> Result<Expr> { + let tok = self.lexer.next_token()?; + let data = match tok.data { + T::Nil => E::Literal(V::Nil), + T::Int(i) => E::Literal(V::Int(i)), + T::Float(f) => E::Literal(V::Float(f)), + T::Complex(c) => E::Literal(V::Complex(Complex64::new(0.0, c))), + T::Regex(r) => E::Literal(V::Regex(Rc::new(r.into()))), + T::String(s) => E::Literal(V::String(s.to_string().into())), + T::True => E::Literal(V::Bool(true)), + T::False => E::Literal(V::Bool(false)), + T::Ident(ident) => E::Ident(ident), + t => return Err(error!("unexpected token '{t}'").pos(tok.pos)) + }; + Ok((data, tok.pos).into()) + } + + fn parse_term(&mut self) -> Result<Expr> { + use T::*; + match self.lexer.peek_token()?.data { + Function => self.parse_function(), + Backslash => self.parse_lambda(), + Do => self.parse_do_while(), + While => self.parse_while(), + For => self.parse_for(), + Let => self.parse_let(), + Const => self.parse_const(), + LeftBrace => self.parse_block(), + Return => self.parse_return(), + If => self.parse_if(), + Loop => self.parse_loop(), + Try => self.parse_try(), + Break => { + let next = self.lexer.next_token()?; + Ok((E::Break, next.pos).into()) + }, + Continue => { + let next = self.lexer.next_token()?; + Ok((E::Continue, next.pos).into()) + }, + LeftBrack => self.parse_matrix(), + Colon => self.parse_table(), + LeftParen => self.parse_paren(), + _ => self.parse_value(), + } + } + + fn parse_expr_expr_access(&mut self) -> Result<Expr> { + let mut expr = self.parse_term()?; + let pos = expr.pos; + loop { + let tok = self.lexer.peek_token()?; + match tok.data { + T::Access => { + self.force_token(T::Access)?; + let temp = self.parse_ident()?; + expr = (E::FieldAccess(Box::new(expr), temp), pos).into(); + }, + _ => break + } + } + Ok(expr) + } + + fn parse_expr_call(&mut self) -> Result<Expr> { + let mut expr = self.parse_expr_expr_access()?; + let pos = expr.pos; + loop { + let tok = self.lexer.peek_token()?; + match tok.data { + T::LeftBrack => { + let index = self.parse_index()?; + expr = (E::Index(Box::new(expr), index), pos).into(); + }, + T::LeftParen => { + let params = self.parse_fn_call()?; + expr = (E::FnCall(Box::new(expr), params), pos).into(); + } + _ => break + } + } + Ok(expr) + } + + fn parse_expr_unary(&mut self) -> Result<Expr> { + let tok = self.lexer.peek_token_nl()?; + Ok(match tok.data { + T::Not => { + self.lexer.next_token()?; + (E::UnaryOp(Box::new(self.parse_expr_unary()?), UnaryOp::Not), tok.pos).into() + } + T::Subtract => { + self.lexer.next_token()?; + (E::UnaryOp(Box::new(self.parse_expr_unary()?), UnaryOp::Negate), tok.pos).into() + } + _ => self.parse_expr_call()? + }) + } + + fn parse_expr_pow(&mut self) -> Result<Expr> { + expr_parser_reverse!( + self, + T::Power, + parse_expr_unary, + parse_expr_pow + ) + } + + fn parse_expr_mult(&mut self) -> Result<Expr> { + expr_parser!(self, T::Multiply | T::Divide | T::Modulo, parse_expr_pow) + } + + fn parse_expr_add(&mut self) -> Result<Expr> { + expr_parser!(self, T::Add | T::Subtract, parse_expr_mult) + } + + fn parse_expr_shift(&mut self) -> Result<Expr> { + expr_parser!( + self, + T::BitwiseShiftLeft | T::BitwiseShiftRight, + parse_expr_add + ) + } + + fn parse_expr_bit_and(&mut self) -> Result<Expr> { + expr_parser!(self, T::BitwiseAnd, parse_expr_shift) + } + + fn parse_expr_bit_or(&mut self) -> Result<Expr> { + expr_parser!(self, T::BitwiseOr, parse_expr_bit_and) + } + + fn parse_expr_compare(&mut self) -> Result<Expr> { + expr_parser!( + self, + T::Equal | T::NotEqual | + T::LessThan | T::GreaterThan | + T::LessEqual | T::GreaterEqual, + parse_expr_bit_or + ) + } + + fn parse_expr_and(&mut self) -> Result<Expr> { + let mut expr = self.parse_expr_compare()?; + let pos = expr.pos; + loop { + let tok = self.lexer.peek_token()?; + match tok.data { + T::And => { + self.force_token(T::And)?; + let temp = self.parse_expr_compare()?; + expr = (E::And(Box::new(expr), Box::new(temp)), pos).into(); + }, + _ => break + } + } + Ok(expr) + } + + fn parse_expr_or(&mut self) -> Result<Expr> { + let mut expr = self.parse_expr_and()?; + let pos = expr.pos; + loop { + let tok = self.lexer.peek_token()?; + match tok.data { + T::Or => { + self.force_token(T::Or)?; + let temp = self.parse_expr_and()?; + expr = (E::Or(Box::new(expr), Box::new(temp)), pos).into(); + }, + _ => break + } + } + Ok(expr) + } + + fn parse_expr_range(&mut self) -> Result<Expr> { + let expr = self.parse_expr_or()?; + let pos = expr.pos; + match self.lexer.peek_token()?.data { + T::Range => { + self.lexer.next_token()?; + let temp = self.parse_expr_or()?; + Ok((E::BinaryOp(Box::new(expr), Box::new(temp), BinaryOp::Range), pos).into()) + }, + T::RangeEq => { + self.lexer.next_token()?; + let temp = self.parse_expr_or()?; + Ok((E::BinaryOp(Box::new(expr), Box::new(temp), BinaryOp::RangeEq), pos).into()) + }, + _ => Ok(expr) + } + } + + fn parse_expr_op_assign(&mut self) -> Result<Expr> { + use BinaryOp as B; + let expr = self.parse_expr_range()?; + let tok = self.lexer.peek_token_nl()?; + let pos = tok.pos; + let data: ExprData = match tok.data { + T::Assign => { + self.lexer.next_token_nl()?; + E::Assign(Box::new(expr.clone()),Box::new(self.parse_expr()?)) + }, + T::AssignAnd => { + self.lexer.next_token_nl()?; + E::Assign(Box::new(expr.clone()),Box::new((E::And(Box::new(expr), Box::new(self.parse_expr()?)), pos).into())) + }, + T::AssignOr => { + self.lexer.next_token_nl()?; + E::Assign(Box::new(expr.clone()),Box::new((E::Or(Box::new(expr), Box::new(self.parse_expr()?)),pos).into())) + }, + T::AssignAdd => { + self.lexer.next_token_nl()?; + E::Assign(Box::new(expr.clone()),Box::new((E::BinaryOp(Box::new(expr), Box::new(self.parse_expr()?), B::Add),pos).into())) + }, + T::AssignSubtract => { + self.lexer.next_token_nl()?; + E::Assign(Box::new(expr.clone()),Box::new((E::BinaryOp(Box::new(expr), Box::new(self.parse_expr()?), B::Subtract),pos).into())) + }, + T::AssignMultiply => { + self.lexer.next_token_nl()?; + E::Assign(Box::new(expr.clone()),Box::new((E::BinaryOp(Box::new(expr), Box::new(self.parse_expr()?), B::Multiply),pos).into())) + }, + T::AssignDivide => { + self.lexer.next_token_nl()?; + E::Assign(Box::new(expr.clone()),Box::new((E::BinaryOp(Box::new(expr), Box::new(self.parse_expr()?), B::Divide),pos).into())) + }, + T::AssignModulo => { + self.lexer.next_token_nl()?; + E::Assign(Box::new(expr.clone()),Box::new((E::BinaryOp(Box::new(expr), Box::new(self.parse_expr()?), B::Modulo),pos).into())) + }, + T::AssignPower => { + self.lexer.next_token_nl()?; + E::Assign(Box::new(expr.clone()),Box::new((E::BinaryOp(Box::new(expr), Box::new(self.parse_expr()?), B::Power),pos).into())) + }, + T::AssignBitwiseAnd => { + self.lexer.next_token_nl()?; + E::Assign(Box::new(expr.clone()),Box::new((E::BinaryOp(Box::new(expr), Box::new(self.parse_expr()?), B::BitwiseAnd),pos).into())) + }, + T::AssignBitwiseOr => { + self.lexer.next_token_nl()?; + E::Assign(Box::new(expr.clone()),Box::new((E::BinaryOp(Box::new(expr), Box::new(self.parse_expr()?), B::BitwiseOr),pos).into())) + }, + T::AssignBitwiseXor => { + self.lexer.next_token_nl()?; + E::Assign(Box::new(expr.clone()),Box::new((E::BinaryOp(Box::new(expr), Box::new(self.parse_expr()?), B::BitwiseXor),pos).into())) + }, + T::AssignBitwiseShiftLeft => { + self.lexer.next_token_nl()?; + E::Assign(Box::new(expr.clone()),Box::new((E::BinaryOp(Box::new(expr), Box::new(self.parse_expr()?), B::BitwiseShiftLeft),pos).into())) + }, + T::AssignBitwiseShiftRight => { + self.lexer.next_token_nl()?; + E::Assign(Box::new(expr.clone()),Box::new((E::BinaryOp(Box::new(expr), Box::new(self.parse_expr()?), B::BitwiseShiftRight),pos).into())) + }, + _ => expr.data + }; + Ok((data, pos).into()) + } + + fn parse_expr(&mut self) -> Result<Expr> { + let mut expr = self.parse_expr_op_assign()?; + let pos = expr.pos; + loop { + let tok = self.lexer.peek_token()?; + match tok.data { + T::Pipe => { + self.force_token(T::Pipe)?; + let temp = self.parse_expr_op_assign()?; + expr = (E::Pipeline(Box::new(expr), Box::new(temp)), pos).into(); + }, + _ => break + } + } + Ok(expr) + } + + fn parse_root(&mut self) -> Result<Expr> { + let mut block = Vec::new(); + loop { + match self.lexer.peek_token()?.data { + T::Eof => break, + T::SemiColon => { + self.lexer.next_token()?; + continue + } + _ => {} + }; + let expr = self.parse_expr()?; + block.push(expr); + let next = self.lexer.next_token_nl()?; + match next.data { + T::Eof => break, + T::SemiColon => continue, + _ => return Err(error!("expected a semicolon").pos(next.pos)) + }; + } + Ok((E::Block(block), Position::default()).into()) + } + + pub fn parse<T: Into<String>>(&mut self, into: T) -> Result<Expr> { + let lexer = Lexer::new(into); + self.lexer = lexer; + let ast = self.parse_root()?; + if self.optimize { + Ok(optimize(ast)?) + } else { + Ok(ast) + } + } +} |