use crate::prelude::*; use Value as V; use ExprData as E; use TokenData as T; pub struct ParserBuilder { optimize: bool } impl ParserBuilder { pub fn new() -> Self { Self { optimize: true } } pub fn optimize(mut self, optimize: bool) -> Self { self.optimize = optimize; self } pub fn build(self) -> Parser { Parser { lexer: Lexer::new(""), optimize: self.optimize } } } pub struct Parser { lexer: Lexer, optimize: bool } macro_rules! expr_parser { ($parser:ident, $pattern:pat, $fn:ident) => {{ let mut expr = $parser.$fn()?; let pos = expr.pos; loop { let tok = $parser.lexer.peek_token_nl()?; match tok.data { $pattern => { $parser.lexer.next_token_nl()?; let temp = $parser.$fn()?; expr = (E::BinaryOp(Box::new(expr), Box::new(temp), BinaryOp::from(tok.data)), pos).into() } _ => break } } Ok(expr) }}; } macro_rules! expr_parser_reverse { ($parser:ident, $pattern:pat, $fn:ident, $cur:ident) => {{ let expr = $parser.$fn()?; let tok = $parser.lexer.peek_token_nl()?; let pos = tok.pos; Ok(match tok.data { $pattern => { $parser.lexer.next_token_nl()?; (E::BinaryOp(Box::new(expr), Box::new($parser.$cur()?), BinaryOp::from(tok.data)), pos).into() } _ => expr }) }}; } macro_rules! error { ($($arg:tt)*) => { exception!(PARSE_EXCEPTION, $($arg)*) }; } impl Parser { fn force_token(&mut self, tok: TokenData) -> Result { let next = self.lexer.next_token()?; if next.data != tok { Err(error!("expected token '{tok}'").pos(next.pos)) } else { Ok(tok) } } fn force_token_nl(&mut self, tok: TokenData) -> Result { let next = self.lexer.next_token_nl()?; if next.data != tok { Err(error!("expected token '{tok}'").pos(next.pos)) } else { Ok(tok) } } fn parse_fn_call(&mut self) -> Result> { self.force_token(T::LeftParen)?; let mut params = Vec::new(); loop { let expr = match self.lexer.peek_token()?.data { T::RightParen => { self.lexer.next_token()?; break }, _ => self.parse_expr()? }; params.push(expr); let next = self.lexer.next_token()?; match next.data { T::Comma => continue, T::RightParen => break, _ => return Err(error!("unexpected token '{next}'").pos(next.pos)) }; } Ok(params) } fn parse_index(&mut self) -> Result> { self.force_token(T::LeftBrack)?; let mut indicies = Vec::new(); loop { let expr = match self.lexer.peek_token()?.data { T::RightBrack => { self.lexer.next_token()?; break }, _ => self.parse_expr()? }; indicies.push(expr); let next = self.lexer.next_token()?; match next.data { T::SemiColon => continue, T::RightBrack => break, _ => return Err(error!("unexpected token '{next}'").pos(next.pos)) }; } Ok(indicies) } fn parse_matrix_part(&mut self) -> Result> { let mut part = Vec::new(); loop { let expr = match self.lexer.peek_token()?.data { T::SemiColon => break, T::RightBrack => break, _ => self.parse_expr()? }; part.push(expr); match self.lexer.peek_token()?.data { T::Comma => { self.lexer.next_token()?; }, _ => {}, }; } Ok(part) } fn parse_matrix(&mut self) -> Result { let pos = self.lexer.peek_token()?.pos; self.force_token(T::LeftBrack)?; let mut parts = Vec::new(); loop { let part = self.parse_matrix_part()?; parts.push(part); let next = self.lexer.next_token()?; match next.data { T::SemiColon => continue, T::RightBrack => break, _ => return Err(error!("unexpected token '{next}'").pos(next.pos)) }; } if parts.len() == 1 { Ok((E::List(parts.pop().expect("bypassed vec length")), pos).into()) } else { let codomain = parts.len(); let domain = parts[0].len(); for part in parts.iter() { if part.len() != domain { return Err(error!("matrix row domains do not match: {} != {}", domain, part.len()).pos(pos)) } } let mut data = Vec::new(); parts.reverse(); while let Some(part) = parts.pop() { data.extend(part); } Ok((E::Matrix((domain, codomain, data)), pos).into()) } } fn parse_table_key(&mut self) -> Result { let tok = self.lexer.next_token()?; Ok(match tok.data { T::LeftBrack => { let expr = self.parse_expr()?; self.force_token(T::RightBrack)?; expr }, T::Ident(ident) => (E::Literal(V::String(ident.to_string().into())), tok.pos).into(), T::String(string) => (E::Literal(V::String(string.to_string().into())), tok.pos).into(), t => return Err(error!("unexpected token '{t}'").pos(tok.pos)) }) } fn parse_table(&mut self) -> Result { let pos = self.lexer.peek_token()?.pos; self.force_token(T::Colon)?; self.force_token(T::LeftBrace)?; let mut table = Vec::new(); if self.lexer.peek_token()?.data == T::RightBrace { self.lexer.next_token()?; return Ok((E::Table(table), pos).into()) } loop { let key = self.parse_table_key()?; self.force_token(T::Assign)?; let value = self.parse_expr()?; table.push((key, value)); let next = self.lexer.next_token()?; match next.data { T::Comma => continue, T::RightBrace => break, _ => return Err(error!("unexpected token '{next}'").pos(next.pos)) } } Ok((E::Table(table), pos).into()) } fn parse_paren(&mut self) -> Result { self.force_token(T::LeftParen)?; let expr = self.parse_expr()?; self.force_token(T::RightParen)?; Ok(expr) } fn parse_params(&mut self) -> Result<(Vec<(Rc, Position)>, bool)> { let tok = self.lexer.next_token()?; match tok.data { T::Ident(ident) => { let params = vec![(ident, tok.pos)]; if self.lexer.peek_token()?.data == T::Varadic { self.lexer.next_token()?; return Ok((params, true)) } else { return Ok((params, false)) } } T::LeftParen => (), t => return Err(error!("unexpected token '{t}'").pos(tok.pos)) } let mut params = Vec::new(); let mut varadic = false; if self.lexer.peek_token()?.data == T::RightParen { return Ok((params, varadic)); } loop { let ident = self.parse_ident()?; params.push(ident); let next = self.lexer.next_token()?; match next.data { T::Varadic => { varadic = true; self.force_token(T::RightParen)?; break; } T::Comma => continue, T::RightParen => break, _ => return Err(error!("unexpected token '{next}'").pos(next.pos)) } } Ok((params, varadic)) } fn parse_ident(&mut self) -> Result { let next = self.lexer.next_token()?; if let T::Ident(ident) = next.data { Ok((ident, next.pos)) } else { Err(error!("unexpected token '{next}'").pos(next.pos)) } } fn parse_wrapped_ident(&mut self) -> Result { if self.lexer.peek_token()?.data == T::LeftParen { self.lexer.next_token()?; let ident = self.parse_ident()?; self.force_token(T::RightParen)?; Ok(ident) } else { self.parse_ident() } } fn parse_ident_nl(&mut self) -> Result { let next = self.lexer.next_token_nl()?; if let T::Ident(ident) = next.data { Ok((ident, next.pos)) } else { Err(error!("unexpected token '{next}'").pos(next.pos)) } } fn parse_function(&mut self) -> Result { let pos = self.lexer.peek_token()?.pos; self.force_token(T::Function)?; let ident = self.parse_ident()?; let (params, varadic) = match self.lexer.peek_token()?.data { T::LeftBrace => (vec![], false), _ => self.parse_params()?, }; let expr = self.parse_expr()?; Ok((E::Function(ident, params, Box::new(expr), varadic), pos).into()) } fn parse_lambda(&mut self) -> Result { let pos = self.lexer.peek_token()?.pos; self.force_token(T::Backslash)?; let (params, varadic) = match self.lexer.peek_token()?.data { T::Arrow => (vec![], false), _ => self.parse_params()?, }; self.force_token(T::Arrow)?; let expr = self.parse_expr()?; Ok((E::Lambda(params, Box::new(expr), varadic), pos).into()) } fn parse_do_while(&mut self) -> Result { let pos = self.lexer.peek_token()?.pos; self.force_token(T::Do)?; let expr = Box::new(self.parse_expr()?); self.force_token(T::While)?; let cond = Box::new(self.parse_expr()?); Ok((E::DoWhile(expr, cond), pos).into()) } fn parse_while(&mut self) -> Result { let pos = self.lexer.peek_token()?.pos; self.force_token(T::While)?; let cond = Box::new(self.parse_expr()?); let expr = Box::new(self.parse_expr()?); Ok((E::While(cond, expr), pos).into()) } fn parse_loop(&mut self) -> Result { let pos = self.lexer.peek_token()?.pos; self.force_token(T::Loop)?; let expr = self.parse_expr()?; Ok((E::Loop(Box::new(expr)), pos).into()) } fn parse_for(&mut self) -> Result { let pos = self.lexer.peek_token()?.pos; self.force_token(T::For)?; let name = self.parse_ident()?; self.force_token(T::In)?; let cond = Box::new(self.parse_expr()?); let expr = Box::new(self.parse_expr()?); Ok((E::For(name, cond, expr), pos).into()) } fn parse_if(&mut self) -> Result { let pos = self.lexer.peek_token()?.pos; self.force_token(T::If)?; let cond = Box::new(self.parse_expr()?); let expr = Box::new(self.parse_expr()?); if self.lexer.peek_token()?.data != T::Else { return Ok((E::If(cond, expr, None), pos).into()) } self.lexer.next_token()?; if self.lexer.peek_token()?.data == T::If { Ok((E::If(cond, expr, Some(Box::new(self.parse_if()?))), pos).into()) } else { Ok((E::If(cond, expr, Some(Box::new(self.parse_expr()?))), pos).into()) } } fn parse_let(&mut self) -> Result { let pos = self.lexer.peek_token()?.pos; self.force_token(T::Let)?; let ident = self.parse_ident_nl()?; if self.lexer.peek_token_nl()?.data == T::Assign { self.force_token_nl(T::Assign)?; Ok((E::Let(ident, Box::new(self.parse_expr()?)), pos).into()) } else { Ok((E::Let(ident, Box::new((E::Literal(V::Nil), pos).into())), pos).into()) } } fn parse_const(&mut self) -> Result { let pos = self.lexer.peek_token()?.pos; self.force_token(T::Const)?; let ident = self.parse_ident_nl()?; self.force_token(T::Assign)?; let expr = self.parse_expr()?; Ok((E::Const(ident, Box::new(expr)), pos).into()) } fn parse_return(&mut self) -> Result { let pos = self.lexer.peek_token()?.pos; self.force_token(T::Return)?; Ok((E::Return(Box::new(self.parse_expr()?)), pos).into()) } fn parse_block(&mut self) -> Result { let mut block = Vec::new(); let pos = self.lexer.peek_token()?.pos; self.force_token(T::LeftBrace)?; loop { let expr = match self.lexer.peek_token()?.data { T::RightBrace => { self.lexer.next_token()?; break }, T::SemiColon => { self.lexer.next_token()?; continue; } _ => self.parse_expr()? }; block.push(expr); let next = self.lexer.next_token_nl()?; match next.data { T::SemiColon => continue, T::RightBrace => break, _ => return Err(error!("expected a semicolon").pos(next.pos)) } } Ok((E::Block(block), pos).into()) } fn parse_try(&mut self) -> Result { let pos = self.lexer.peek_token()?.pos; self.force_token(T::Try)?; let expr = self.parse_expr()?; self.force_token(T::Catch)?; let ident = self.parse_wrapped_ident()?; let catch = self.parse_expr()?; Ok((E::Try(Box::new(expr), ident, Box::new(catch)), pos).into()) } fn parse_value(&mut self) -> Result { let tok = self.lexer.next_token()?; let data = match tok.data { T::Nil => E::Literal(V::Nil), T::Int(i) => E::Literal(V::Int(i)), T::Float(f) => E::Literal(V::Float(f)), T::Complex(c) => E::Literal(V::Complex(Complex64::new(0.0, c))), T::Regex(r) => E::Literal(V::Regex(Rc::new(r.into()))), T::String(s) => E::Literal(V::String(s.to_string().into())), T::True => E::Literal(V::Bool(true)), T::False => E::Literal(V::Bool(false)), T::Ident(ident) => E::Ident(ident), t => return Err(error!("unexpected token '{t}'").pos(tok.pos)) }; Ok((data, tok.pos).into()) } fn parse_term(&mut self) -> Result { use T::*; match self.lexer.peek_token()?.data { Function => self.parse_function(), Backslash => self.parse_lambda(), Do => self.parse_do_while(), While => self.parse_while(), For => self.parse_for(), Let => self.parse_let(), Const => self.parse_const(), LeftBrace => self.parse_block(), Return => self.parse_return(), If => self.parse_if(), Loop => self.parse_loop(), Try => self.parse_try(), Break => { let next = self.lexer.next_token()?; Ok((E::Break, next.pos).into()) }, Continue => { let next = self.lexer.next_token()?; Ok((E::Continue, next.pos).into()) }, LeftBrack => self.parse_matrix(), Colon => self.parse_table(), LeftParen => self.parse_paren(), _ => self.parse_value(), } } fn parse_expr_expr_access(&mut self) -> Result { let mut expr = self.parse_term()?; let pos = expr.pos; loop { let tok = self.lexer.peek_token()?; match tok.data { T::Access => { self.force_token(T::Access)?; let temp = self.parse_ident()?; expr = (E::FieldAccess(Box::new(expr), temp), pos).into(); }, _ => break } } Ok(expr) } fn parse_expr_call(&mut self) -> Result { let mut expr = self.parse_expr_expr_access()?; let pos = expr.pos; loop { let tok = self.lexer.peek_token()?; match tok.data { T::LeftBrack => { let index = self.parse_index()?; expr = (E::Index(Box::new(expr), index), pos).into(); }, T::LeftParen => { let params = self.parse_fn_call()?; expr = (E::FnCall(Box::new(expr), params), pos).into(); } _ => break } } Ok(expr) } fn parse_expr_unary(&mut self) -> Result { let tok = self.lexer.peek_token_nl()?; Ok(match tok.data { T::Not => { self.lexer.next_token()?; (E::UnaryOp(Box::new(self.parse_expr_unary()?), UnaryOp::Not), tok.pos).into() } T::Subtract => { self.lexer.next_token()?; (E::UnaryOp(Box::new(self.parse_expr_unary()?), UnaryOp::Negate), tok.pos).into() } _ => self.parse_expr_call()? }) } fn parse_expr_pow(&mut self) -> Result { expr_parser_reverse!( self, T::Power, parse_expr_unary, parse_expr_pow ) } fn parse_expr_mult(&mut self) -> Result { expr_parser!(self, T::Multiply | T::Divide | T::Modulo, parse_expr_pow) } fn parse_expr_add(&mut self) -> Result { expr_parser!(self, T::Add | T::Subtract, parse_expr_mult) } fn parse_expr_shift(&mut self) -> Result { expr_parser!( self, T::BitwiseShiftLeft | T::BitwiseShiftRight, parse_expr_add ) } fn parse_expr_bit_and(&mut self) -> Result { expr_parser!(self, T::BitwiseAnd, parse_expr_shift) } fn parse_expr_bit_xor(&mut self) -> Result { expr_parser!(self, T::BitwiseXor, parse_expr_bit_and) } fn parse_expr_bit_or(&mut self) -> Result { expr_parser!(self, T::BitwiseOr, parse_expr_bit_xor) } fn parse_expr_compare(&mut self) -> Result { expr_parser!( self, T::Equal | T::NotEqual | T::LessThan | T::GreaterThan | T::LessEqual | T::GreaterEqual, parse_expr_bit_or ) } fn parse_expr_and(&mut self) -> Result { let mut expr = self.parse_expr_compare()?; let pos = expr.pos; loop { let tok = self.lexer.peek_token()?; match tok.data { T::And => { self.force_token(T::And)?; let temp = self.parse_expr_compare()?; expr = (E::And(Box::new(expr), Box::new(temp)), pos).into(); }, _ => break } } Ok(expr) } fn parse_expr_or(&mut self) -> Result { let mut expr = self.parse_expr_and()?; let pos = expr.pos; loop { let tok = self.lexer.peek_token()?; match tok.data { T::Or => { self.force_token(T::Or)?; let temp = self.parse_expr_and()?; expr = (E::Or(Box::new(expr), Box::new(temp)), pos).into(); }, _ => break } } Ok(expr) } fn parse_expr_range(&mut self) -> Result { let expr = self.parse_expr_or()?; let pos = expr.pos; match self.lexer.peek_token()?.data { T::Range => { self.lexer.next_token()?; let temp = self.parse_expr_or()?; Ok((E::BinaryOp(Box::new(expr), Box::new(temp), BinaryOp::Range), pos).into()) }, T::RangeEq => { self.lexer.next_token()?; let temp = self.parse_expr_or()?; Ok((E::BinaryOp(Box::new(expr), Box::new(temp), BinaryOp::RangeEq), pos).into()) }, _ => Ok(expr) } } fn parse_expr_op_assign(&mut self) -> Result { use BinaryOp as B; let expr = self.parse_expr_range()?; let tok = self.lexer.peek_token_nl()?; let pos = tok.pos; let data: ExprData = match tok.data { T::Assign => { self.lexer.next_token_nl()?; E::Assign(Box::new(expr.clone()),Box::new(self.parse_expr()?)) }, T::AssignAnd => { self.lexer.next_token_nl()?; E::Assign(Box::new(expr.clone()),Box::new((E::And(Box::new(expr), Box::new(self.parse_expr()?)), pos).into())) }, T::AssignOr => { self.lexer.next_token_nl()?; E::Assign(Box::new(expr.clone()),Box::new((E::Or(Box::new(expr), Box::new(self.parse_expr()?)),pos).into())) }, T::AssignAdd => { self.lexer.next_token_nl()?; E::Assign(Box::new(expr.clone()),Box::new((E::BinaryOp(Box::new(expr), Box::new(self.parse_expr()?), B::Add),pos).into())) }, T::AssignSubtract => { self.lexer.next_token_nl()?; E::Assign(Box::new(expr.clone()),Box::new((E::BinaryOp(Box::new(expr), Box::new(self.parse_expr()?), B::Subtract),pos).into())) }, T::AssignMultiply => { self.lexer.next_token_nl()?; E::Assign(Box::new(expr.clone()),Box::new((E::BinaryOp(Box::new(expr), Box::new(self.parse_expr()?), B::Multiply),pos).into())) }, T::AssignDivide => { self.lexer.next_token_nl()?; E::Assign(Box::new(expr.clone()),Box::new((E::BinaryOp(Box::new(expr), Box::new(self.parse_expr()?), B::Divide),pos).into())) }, T::AssignModulo => { self.lexer.next_token_nl()?; E::Assign(Box::new(expr.clone()),Box::new((E::BinaryOp(Box::new(expr), Box::new(self.parse_expr()?), B::Modulo),pos).into())) }, T::AssignPower => { self.lexer.next_token_nl()?; E::Assign(Box::new(expr.clone()),Box::new((E::BinaryOp(Box::new(expr), Box::new(self.parse_expr()?), B::Power),pos).into())) }, T::AssignBitwiseAnd => { self.lexer.next_token_nl()?; E::Assign(Box::new(expr.clone()),Box::new((E::BinaryOp(Box::new(expr), Box::new(self.parse_expr()?), B::BitwiseAnd),pos).into())) }, T::AssignBitwiseOr => { self.lexer.next_token_nl()?; E::Assign(Box::new(expr.clone()),Box::new((E::BinaryOp(Box::new(expr), Box::new(self.parse_expr()?), B::BitwiseOr),pos).into())) }, T::AssignBitwiseXor => { self.lexer.next_token_nl()?; E::Assign(Box::new(expr.clone()),Box::new((E::BinaryOp(Box::new(expr), Box::new(self.parse_expr()?), B::BitwiseXor),pos).into())) }, T::AssignBitwiseShiftLeft => { self.lexer.next_token_nl()?; E::Assign(Box::new(expr.clone()),Box::new((E::BinaryOp(Box::new(expr), Box::new(self.parse_expr()?), B::BitwiseShiftLeft),pos).into())) }, T::AssignBitwiseShiftRight => { self.lexer.next_token_nl()?; E::Assign(Box::new(expr.clone()),Box::new((E::BinaryOp(Box::new(expr), Box::new(self.parse_expr()?), B::BitwiseShiftRight),pos).into())) }, _ => expr.data }; Ok((data, pos).into()) } fn parse_expr(&mut self) -> Result { let mut expr = self.parse_expr_op_assign()?; let pos = expr.pos; loop { let tok = self.lexer.peek_token()?; match tok.data { T::Pipe => { self.force_token(T::Pipe)?; let temp = self.parse_expr_op_assign()?; expr = (E::Pipeline(Box::new(expr), Box::new(temp)), pos).into(); }, _ => break } } Ok(expr) } fn parse_root(&mut self) -> Result { let mut block = Vec::new(); loop { match self.lexer.peek_token()?.data { T::Eof => break, T::SemiColon => { self.lexer.next_token()?; continue } _ => {} }; let expr = self.parse_expr()?; block.push(expr); let next = self.lexer.next_token_nl()?; match next.data { T::Eof => break, T::SemiColon => continue, _ => return Err(error!("expected a semicolon").pos(next.pos)) }; } Ok((E::Block(block), Position::default()).into()) } pub fn parse>(&mut self, into: T) -> Result { let lexer = Lexer::new(into); self.lexer = lexer; let ast = self.parse_root()?; if self.optimize { Ok(optimize(ast)?) } else { Ok(ast) } } }