diff options
Diffstat (limited to '')
-rw-r--r-- | matrix-lang/src/lex.rs (renamed from matrix/src/lex.rs) | 177 |
1 files changed, 91 insertions, 86 deletions
diff --git a/matrix/src/lex.rs b/matrix-lang/src/lex.rs index 8a07234..b2487ad 100644 --- a/matrix/src/lex.rs +++ b/matrix-lang/src/lex.rs @@ -1,6 +1,5 @@ -use std::{rc::Rc, fmt::Debug}; -use regex::Regex; -use crate::Result; +use std::fmt::{Debug, Display}; +use crate::prelude::*; pub struct RegexToken { regex: Regex @@ -30,6 +29,36 @@ impl From<RegexToken> for Regex { } } +#[derive(Debug, Clone, PartialEq, Eq, Copy)] +pub struct Position { + pub row: usize, + pub col: usize, +} + +impl Default for Position { + fn default() -> Self { + Self { row: 1, col: 1 } + } +} + +impl Display for Position { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}:{}", self.row, self.col) + } +} + +impl Display for TokenData { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{self:?}") + } +} + +impl Display for Token { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.data) + } +} + #[derive(Debug, PartialEq)] pub enum TokenData { //syntax @@ -106,6 +135,7 @@ pub enum TokenData { Else, While, Let, + Const, Function, True, False, @@ -133,48 +163,6 @@ pub struct Token { pub blen: usize, } -#[derive(Debug)] -pub enum Error { - UnexpectedCharacter(char), - ExpectedChar(char, char), - InvalidCodepoint, - UnexpectedEof, - InvalidDigit(char), - InvalidStringEscape(char), - InvalidRegex(anyhow::Error), - InvalidNumber(String), -} - -impl std::fmt::Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use Error::*; - match self { - UnexpectedCharacter(char) => write!(f, "Unexpected char: '{char}'"), - UnexpectedEof => write!(f, "Unexpected end of file"), - ExpectedChar(expected, got) => write!(f, "Expected char: '{expected}', instead got: '{got}'"), - InvalidCodepoint => write!(f, "Invalid codepoint"), - InvalidDigit(char) => write!(f, "Invalid digit: '{char}'"), - InvalidStringEscape(char) => write!(f, "Invalid string escape: '\\{char}"), - InvalidRegex(err) => write!(f, "{err}"), - InvalidNumber(num) => write!(f, "Invalid number: '{num}'") - } - } -} - -impl std::error::Error for Error {} - -#[derive(Debug, Clone, PartialEq, Eq, Copy)] -pub struct Position { - pub row: usize, - pub col: usize, -} - -impl Default for Position { - fn default() -> Self { - Self { row: 1, col: 1 } - } -} - pub struct Lexer { pub index: usize, len: usize, @@ -203,6 +191,12 @@ impl<T: Into<String>> From<T> for Lexer { } } +macro_rules! error { + ($($arg:tt)*) => { + exception!(PARSE_EXCEPTION, $($arg)*) + }; +} + impl Lexer { pub fn new<T: Into<String>>(input: T) -> Self { let data: Vec<char> = input.into().chars().collect(); @@ -239,7 +233,7 @@ impl Lexer { fn next_not_eof(&mut self) -> Result<char> { let c = self.next(); if c == '\0' { - return Err(Error::UnexpectedEof.into()) + return Err(error!("unexpected end of file")) } Ok(c) } @@ -247,7 +241,7 @@ impl Lexer { fn next_expect(&mut self, expected: char) -> Result<char> { let c = self.next(); if c != expected { - return Err(Error::ExpectedChar(expected, c).into()) + return Err(error!("expected character '{c}'")) } Ok(c) } @@ -259,7 +253,6 @@ impl Lexer { } fn lex_string(&mut self, delimit: char) -> Result<Rc<str>> { - use Error::*; let mut buf = String::new(); @@ -291,8 +284,8 @@ impl Lexer { let n1 = self.next_not_eof()?; let n2 = self.next_not_eof()?; buf.push(char::from_u32( - n1.to_digit(16).ok_or(InvalidDigit(n1))? * 16 + - n2.to_digit(16).ok_or(InvalidDigit(n2))? + n1.to_digit(16).ok_or(error!("invalid digit '{n1}'"))? * 16 + + n2.to_digit(16).ok_or(error!("invalid digit '{n2}'"))? ).unwrap()); }, 'u' => { @@ -302,15 +295,15 @@ impl Lexer { let c = self.next_not_eof()?; if c == '}' { break } if n >= 0x1000_0000_u32 { - return Err(InvalidCodepoint.into()) + return Err(error!("invalid utf8 codepoint '{n}'")) } - n = n * 16 + c.to_digit(16).ok_or::<crate::Error>(InvalidDigit(c).into())?; + n = n * 16 + c.to_digit(16).ok_or(error!("invalid digit '{c}'"))?; } - let ch = char::from_u32(n).ok_or::<crate::Error>(InvalidCodepoint.into())?; + let ch = char::from_u32(n).ok_or(error!("invalid codepoint '{n}'"))?; buf.push(ch); }, - _ => return Err(InvalidStringEscape(next).into()) + _ => return Err(error!("invalid string escape '\\{next}'")) } } @@ -318,13 +311,12 @@ impl Lexer { } fn lex_ident(&mut self, initial: char) -> Result<TokenData> { - use Error as E; use TokenData as T; let mut buf = std::string::String::new(); if !initial.is_initial_ident() { - return Err(E::UnexpectedCharacter(initial).into()) + return Err(error!("unexpected character '{initial}'")) } buf.push(initial); @@ -342,6 +334,7 @@ impl Lexer { "else" => T::Else, "while" => T::While, "let" => T::Let, + "const" => T::Const, "fn" | "function" => T::Function, "true" => T::True, "false" => T::False, @@ -364,7 +357,6 @@ impl Lexer { fn lex_radix(&mut self, radix: i64, radix_char: char) -> Result<TokenData> { use TokenData as T; - use Error as E; let mut n = 0i64; let mut char_found = false; @@ -375,7 +367,7 @@ impl Lexer { n = n * radix + (i as i64); char_found = true; } else if self.peek().is_ident() { - return Err(E::InvalidDigit(self.peek()).into()) + return Err(error!("invalid digit '{}'", self.peek())) } else { break; } @@ -384,13 +376,11 @@ impl Lexer { if char_found { Ok(T::Int(n)) } else { - Err(E::InvalidNumber(format!("0{radix_char}")).into()) + Err(error!("invalid number radix specifier '0{radix_char}'")) } } fn lex_number(&mut self, initial: char) -> Result<TokenData> { - use Error as E; - if initial == '0' { match self.peek() { 'x' => { @@ -459,7 +449,7 @@ impl Lexer { } if self.peek().is_ident() { - return Err(E::UnexpectedCharacter(self.peek()).into()) + return Err(error!("unexpected character '{}'", self.peek())) } if let Ok(int) = buf.parse::<i64>() { @@ -478,23 +468,11 @@ impl Lexer { return Ok(T::Float(float)) } - Err(E::InvalidNumber(buf).into()) + Err(error!("invalid number '{buf}'")) } - fn peek_token_impl(&mut self, ignore_newlines: bool) -> Result<Token> { - let idx = self.index; - let pos = self.pos; - let bidx = self.byte_len; - let token = self.next_token_impl(ignore_newlines); - self.index = idx; - self.pos = pos; - self.byte_len = bidx; - token - } - - fn next_token_impl(&mut self, ignore_newlines: bool) -> Result<Token> { + fn read_token(&mut self, ignore_newlines: bool) -> Result<Token> { use TokenData as T; - use Error as E; self.skip_whitespace(ignore_newlines); @@ -705,7 +683,7 @@ impl Lexer { self.next(); T::Regex(regex::Regex::new(&self.lex_string(next)?) .map(|e| e.into()) - .map_err(|e| E::InvalidRegex(e.into()))?) + .map_err(|e| error!("invalid regex: '{e}'"))?) } _ => { self.lex_ident(char)? @@ -744,7 +722,6 @@ impl Lexer { let str_end = self.index; let byte_end = self.byte_len; let str = self.data[str_start..str_end].to_owned().into_iter().collect(); - Ok(Token { data, pos, @@ -754,19 +731,47 @@ impl Lexer { }) } - pub fn peek_token(&mut self) -> Result<Token> { - self.peek_token_impl(true) + pub fn next_token(&mut self) -> Result<Token> { + let pos = self.pos; + match self.read_token(true) { + Ok(token) => Ok(token), + Err(e) => Err(e.pos(pos)), + } } - pub fn next_token(&mut self) -> Result<Token> { - self.next_token_impl(true) + pub fn next_token_nl(&mut self) -> Result<Token> { + let pos = self.pos; + match self.read_token(false) { + Ok(token) => Ok(token), + Err(e) => Err(e.pos(pos)), + } } - pub fn peek_token_nl(&mut self) -> Result<Token> { - self.peek_token_impl(false) + pub fn peek_token(&mut self) -> Result<Token> { + let idx = self.index; + let pos = self.pos; + let bidx = self.byte_len; + let token = self.read_token(true); + self.index = idx; + self.pos = pos; + self.byte_len = bidx; + match token { + Ok(token) => Ok(token), + Err(e) => Err(e.pos(pos)), + } } - pub fn next_token_nl(&mut self) -> Result<Token> { - self.next_token_impl(false) + pub fn peek_token_nl(&mut self) -> Result<Token> { + let idx = self.index; + let pos = self.pos; + let bidx = self.byte_len; + let token = self.read_token(false); + self.index = idx; + self.pos = pos; + self.byte_len = bidx; + match token { + Ok(token) => Ok(token), + Err(e) => Err(e.pos(pos)), + } } } |