summaryrefslogtreecommitdiff
path: root/matrix-lang/src/lex.rs
diff options
context:
space:
mode:
author Freya Murphy <freya@freyacat.org> 2024-02-29 17:04:28 -0500
committer Freya Murphy <freya@freyacat.org> 2024-02-29 17:04:28 -0500
commit 5d2747e26f51cc2344a6bd95f93457248fdfebd8 (patch)
tree 8755b4068166c3854d26817683ce438a771ab319 /matrix-lang/src/lex.rs
parent more mat, sys, and os stdlib functions, better matrix printing, other fixes (diff)
downloadmatrix-5d2747e26f51cc2344a6bd95f93457248fdfebd8.tar.gz
matrix-5d2747e26f51cc2344a6bd95f93457248fdfebd8.tar.bz2
matrix-5d2747e26f51cc2344a6bd95f93457248fdfebd8.zip
fin prob
Diffstat (limited to '')
-rw-r--r-- matrix-lang/src/lex.rs (renamed from matrix/src/lex.rs) 177
1 files changed, 91 insertions, 86 deletions
diff --git a/matrix/src/lex.rs b/matrix-lang/src/lex.rs
index 8a07234..b2487ad 100644
--- a/matrix/src/lex.rs
+++ b/matrix-lang/src/lex.rs
@@ -1,6 +1,5 @@
-use std::{rc::Rc, fmt::Debug};
-use regex::Regex;
-use crate::Result;
+use std::fmt::{Debug, Display};
+use crate::prelude::*;
pub struct RegexToken {
regex: Regex
@@ -30,6 +29,36 @@ impl From<RegexToken> for Regex {
}
}
+/// A row/column pair.
+///
+/// The `Default` impl in this file yields row 1, column 1, and the `Display`
+/// impl renders the pair as `row:col`.
+#[derive(Debug, Clone, PartialEq, Eq, Copy)]
+pub struct Position {
+ /// Row component of the position.
+ pub row: usize,
+ /// Column component of the position.
+ pub col: usize,
+}
+
+impl Default for Position {
+ /// Returns the position whose `row` and `col` are both 1.
+ fn default() -> Self {
+ Position { row: 1, col: 1 }
+ }
+}
+
+impl Display for Position {
+ /// Writes the position as `row:col`.
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ // `Position` is `Copy`, so the fields can be pulled out by value.
+ let Self { row, col } = *self;
+ write!(f, "{}:{}", row, col)
+ }
+}
+
+impl Display for TokenData {
+ /// Writes the token data using its `Debug` representation.
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ // Fresh `format_args!` so the caller's formatter flags are not forwarded.
+ f.write_fmt(format_args!("{self:?}"))
+ }
+}
+
+impl Display for Token {
+ /// Displays a token by writing the `Display` output of its `data` field.
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ let data = &self.data;
+ write!(f, "{}", data)
+ }
+}
+
#[derive(Debug, PartialEq)]
pub enum TokenData {
//syntax
@@ -106,6 +135,7 @@ pub enum TokenData {
Else,
While,
Let,
+ Const,
Function,
True,
False,
@@ -133,48 +163,6 @@ pub struct Token {
pub blen: usize,
}
-#[derive(Debug)]
-pub enum Error {
- UnexpectedCharacter(char),
- ExpectedChar(char, char),
- InvalidCodepoint,
- UnexpectedEof,
- InvalidDigit(char),
- InvalidStringEscape(char),
- InvalidRegex(anyhow::Error),
- InvalidNumber(String),
-}
-
-impl std::fmt::Display for Error {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- use Error::*;
- match self {
- UnexpectedCharacter(char) => write!(f, "Unexpected char: '{char}'"),
- UnexpectedEof => write!(f, "Unexpected end of file"),
- ExpectedChar(expected, got) => write!(f, "Expected char: '{expected}', instead got: '{got}'"),
- InvalidCodepoint => write!(f, "Invalid codepoint"),
- InvalidDigit(char) => write!(f, "Invalid digit: '{char}'"),
- InvalidStringEscape(char) => write!(f, "Invalid string escape: '\\{char}"),
- InvalidRegex(err) => write!(f, "{err}"),
- InvalidNumber(num) => write!(f, "Invalid number: '{num}'")
- }
- }
-}
-
-impl std::error::Error for Error {}
-
-#[derive(Debug, Clone, PartialEq, Eq, Copy)]
-pub struct Position {
- pub row: usize,
- pub col: usize,
-}
-
-impl Default for Position {
- fn default() -> Self {
- Self { row: 1, col: 1 }
- }
-}
-
pub struct Lexer {
pub index: usize,
len: usize,
@@ -203,6 +191,12 @@ impl<T: Into<String>> From<T> for Lexer {
}
}
+// Shorthand used by the lexer: forwards every argument to `exception!` with
+// `PARSE_EXCEPTION` as the first argument, so call sites only supply the
+// format string and its arguments.
+macro_rules! error {
+ ($($arg:tt)*) => {
+ exception!(PARSE_EXCEPTION, $($arg)*)
+ };
+}
+
impl Lexer {
pub fn new<T: Into<String>>(input: T) -> Self {
let data: Vec<char> = input.into().chars().collect();
@@ -239,7 +233,7 @@ impl Lexer {
+ /// Advances with `self.next()` and fails if the result is `'\0'`.
+ ///
+ /// Returns the character otherwise. NOTE(review): `'\0'` appears to be the
+ /// end-of-input sentinel produced by `next()` — confirm against `next()`.
fn next_not_eof(&mut self) -> Result<char> {
let c = self.next();
if c == '\0' {
- return Err(Error::UnexpectedEof.into())
+ return Err(error!("unexpected end of file"))
}
Ok(c)
}
@@ -247,7 +241,7 @@ impl Lexer {
+ /// Consumes the next character and requires it to equal `expected`.
+ ///
+ /// Returns the character on a match; otherwise returns a parse error that
+ /// names both the expected character and the one actually read.
fn next_expect(&mut self, expected: char) -> Result<char> {
let c = self.next();
if c != expected {
- return Err(Error::ExpectedChar(expected, c).into())
+ // Report `expected`, not just `c`: `c` is the mismatching character
+ // that was read, so printing it alone as "expected" is misleading.
+ return Err(error!("expected character '{expected}', instead got '{c}'"))
}
Ok(c)
}
@@ -259,7 +253,6 @@ impl Lexer {
}
fn lex_string(&mut self, delimit: char) -> Result<Rc<str>> {
- use Error::*;
let mut buf = String::new();
@@ -291,8 +284,8 @@ impl Lexer {
let n1 = self.next_not_eof()?;
let n2 = self.next_not_eof()?;
buf.push(char::from_u32(
- n1.to_digit(16).ok_or(InvalidDigit(n1))? * 16 +
- n2.to_digit(16).ok_or(InvalidDigit(n2))?
+ n1.to_digit(16).ok_or(error!("invalid digit '{n1}'"))? * 16 +
+ n2.to_digit(16).ok_or(error!("invalid digit '{n2}'"))?
).unwrap());
},
'u' => {
@@ -302,15 +295,15 @@ impl Lexer {
let c = self.next_not_eof()?;
if c == '}' { break }
if n >= 0x1000_0000_u32 {
- return Err(InvalidCodepoint.into())
+ return Err(error!("invalid utf8 codepoint '{n}'"))
}
- n = n * 16 + c.to_digit(16).ok_or::<crate::Error>(InvalidDigit(c).into())?;
+ n = n * 16 + c.to_digit(16).ok_or(error!("invalid digit '{c}'"))?;
}
- let ch = char::from_u32(n).ok_or::<crate::Error>(InvalidCodepoint.into())?;
+ let ch = char::from_u32(n).ok_or(error!("invalid codepoint '{n}'"))?;
buf.push(ch);
},
- _ => return Err(InvalidStringEscape(next).into())
+ _ => return Err(error!("invalid string escape '\\{next}'"))
}
}
@@ -318,13 +311,12 @@ impl Lexer {
}
fn lex_ident(&mut self, initial: char) -> Result<TokenData> {
- use Error as E;
use TokenData as T;
let mut buf = std::string::String::new();
if !initial.is_initial_ident() {
- return Err(E::UnexpectedCharacter(initial).into())
+ return Err(error!("unexpected character '{initial}'"))
}
buf.push(initial);
@@ -342,6 +334,7 @@ impl Lexer {
"else" => T::Else,
"while" => T::While,
"let" => T::Let,
+ "const" => T::Const,
"fn" | "function" => T::Function,
"true" => T::True,
"false" => T::False,
@@ -364,7 +357,6 @@ impl Lexer {
fn lex_radix(&mut self, radix: i64, radix_char: char) -> Result<TokenData> {
use TokenData as T;
- use Error as E;
let mut n = 0i64;
let mut char_found = false;
@@ -375,7 +367,7 @@ impl Lexer {
n = n * radix + (i as i64);
char_found = true;
} else if self.peek().is_ident() {
- return Err(E::InvalidDigit(self.peek()).into())
+ return Err(error!("invalid digit '{}'", self.peek()))
} else {
break;
}
@@ -384,13 +376,11 @@ impl Lexer {
if char_found {
Ok(T::Int(n))
} else {
- Err(E::InvalidNumber(format!("0{radix_char}")).into())
+ Err(error!("invalid number radix specifier '0{radix_char}'"))
}
}
fn lex_number(&mut self, initial: char) -> Result<TokenData> {
- use Error as E;
-
if initial == '0' {
match self.peek() {
'x' => {
@@ -459,7 +449,7 @@ impl Lexer {
}
if self.peek().is_ident() {
- return Err(E::UnexpectedCharacter(self.peek()).into())
+ return Err(error!("unexpected character '{}'", self.peek()))
}
if let Ok(int) = buf.parse::<i64>() {
@@ -478,23 +468,11 @@ impl Lexer {
return Ok(T::Float(float))
}
- Err(E::InvalidNumber(buf).into())
+ Err(error!("invalid number '{buf}'"))
}
- fn peek_token_impl(&mut self, ignore_newlines: bool) -> Result<Token> {
- let idx = self.index;
- let pos = self.pos;
- let bidx = self.byte_len;
- let token = self.next_token_impl(ignore_newlines);
- self.index = idx;
- self.pos = pos;
- self.byte_len = bidx;
- token
- }
-
- fn next_token_impl(&mut self, ignore_newlines: bool) -> Result<Token> {
+ fn read_token(&mut self, ignore_newlines: bool) -> Result<Token> {
use TokenData as T;
- use Error as E;
self.skip_whitespace(ignore_newlines);
@@ -705,7 +683,7 @@ impl Lexer {
self.next();
T::Regex(regex::Regex::new(&self.lex_string(next)?)
.map(|e| e.into())
- .map_err(|e| E::InvalidRegex(e.into()))?)
+ .map_err(|e| error!("invalid regex: '{e}'"))?)
}
_ => {
self.lex_ident(char)?
@@ -744,7 +722,6 @@ impl Lexer {
let str_end = self.index;
let byte_end = self.byte_len;
let str = self.data[str_start..str_end].to_owned().into_iter().collect();
-
Ok(Token {
data,
pos,
@@ -754,19 +731,47 @@ impl Lexer {
})
}
- pub fn peek_token(&mut self) -> Result<Token> {
- self.peek_token_impl(true)
+ /// Reads the next token via `read_token` with `ignore_newlines` set to `true`.
+ ///
+ /// On failure, the lexer position recorded before the read is handed to
+ /// the error's `pos` method.
+ pub fn next_token(&mut self) -> Result<Token> {
+ let start = self.pos;
+ self.read_token(true).map_err(|e| e.pos(start))
}
- pub fn next_token(&mut self) -> Result<Token> {
- self.next_token_impl(true)
+ /// Reads the next token via `read_token` with `ignore_newlines` set to `false`.
+ ///
+ /// On failure, the lexer position recorded before the read is handed to
+ /// the error's `pos` method.
+ pub fn next_token_nl(&mut self) -> Result<Token> {
+ let start = self.pos;
+ self.read_token(false).map_err(|e| e.pos(start))
}
- pub fn peek_token_nl(&mut self) -> Result<Token> {
- self.peek_token_impl(false)
+ /// Reads a token with `ignore_newlines` set to `true`, then rewinds the lexer.
+ ///
+ /// `index`, `pos` and `byte_len` are saved before the read and restored
+ /// afterwards, whether or not the read succeeded. On failure, the saved
+ /// position is handed to the error's `pos` method.
+ pub fn peek_token(&mut self) -> Result<Token> {
+ let saved_index = self.index;
+ let saved_pos = self.pos;
+ let saved_bytes = self.byte_len;
+ let peeked = self.read_token(true);
+ // Undo the cursor movement performed by `read_token`.
+ self.byte_len = saved_bytes;
+ self.pos = saved_pos;
+ self.index = saved_index;
+ peeked.map_err(|e| e.pos(saved_pos))
}
- pub fn next_token_nl(&mut self) -> Result<Token> {
- self.next_token_impl(false)
+ /// Reads a token with `ignore_newlines` set to `false`, then rewinds the lexer.
+ ///
+ /// `index`, `pos` and `byte_len` are saved before the read and restored
+ /// afterwards, whether or not the read succeeded. On failure, the saved
+ /// position is handed to the error's `pos` method.
+ pub fn peek_token_nl(&mut self) -> Result<Token> {
+ let saved_index = self.index;
+ let saved_pos = self.pos;
+ let saved_bytes = self.byte_len;
+ let peeked = self.read_token(false);
+ // Undo the cursor movement performed by `read_token`.
+ self.byte_len = saved_bytes;
+ self.pos = saved_pos;
+ self.index = saved_index;
+ peeked.map_err(|e| e.pos(saved_pos))
}
}