diff options
Diffstat (limited to 'audio/src/parse/lexer.rs')
| -rw-r--r-- | audio/src/parse/lexer.rs | 264 |
1 files changed, 264 insertions, 0 deletions
diff --git a/audio/src/parse/lexer.rs b/audio/src/parse/lexer.rs new file mode 100644 index 0000000..59bd264 --- /dev/null +++ b/audio/src/parse/lexer.rs @@ -0,0 +1,264 @@ +use std::{fmt, iter::Peekable, str::Chars}; + +use super::{ + ParserError, Result, + pos::{Pos, Span}, +}; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum TokenKind { + Eof, + LineSeparator, + + MacroDefine, + MacroEnd, + Argument, + + PulseA, + PulseB, + Triangle, + Noise, + + Volume, + Pitch, + DutyCycle, + Mode, + PauseLen, + + Identifier, + Integer, + Dash, +} +impl TokenKind { + pub const fn name(self) -> &'static str { + match self { + Self::Eof => "end of file", + Self::LineSeparator => "line seperator", + + Self::MacroDefine => "%define", + Self::MacroEnd => "%end", + Self::Argument => "$", + + Self::PulseA => "pulsea", + Self::PulseB => "pulseb", + Self::Triangle => "triangle", + Self::Noise => "noise", + + Self::Volume => "volume", + Self::Pitch => "pitch", + Self::DutyCycle => "duty cycle", + Self::Mode => "mode", + Self::PauseLen => "pause len", + + Self::Identifier => "identifier", + Self::Integer => "integer", + Self::Dash => "dash", + } + } +} +impl fmt::Display for TokenKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.name()) + } +} +use TokenKind as K; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct Token<'s> { + pub span: Span, + pub content: &'s str, + pub kind: TokenKind, +} + +pub struct Lexer<'s> { + src: &'s str, + chars: Peekable<Chars<'s>>, + start_pos: Pos, + pos: Pos, +} +impl<'s> Lexer<'s> { + pub fn new(src: &'s str) -> Self { + Self::new_at(src, 1) + } + + pub fn new_at(src: &'s str, line: u32) -> Self { + let pos = Pos { + line, + col: 1, + idx: 0, + }; + Self { + src, + chars: src.chars().peekable(), + start_pos: pos, + pos, + } + } + + fn invalid_char(&self, ch: char) -> ParserError { + let span = Span::new(self.pos, self.pos.advance(ch)); + let msg = match ch as u32 { + c @ 0x00..=0x7f => format!("invalid character (codepoint 0x{c:2x})"), + c => format!("invalid character (codepoint U+{c:04x})"), + }; + ParserError { + span, + msg, + file: None, + } + } + + fn filter_char(&mut self, ch: Option<char>, advance: bool) -> Result<char> { + match ch { + Some(c) if c.is_control() && !matches!(c, '\n' | '\r' | '\t') => { + Err(self.invalid_char(c)) + } + Some(c) => { + if advance { + self.pos = self.pos.advance(c); + } + Ok(c) + } + None => Ok('\0'), + } + } + + fn peek(&mut self) -> Result<char> { + let c = self.chars.peek().copied(); + self.filter_char(c, false) + } + + fn next(&mut self) -> Result<char> { + let c = self.chars.next(); + self.filter_char(c, true) + } + + fn emit(&self, kind: TokenKind) -> Result<Token<'s>> { + let span = Span::new(self.start_pos, self.pos); + Ok(Token { + span, + content: span.of(self.src), + kind, + }) + } + + fn and_emit(&mut self, kind: TokenKind) -> Result<Token<'s>> { + self.next()?; + self.emit(kind) + } + + fn unexpected(&mut self) -> Result<Token<'s>> { + let c = self.peek()?; + let span = Span::new(self.pos, self.pos.advance(c)); + let msg = match c { + '\0' => "unexpected end of file".to_owned(), + '\n' => "unexpected newline character".to_owned(), + '\t' => "unexpected tab character".to_owned(), + '\r' => "unexpected return character".to_owned(), + c => format!("unexpected character {c}"), + }; + Err(ParserError { + span, + msg, + file: None, + }) + } + + fn err<T>(&self, msg: &str) -> Result<T> { + Err(ParserError { + span: Span::new(self.start_pos, self.pos), + msg: msg.to_owned(), + file: None, + }) + } + + fn next_ident(&mut self) -> Result<Token<'s>> { + let first = self.next()?; + loop { + let c = self.peek()?; + let cond = if ('a'..='g').contains(&first) { + c.is_ascii_alphanumeric() || c == '#' + } else { + c.is_ascii_alphabetic() + }; + if !cond { + break; + } + self.next()?; + } + let kind = match Span::new(self.start_pos, self.pos).of(self.src) { + "pulsea" | "a" => K::PulseA, + "pulseb" | "b" => K::PulseB, + "triangle" | "t" => K::Triangle, + "noise" | "n" => K::Noise, + "volume" | "v" => K::Volume, + "pitch" | "p" => K::Pitch, + "dutycycle" | "dc" | "w" => K::DutyCycle, + "mode" | "m" => K::Mode, + "puselen" | "P" => K::PauseLen, + _ => K::Identifier, + }; + self.emit(kind) + } + + fn next_macro_ident(&mut self) -> Result<Token<'s>> { + self.next()?; + let ident = self.next_ident()?; + let kind = match ident.content { + "%macro" => K::MacroDefine, + "%endmacro" => K::MacroEnd, + _ => self.err("expected %macro or %endmacro")?, + }; + self.start_pos = ident.span.start; + self.emit(kind) + } + + fn next_int(&mut self) -> Result<Token<'s>> { + loop { + let c = self.peek()?; + if c.is_ascii_digit() { + self.next()?; + } else { + return self.emit(K::Integer); + } + } + } + + fn next_comment(&mut self) -> Result<Token<'s>> { + while !matches!(self.peek()?, '\0' | '\n') { + self.next()?; + } + self.next_token() + } + + pub fn next_token(&mut self) -> Result<Token<'s>> { + while matches!(self.peek()?, ' ' | '\t' | '\r') { + self.next()?; + } + self.start_pos = self.pos; + match self.peek()? { + // misc + '\0' => self.emit(K::Eof), + '\n' => self.and_emit(K::LineSeparator), + ';' => self.next_comment(), + // macros + '%' => self.next_macro_ident(), + '$' => self.and_emit(K::Argument), + // pause + '-' => self.and_emit(K::Dash), + // integer + c if c.is_ascii_digit() => self.next_int(), + // ident + c if c.is_ascii_alphabetic() => self.next_ident(), + // rest + _ => self.unexpected(), + } + } +} +impl<'s> Iterator for Lexer<'s> { + type Item = Result<Token<'s>>; + + fn next(&mut self) -> Option<Self::Item> { + Some(self.next_token()) + } +} |