use std::{fmt, iter::Peekable, str::Chars}; use super::{ ParserError, Result, pos::{Pos, Span}, }; #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum TokenKind { Eof, LineSeparator, MacroDefine, MacroEnd, Argument, PulseA, PulseB, Triangle, Noise, Volume, Pitch, DutyCycle, Mode, PauseLen, Identifier, Integer, Dash, } impl TokenKind { pub const fn name(self) -> &'static str { match self { Self::Eof => "end of file", Self::LineSeparator => "line seperator", Self::MacroDefine => "%define", Self::MacroEnd => "%end", Self::Argument => "$", Self::PulseA => "pulsea", Self::PulseB => "pulseb", Self::Triangle => "triangle", Self::Noise => "noise", Self::Volume => "volume", Self::Pitch => "pitch", Self::DutyCycle => "duty cycle", Self::Mode => "mode", Self::PauseLen => "pause len", Self::Identifier => "identifier", Self::Integer => "integer", Self::Dash => "dash", } } } impl fmt::Display for TokenKind { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_str(self.name()) } } use TokenKind as K; #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct Token<'s> { pub span: Span, pub content: &'s str, pub kind: TokenKind, } pub struct Lexer<'s> { src: &'s str, chars: Peekable>, start_pos: Pos, pos: Pos, } impl<'s> Lexer<'s> { pub fn new(src: &'s str) -> Self { Self::new_at(src, 1) } pub fn new_at(src: &'s str, line: u32) -> Self { let pos = Pos { line, col: 1, idx: 0, }; Self { src, chars: src.chars().peekable(), start_pos: pos, pos, } } fn invalid_char(&self, ch: char) -> ParserError { let span = Span::new(self.pos, self.pos.advance(ch)); let msg = match ch as u32 { c @ 0x00..=0x7f => format!("invalid character (codepoint 0x{c:2x})"), c => format!("invalid character (codepoint U+{c:04x})"), }; ParserError { span, msg, file: None, } } fn filter_char(&mut self, ch: Option, advance: bool) -> Result { match ch { Some(c) if c.is_control() && !matches!(c, '\n' | '\r' | '\t') => { Err(self.invalid_char(c)) } Some(c) => { if advance { self.pos = self.pos.advance(c); } Ok(c) } None => Ok('\0'), } } fn peek(&mut self) -> Result { let c = self.chars.peek().copied(); self.filter_char(c, false) } fn next(&mut self) -> Result { let c = self.chars.next(); self.filter_char(c, true) } fn emit(&self, kind: TokenKind) -> Result> { let span = Span::new(self.start_pos, self.pos); Ok(Token { span, content: span.of(self.src), kind, }) } fn and_emit(&mut self, kind: TokenKind) -> Result> { self.next()?; self.emit(kind) } fn unexpected(&mut self) -> Result> { let c = self.peek()?; let span = Span::new(self.pos, self.pos.advance(c)); let msg = match c { '\0' => "unexpected end of file".to_owned(), '\n' => "unexpected newline character".to_owned(), '\t' => "unexpected tab character".to_owned(), '\r' => "unexpected return character".to_owned(), c => format!("unexpected character {c}"), }; Err(ParserError { span, msg, file: None, }) } fn err(&self, msg: &str) -> Result { Err(ParserError { span: Span::new(self.start_pos, self.pos), msg: msg.to_owned(), file: None, }) } fn next_ident(&mut self) -> Result> { let first = self.next()?; loop { let c = self.peek()?; let cond = if ('a'..='g').contains(&first) { c.is_ascii_alphanumeric() || c == '#' } else { c.is_ascii_alphabetic() }; if !cond { break; } self.next()?; } let kind = match Span::new(self.start_pos, self.pos).of(self.src) { "pulsea" | "a" => K::PulseA, "pulseb" | "b" => K::PulseB, "triangle" | "t" => K::Triangle, "noise" | "n" => K::Noise, "volume" | "v" => K::Volume, "pitch" | "p" => K::Pitch, "dutycycle" | "dc" | "w" => K::DutyCycle, "mode" | "m" => K::Mode, "puselen" | "P" => K::PauseLen, _ => K::Identifier, }; self.emit(kind) } fn next_macro_ident(&mut self) -> Result> { self.next()?; let ident = self.next_ident()?; let kind = match ident.content { "%macro" => K::MacroDefine, "%endmacro" => K::MacroEnd, _ => self.err("expected %macro or %endmacro")?, }; self.start_pos = ident.span.start; self.emit(kind) } fn next_int(&mut self) -> Result> { loop { let c = self.peek()?; if c.is_ascii_digit() { self.next()?; } else { return self.emit(K::Integer); } } } fn next_comment(&mut self) -> Result> { while !matches!(self.peek()?, '\0' | '\n') { self.next()?; } self.next_token() } pub fn next_token(&mut self) -> Result> { while matches!(self.peek()?, ' ' | '\t' | '\r') { self.next()?; } self.start_pos = self.pos; match self.peek()? { // misc '\0' => self.emit(K::Eof), '\n' => self.and_emit(K::LineSeparator), ';' => self.next_comment(), // macros '%' => self.next_macro_ident(), '$' => self.and_emit(K::Argument), // pause '-' => self.and_emit(K::Dash), // integer c if c.is_ascii_digit() => self.next_int(), // ident c if c.is_ascii_alphabetic() => self.next_ident(), // rest _ => self.unexpected(), } } } impl<'s> Iterator for Lexer<'s> { type Item = Result>; fn next(&mut self) -> Option { Some(self.next_token()) } }