summary | refs | log | tree | commit | diff
path: root/audio/src/parse/lexer.rs
diff options
context:
space:
mode:
Diffstat (limited to 'audio/src/parse/lexer.rs')
-rw-r--r-- audio/src/parse/lexer.rs 264
1 files changed, 264 insertions, 0 deletions
diff --git a/audio/src/parse/lexer.rs b/audio/src/parse/lexer.rs
new file mode 100644
index 0000000..59bd264
--- /dev/null
+++ b/audio/src/parse/lexer.rs
@@ -0,0 +1,264 @@
+use std::{fmt, iter::Peekable, str::Chars};
+
+use super::{
+ ParserError, Result,
+ pos::{Pos, Span},
+};
+
+/// Every category of token the lexer in this file can produce.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum TokenKind {
+    /// Emitted once the input is exhausted.
+    Eof,
+    /// A `\n` character.
+    LineSeparator,
+
+    /// The `%macro` directive.
+    MacroDefine,
+    /// The `%endmacro` directive.
+    MacroEnd,
+    /// The `$` sigil.
+    Argument,
+
+    /// Keyword `pulsea` (short form `a`).
+    PulseA,
+    /// Keyword `pulseb` (short form `b`).
+    PulseB,
+    /// Keyword `triangle` (short form `t`).
+    Triangle,
+    /// Keyword `noise` (short form `n`).
+    Noise,
+
+    /// Keyword `volume` (short form `v`).
+    Volume,
+    /// Keyword `pitch` (short form `p`).
+    Pitch,
+    /// Keyword `dutycycle` (short forms `dc` and `w`).
+    DutyCycle,
+    /// Keyword `mode` (short form `m`).
+    Mode,
+    /// Pause-length keyword (short form `P`).
+    PauseLen,
+
+    /// Any word that is not one of the keywords above.
+    Identifier,
+    /// A run of ASCII digits.
+    Integer,
+    /// A single `-` character.
+    Dash,
+}
+impl TokenKind {
+    /// Returns the fixed, human-readable label for this token kind.
+    ///
+    /// The label is what the `Display` implementation prints. Labels for the
+    /// macro directives match the spellings the lexer actually accepts
+    /// (`%macro` / `%endmacro`), so that a message built from them does not
+    /// point the user at a directive that cannot be written.
+    pub const fn name(self) -> &'static str {
+        match self {
+            Self::Eof => "end of file",
+            Self::LineSeparator => "line separator",
+
+            Self::MacroDefine => "%macro",
+            Self::MacroEnd => "%endmacro",
+            Self::Argument => "$",
+
+            Self::PulseA => "pulsea",
+            Self::PulseB => "pulseb",
+            Self::Triangle => "triangle",
+            Self::Noise => "noise",
+
+            Self::Volume => "volume",
+            Self::Pitch => "pitch",
+            Self::DutyCycle => "duty cycle",
+            Self::Mode => "mode",
+            Self::PauseLen => "pause len",
+
+            Self::Identifier => "identifier",
+            Self::Integer => "integer",
+            Self::Dash => "dash",
+        }
+    }
+}
+/// Displays a token kind using the label returned by `TokenKind::name`.
+impl fmt::Display for TokenKind {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let label: &'static str = self.name();
+        f.write_str(label)
+    }
+}
+use TokenKind as K;
+
+/// A single lexed token that borrows its text from the source string.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct Token<'s> {
+    /// Where the token starts and ends in the source.
+    pub span: Span,
+    /// The slice of the source covered by `span`.
+    pub content: &'s str,
+    /// Which category of token this is.
+    pub kind: TokenKind,
+}
+
+/// Turns a source string into `Token`s, one at a time.
+pub struct Lexer<'s> {
+    /// The complete source text; token contents are sliced out of it.
+    src: &'s str,
+    /// Character stream over `src` with one character of lookahead.
+    chars: Peekable<Chars<'s>>,
+    /// Position just past the last consumed character.
+    pos: Pos,
+    /// Position at which the token currently being lexed began.
+    start_pos: Pos,
+}
+impl<'s> Lexer<'s> {
+    /// Creates a lexer over `src` whose first line is numbered 1.
+    pub fn new(src: &'s str) -> Self {
+        const FIRST_LINE: u32 = 1;
+        Self::new_at(src, FIRST_LINE)
+    }
+
+    /// Creates a lexer over `src` whose first line is numbered `line`.
+    ///
+    /// The starting position is column 1, index 0, on the given line.
+    pub fn new_at(src: &'s str, line: u32) -> Self {
+        let origin = Pos {
+            line,
+            col: 1,
+            idx: 0,
+        };
+        let chars = src.chars().peekable();
+        Self {
+            src,
+            chars,
+            start_pos: origin,
+            pos: origin,
+        }
+    }
+
+ fn invalid_char(&self, ch: char) -> ParserError {
+ let span = Span::new(self.pos, self.pos.advance(ch));
+ let msg = match ch as u32 {
+ c @ 0x00..=0x7f => format!("invalid character (codepoint 0x{c:2x})"),
+ c => format!("invalid character (codepoint U+{c:04x})"),
+ };
+ ParserError {
+ span,
+ msg,
+ file: None,
+ }
+ }
+
+    /// Validates a character taken from the stream and optionally steps the
+    /// current position past it.
+    ///
+    /// * `None` (stream exhausted) is mapped to `'\0'` and never moves the
+    ///   position.
+    /// * Control characters other than `\n`, `\r` and `\t` produce an
+    ///   "invalid character" error.
+    /// * Any other character is returned; the position moves past it only
+    ///   when `advance` is true.
+    fn filter_char(&mut self, ch: Option<char>, advance: bool) -> Result<char> {
+        let c = match ch {
+            Some(c) => c,
+            None => return Ok('\0'),
+        };
+        let permitted_control = matches!(c, '\n' | '\r' | '\t');
+        if c.is_control() && !permitted_control {
+            return Err(self.invalid_char(c));
+        }
+        if advance {
+            self.pos = self.pos.advance(c);
+        }
+        Ok(c)
+    }
+
+    /// Returns the next character without consuming it (`'\0'` at the end of
+    /// the input), or an error if that character is not allowed.
+    fn peek(&mut self) -> Result<char> {
+        let lookahead: Option<char> = self.chars.peek().copied();
+        self.filter_char(lookahead, false)
+    }
+
+    /// Consumes and returns the next character, moving the current position
+    /// past it. Returns `'\0'` at the end of the input, or an error if the
+    /// character is not allowed.
+    fn next(&mut self) -> Result<char> {
+        let consumed: Option<char> = self.chars.next();
+        self.filter_char(consumed, true)
+    }
+
+    /// Produces a token of the given kind spanning from the recorded token
+    /// start to the current position.
+    fn emit(&self, kind: TokenKind) -> Result<Token<'s>> {
+        let span = Span::new(self.start_pos, self.pos);
+        let content = span.of(self.src);
+        Ok(Token {
+            span,
+            content,
+            kind,
+        })
+    }
+
+    /// Consumes one character, then emits a token of the given kind.
+    fn and_emit(&mut self, kind: TokenKind) -> Result<Token<'s>> {
+        self.next().and_then(|_| self.emit(kind))
+    }
+
+    /// Always fails: reports the upcoming character as unexpected.
+    ///
+    /// The character is only peeked, not consumed. End of input, newline, tab
+    /// and carriage return get dedicated wording; any other character is
+    /// printed verbatim in the message.
+    fn unexpected(&mut self) -> Result<Token<'s>> {
+        let c = self.peek()?;
+        let msg = match c {
+            '\0' => String::from("unexpected end of file"),
+            '\n' => String::from("unexpected newline character"),
+            '\t' => String::from("unexpected tab character"),
+            '\r' => String::from("unexpected return character"),
+            other => format!("unexpected character {other}"),
+        };
+        let here = self.pos;
+        Err(ParserError {
+            span: Span::new(here, here.advance(c)),
+            msg,
+            file: None,
+        })
+    }
+
+    /// Always fails with `msg`, using the span of the token currently being
+    /// lexed (token start to current position).
+    fn err<T>(&self, msg: &str) -> Result<T> {
+        let span = Span::new(self.start_pos, self.pos);
+        let error = ParserError {
+            span,
+            msg: String::from(msg),
+            file: None,
+        };
+        Err(error)
+    }
+
+    /// Lexes a word and classifies it as a keyword or a plain identifier.
+    ///
+    /// The first character is consumed unconditionally. If it is one of
+    /// `a`..=`g`, the word may continue with ASCII letters, digits and `#`
+    /// (presumably so that note names such as `c#4` form one token — confirm
+    /// against the parser); otherwise only ASCII letters continue the word.
+    ///
+    /// The text from the token start up to the current position is then
+    /// matched against the keyword table. The pause-length keyword is
+    /// accepted as `pauselen`; the historical spelling `puselen` is kept so
+    /// existing input keeps lexing the same way.
+    fn next_ident(&mut self) -> Result<Token<'s>> {
+        let first = self.next()?;
+        // Depends only on the first character, so decide once up front.
+        let note_like = ('a'..='g').contains(&first);
+        loop {
+            let c = self.peek()?;
+            let continues = if note_like {
+                c.is_ascii_alphanumeric() || c == '#'
+            } else {
+                c.is_ascii_alphabetic()
+            };
+            if !continues {
+                break;
+            }
+            self.next()?;
+        }
+        let kind = match Span::new(self.start_pos, self.pos).of(self.src) {
+            "pulsea" | "a" => K::PulseA,
+            "pulseb" | "b" => K::PulseB,
+            "triangle" | "t" => K::Triangle,
+            "noise" | "n" => K::Noise,
+            "volume" | "v" => K::Volume,
+            "pitch" | "p" => K::Pitch,
+            "dutycycle" | "dc" | "w" => K::DutyCycle,
+            "mode" | "m" => K::Mode,
+            "pauselen" | "puselen" | "P" => K::PauseLen,
+            _ => K::Identifier,
+        };
+        self.emit(kind)
+    }
+
+    /// Lexes a `%`-prefixed directive.
+    ///
+    /// Consumes the leading character, then lexes the following word. The
+    /// resulting text (which still includes the leading `%`, because the
+    /// token start is not moved) must be exactly `%macro` or `%endmacro`;
+    /// anything else is an error.
+    fn next_macro_ident(&mut self) -> Result<Token<'s>> {
+        // Step over the '%' that routed us here.
+        self.next()?;
+        let word = self.next_ident()?;
+        let kind = if word.content == "%macro" {
+            K::MacroDefine
+        } else if word.content == "%endmacro" {
+            K::MacroEnd
+        } else {
+            return self.err("expected %macro or %endmacro");
+        };
+        self.start_pos = word.span.start;
+        self.emit(kind)
+    }
+
+    /// Consumes a run of ASCII digits and emits it as an integer token.
+    fn next_int(&mut self) -> Result<Token<'s>> {
+        while self.peek()?.is_ascii_digit() {
+            self.next()?;
+        }
+        self.emit(K::Integer)
+    }
+
+    /// Skips a comment and returns the token that follows it.
+    ///
+    /// Everything up to, but not including, the next `\n` (or the end of the
+    /// input) is discarded; lexing then resumes normally.
+    fn next_comment(&mut self) -> Result<Token<'s>> {
+        loop {
+            match self.peek()? {
+                '\0' | '\n' => break,
+                _ => {
+                    self.next()?;
+                }
+            }
+        }
+        self.next_token()
+    }
+
+    /// Lexes and returns the next token.
+    ///
+    /// Spaces, tabs and carriage returns are skipped first. The token start
+    /// is then recorded and the upcoming character selects what to lex:
+    /// end of input, newline, `;` comment, `%` directive, `$`, `-`, an
+    /// integer, or a word. Any other character is reported as unexpected.
+    pub fn next_token(&mut self) -> Result<Token<'s>> {
+        // Skip insignificant whitespace; '\n' is a token of its own.
+        loop {
+            match self.peek()? {
+                ' ' | '\t' | '\r' => {
+                    self.next()?;
+                }
+                _ => break,
+            }
+        }
+        self.start_pos = self.pos;
+        let upcoming = self.peek()?;
+        match upcoming {
+            // misc
+            '\0' => self.emit(K::Eof),
+            '\n' => self.and_emit(K::LineSeparator),
+            ';' => self.next_comment(),
+            // macros
+            '%' => self.next_macro_ident(),
+            '$' => self.and_emit(K::Argument),
+            // pause
+            '-' => self.and_emit(K::Dash),
+            // integer
+            '0'..='9' => self.next_int(),
+            // ident
+            'a'..='z' | 'A'..='Z' => self.next_ident(),
+            // rest
+            _ => self.unexpected(),
+        }
+    }
+}
+/// Iterates over the results of `Lexer::next_token`.
+///
+/// The iterator never yields `None`: every call produces `Some`, so callers
+/// have to stop on their own (for example on an `Eof` token or an error).
+impl<'s> Iterator for Lexer<'s> {
+    type Item = Result<Token<'s>>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let outcome = self.next_token();
+        Some(outcome)
+    }
+}