summaryrefslogtreecommitdiff
path: root/masm/lex.c
diff options
context:
space:
mode:
Diffstat (limited to 'masm/lex.c')
-rw-r--r--masm/lex.c223
1 files changed, 146 insertions, 77 deletions
diff --git a/masm/lex.c b/masm/lex.c
index a7707d6..b835a7f 100644
--- a/masm/lex.c
+++ b/masm/lex.c
@@ -2,6 +2,10 @@
#include <mlimits.h>
#include <merror.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
static struct {
int x;
@@ -46,64 +50,24 @@ static void skip_comment(struct lexer *lexer)
}
}
-/* lexes text until whitespace
- * returns error on zero length or too long */
-static int lex_ident(struct lexer *lexer, char text[MAX_LEX_LENGTH])
-{
- int len = 0;
- char *ptr = text;
- int c;
-
- while (1) {
- c = lex_peek(lexer);
- if (!(
- (c >= 'a' && c <= 'z') ||
- (c >= 'A' && c <= 'Z') ||
- (c >= '0' && c <= '9') ||
- (c == '_')
- )) {
- break;
- }
-
- // pop char out of lexer
- lex_next(lexer);
-
- if (len + 1 == MAX_LEX_LENGTH) {
- ERROR_POS(pos, "ident has max length of %d",
- MAX_LEX_LENGTH);
- return M_ERROR;
- }
-
- *ptr++ = c;
- len++;
- }
-
- if (len == 0) {
- ERROR_POS(pos, "attempted to lex empty ident %d",
- MAX_LEX_LENGTH);
- return M_ERROR;
- }
-
- *ptr = '\0';
- return M_SUCCESS;
-}
-
/* lexes a string until closing quote
* returns error if string is too long or hit newline */
-static int lex_string(struct lexer *lexer,char text[MAX_LEX_LENGTH])
+static int lex_string(struct lexer *lexer, struct string *string)
{
- int len = 0;
- char *ptr = text;
- int c;
+ char c;
+ string_init(string);
while (1) {
c = lex_next(lexer);
+
+ // stop on ending quote
if (c == '"')
break;
// strings cannot span multiple lines
if (c == '\n') {
ERROR_POS(pos, "reached newline before end of string");
+ string_free(string);
return M_ERROR;
}
@@ -129,20 +93,73 @@ static int lex_string(struct lexer *lexer,char text[MAX_LEX_LENGTH])
}
}
- if (len + 1 == MAX_LEX_LENGTH) {
- ERROR_POS(pos, "string has max length of %d",
- MAX_LEX_LENGTH);
+ // push char into string
+ if (string_push(string, c)) {
+ string_free(string);
+ return M_ERROR;
+ }
+ }
+
+ // null terminate string
+ if (string_push(string, '\0')) {
+ free(string->str);
+ return M_ERROR;
+ }
+
+ return M_SUCCESS;
+}
+
+/* lexes an identifier: consumes characters while they are ASCII letters,
+ * digits, or '_' and appends each one to `string`
+ *
+ * if `prefix` is not '\0' it is stored in `string` before the lexed
+ * characters; it does not count towards the "non empty" requirement
+ *
+ * `string` is initialized here and null terminated on success
+ * returns M_ERROR (after releasing `string` with string_free) when a
+ * string_push fails or when no identifier character was lexed,
+ * otherwise returns M_SUCCESS */
+static int lex_ident(struct lexer *lexer, struct string *string,
+ char prefix)
+{
+ char c;
+ string_init(string);
+
+ if (prefix != '\0' && string_push(string, prefix)) {
+ string_free(string);
+ return M_ERROR;
+ }
+
+ while (1) {
+ c = lex_peek(lexer);
+ if (!(
+ (c >= 'a' && c <= 'z') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= '0' && c <= '9') ||
+ (c == '_')
+ )) {
+ break;
+ }
+
+ // pop char out of lexer
+ lex_next(lexer);
+
+ // push char into string
+ if (string_push(string, c)) {
+ // release through string_free like every other failure path
+ string_free(string);
return M_ERROR;
}
+ }
+
+ // empty idents are not allowed; a pushed prefix alone must not
+ // satisfy the check, otherwise a bare "." would be accepted
+ // (assumes string->len counts the characters pushed so far)
+ if (string->len < (prefix != '\0' ? 2 : 1)) {
+ string_free(string);
+ ERROR("empty ident tokens are not allowed");
+ return M_ERROR;
+ }
- *ptr++ = c;
- len++;
+ // null terminate string
+ if (string_push(string, '\0')) {
+ string_free(string);
+ return M_ERROR;
}
- *ptr = '\0';
return M_SUCCESS;
}
+
/* lexes an integer number in base 2, 8, 10, or 16;
* uses base 10 by default, but this can be changed by a 0b, 0o, or 0x prefix */
static int lex_number(struct lexer *lexer, int64_t *n)
@@ -221,6 +238,7 @@ int lexer_next(struct lexer *lexer, struct token *token)
again: // use label to avoid whitespace recursion
token->x = lexer->x;
token->y = lexer->y;
+ token->off = ftell(lexer->file);
pos.x = lexer->x;
pos.y = lexer->y;
token->type = TOK_EOF;
@@ -231,54 +249,80 @@ again: // use label to avoid whitespace recursion
switch (c) {
case EOF:
+
+ // return a EOF token
case '\0':
token->type = TOK_EOF;
break;
+
+ // skip the comment
+ // .. and return a NL token
case ';':
case '#':
skip_comment(lexer);
token->type = TOK_NL;
break;
+
+ // skip the whitespace and
+ // try to parse the next character
case ' ':
case '\t':
// skip white space
lex_next(lexer);
goto again;
+
+ // return a NL token
case '\n':
lex_next(lexer);
token->type = TOK_NL;
break;
+
+ // return a comma token
case ',':
lex_next(lexer);
token->type = TOK_COMMA;
break;
+
+ // return a equal token
case '=':
lex_next(lexer);
token->type = TOK_EQUAL;
break;
+
+ // return a left paren token
case '(':
lex_next(lexer);
token->type = TOK_LPAREN;
break;
+
+ // return a right paren token
case ')':
token->type = TOK_RPAREN;
lex_next(lexer);
break;
+
+ // return a register token
case '$':
token->type = TOK_REG;
lex_next(lexer);
- res = lex_ident(lexer, token->text);
+ res = lex_ident(lexer, &token->string, '\0');
break;
+
+ // return a directive token
case '.':
token->type = TOK_DIRECTIVE;
lex_next(lexer);
- res = lex_ident(lexer, token->text);
+ res = lex_ident(lexer, &token->string, '.');
break;
+
+ // return a string token
case '"':
token->type = TOK_STRING;
lex_next(lexer);
- res = lex_string(lexer, token->text);
+ res = lex_string(lexer, &token->string);
break;
+
+ // return a number token
case '-':
case '0':
case '1':
@@ -293,68 +337,78 @@ again: // use label to avoid whitespace recursion
token->type = TOK_NUMBER;
res = lex_number(lexer, &token->number);
break;
+
+ // return a ident or label token depending
+ // if it ends with a colon
default:
token->type = TOK_IDENT;
- res = lex_ident(lexer, token->text);
+ res = lex_ident(lexer, &token->string, '\0');
if (lex_peek(lexer) == ':') {
lex_next(lexer);
token->type = TOK_LABEL;
}
break;
}
+
return res;
}
+/* initializes `lexer` to read the file at `path`
+ *
+ * resets the peek slot to EOF and the position to line 1, column 1,
+ * then opens `path` for reading
+ * returns M_ERROR (leaving lexer->file NULL) when the file cannot be
+ * opened, otherwise returns M_SUCCESS */
int lexer_init(const char *path, struct lexer *lexer)
{
- FILE *file = fopen(path, "r");
- if (file == NULL) {
- PERROR("cannot read '%s'", path);
- return M_ERROR;
- }
- lexer->file = file;
+ /// defaults
+ lexer->file = NULL;
lexer->peek = EOF;
lexer->x = 1;
lexer->y = 1;
+
+ /// load file
+ lexer->file = fopen(path, "r");
+ if (lexer->file == NULL) {
+ // name the file so the user can tell which input failed
+ PERROR("cannot read '%s'", path);
+ return M_ERROR;
+ }
+
return M_SUCCESS;
}
+/* closes the file owned by `lexer`, if one is open
+ * the handle is cleared afterwards so a repeated call does not
+ * close the same stream twice */
-int lexer_free(struct lexer *lexer)
+void lexer_free(struct lexer *lexer)
{
- return fclose(lexer->file);
+ if (lexer->file) {
+ fclose(lexer->file);
+ lexer->file = NULL;
+ }
}
char *token_str(enum token_type type)
{
switch (type) {
- case TOK_IDENT:
+ case TOK_IDENT:
return "ident";
- case TOK_REG:
+ case TOK_REG:
return "register";
- case TOK_LABEL:
+ case TOK_LABEL:
return "label";
- case TOK_STRING:
+ case TOK_STRING:
return "string";
- case TOK_COMMA:
+ case TOK_COMMA:
return "comma";
- case TOK_EQUAL:
+ case TOK_EQUAL:
return "equal";
- case TOK_LPAREN:
+ case TOK_LPAREN:
return "left parentheses";
- case TOK_RPAREN:
+ case TOK_RPAREN:
return "right parentheses";
- case TOK_NUMBER:
+ case TOK_NUMBER:
return "number";
- case TOK_EOF:
+ case TOK_EOF:
return "end of file";
- case TOK_NL:
+ case TOK_NL:
return "new line";
- case TOK_DIRECTIVE:
+ case TOK_DIRECTIVE:
return "directive";
- }
+ }
return "unknown";
}
+/* save the current state from the lexer */
void lexer_save(struct lexer *lexer, struct lexer_state *state)
{
state->x = lexer->x;
@@ -371,3 +425,18 @@ void lexer_load(struct lexer *lexer, const struct lexer_state *state)
lexer->peek = state->peek;
fseek(lexer->file, state->offset, SEEK_SET);
}
+
+/* releases the string buffer owned by `token`
+ *
+ * only register, ident, label, string, and directive tokens have
+ * their string freed; every other token type is left untouched
+ * the pointer is cleared afterwards so a repeated call is harmless */
+void token_free(struct token *token)
+{
+ switch (token->type) {
+ case TOK_REG:
+ case TOK_IDENT:
+ case TOK_LABEL:
+ case TOK_STRING:
+ case TOK_DIRECTIVE:
+ // free(NULL) is a no-op, so no guard is needed
+ free(token->string.str);
+ token->string.str = NULL;
+ break;
+ default:
+ // before C23 a label must be followed by a statement
+ break;
+ }
+}