diff options
Diffstat (limited to '')
-rw-r--r-- | masm/lex.c | 223 |
1 files changed, 146 insertions, 77 deletions
@@ -2,6 +2,10 @@ #include <mlimits.h> #include <merror.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/mman.h> +#include <sys/stat.h> static struct { int x; @@ -46,64 +50,24 @@ static void skip_comment(struct lexer *lexer) } } -/* lexes text until whitespace - * returns error on zero length or too long */ -static int lex_ident(struct lexer *lexer, char text[MAX_LEX_LENGTH]) -{ - int len = 0; - char *ptr = text; - int c; - - while (1) { - c = lex_peek(lexer); - if (!( - (c >= 'a' && c <= 'z') || - (c >= 'A' && c <= 'Z') || - (c >= '0' && c <= '9') || - (c == '_') - )) { - break; - } - - // pop char out of lexer - lex_next(lexer); - - if (len + 1 == MAX_LEX_LENGTH) { - ERROR_POS(pos, "ident has max length of %d", - MAX_LEX_LENGTH); - return M_ERROR; - } - - *ptr++ = c; - len++; - } - - if (len == 0) { - ERROR_POS(pos, "attempted to lex empty ident %d", - MAX_LEX_LENGTH); - return M_ERROR; - } - - *ptr = '\0'; - return M_SUCCESS; -} - /* lexes a string until closing quote * returns error if string is too long or hit newline */ -static int lex_string(struct lexer *lexer,char text[MAX_LEX_LENGTH]) +static int lex_string(struct lexer *lexer, struct string *string) { - int len = 0; - char *ptr = text; - int c; + char c; + string_init(string); while (1) { c = lex_next(lexer); + + // stop on ending quote if (c == '"') break; // strings cannot span multiple lines if (c == '\n') { ERROR_POS(pos, "reached newline before end of string"); + string_free(string); return M_ERROR; } @@ -129,20 +93,73 @@ static int lex_string(struct lexer *lexer,char text[MAX_LEX_LENGTH]) } } - if (len + 1 == MAX_LEX_LENGTH) { - ERROR_POS(pos, "string has max length of %d", - MAX_LEX_LENGTH); + // push char into string + if (string_push(string, c)) { + string_free(string); + return M_ERROR; + } + } + + // null terminate string + if (string_push(string, '\0')) { + free(string->str); + return M_ERROR; + } + + return M_SUCCESS; +} + +/* lexes text until whitespace + * returns error on zero length or too long */ +static int lex_ident(struct lexer *lexer, struct string *string, + char prefix) +{ + char c; + string_init(string); + + if (prefix != '\0' && string_push(string, prefix)) { + string_free(string); + return M_ERROR; + } + + while (1) { + c = lex_peek(lexer); + if (!( + (c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || + (c == '_') + )) { + break; + } + + // pop char out of lexer + lex_next(lexer); + + // push char into string + if (string_push(string, c)) { + free(string->str); return M_ERROR; } + } + + // empty idents are not allowed + if (string->len < 1) { + string_free(string); + ERROR("empty ident tokens are not allowed"); + return M_ERROR; + } - *ptr++ = c; - len++; + // null terminate string + if (string_push(string, '\0')) { + string_free(string); + return M_ERROR; } - *ptr = '\0'; return M_SUCCESS; } + /* lexes a integer number in base 2,8,10, or 16, * uses base 10 by default but chan be changed by 0b, 0o, and 0x */ static int lex_number(struct lexer *lexer, int64_t *n) @@ -221,6 +238,7 @@ int lexer_next(struct lexer *lexer, struct token *token) again: // use label to avoid whitespace recursion token->x = lexer->x; token->y = lexer->y; + token->off = ftell(lexer->file); pos.x = lexer->x; pos.y = lexer->y; token->type = TOK_EOF; @@ -231,54 +249,80 @@ again: // use label to avoid whitespace recursion switch (c) { case EOF: + + // return a EOF token case '\0': token->type = TOK_EOF; break; + + // skip the comment + // .. and return a NL token case ';': case '#': skip_comment(lexer); token->type = TOK_NL; break; + + // skip the whitespace and + // try to parse the next character case ' ': case '\t': // skip white space lex_next(lexer); goto again; + + // return a NL token case '\n': lex_next(lexer); token->type = TOK_NL; break; + + // return a comma token case ',': lex_next(lexer); token->type = TOK_COMMA; break; + + // return a equal token case '=': lex_next(lexer); token->type = TOK_EQUAL; break; + + // return a left paren token case '(': lex_next(lexer); token->type = TOK_LPAREN; break; + + // return a right paren token case ')': token->type = TOK_RPAREN; lex_next(lexer); break; + + // return a register token case '$': token->type = TOK_REG; lex_next(lexer); - res = lex_ident(lexer, token->text); + res = lex_ident(lexer, &token->string, '\0'); break; + + // return a directive token case '.': token->type = TOK_DIRECTIVE; lex_next(lexer); - res = lex_ident(lexer, token->text); + res = lex_ident(lexer, &token->string, '.'); break; + + // return a string token case '"': token->type = TOK_STRING; lex_next(lexer); - res = lex_string(lexer, token->text); + res = lex_string(lexer, &token->string); break; + + // return a number token case '-': case '0': case '1': @@ -293,68 +337,78 @@ again: // use label to avoid whitespace recursion token->type = TOK_NUMBER; res = lex_number(lexer, &token->number); break; + + // return a ident or label token depending + // if it ends with a colon default: token->type = TOK_IDENT; - res = lex_ident(lexer, token->text); + res = lex_ident(lexer, &token->string, '\0'); if (lex_peek(lexer) == ':') { lex_next(lexer); token->type = TOK_LABEL; } break; } + return res; } int lexer_init(const char *path, struct lexer *lexer) { - FILE *file = fopen(path, "r"); - if (file == NULL) { - PERROR("cannot read '%s'", path); - return M_ERROR; - } - lexer->file = file; + /// defaults + lexer->file = NULL; lexer->peek = EOF; lexer->x = 1; lexer->y = 1; + + /// load file + lexer->file = fopen(path, "r"); + if (lexer->file == NULL) { + PERROR("cannot read"); + return M_ERROR; + } + return M_SUCCESS; } -int lexer_free(struct lexer *lexer) +void lexer_free(struct lexer *lexer) { - return fclose(lexer->file); + if (lexer->file) + fclose(lexer->file); } char *token_str(enum token_type type) { switch (type) { - case TOK_IDENT: + case TOK_IDENT: return "ident"; - case TOK_REG: + case TOK_REG: return "register"; - case TOK_LABEL: + case TOK_LABEL: return "label"; - case TOK_STRING: + case TOK_STRING: return "string"; - case TOK_COMMA: + case TOK_COMMA: return "comma"; - case TOK_EQUAL: + case TOK_EQUAL: return "equal"; - case TOK_LPAREN: + case TOK_LPAREN: return "left parentheses"; - case TOK_RPAREN: + case TOK_RPAREN: return "right parentheses"; - case TOK_NUMBER: + case TOK_NUMBER: return "number"; - case TOK_EOF: + case TOK_EOF: return "end of file"; - case TOK_NL: + case TOK_NL: return "new line"; - case TOK_DIRECTIVE: + case TOK_DIRECTIVE: return "directive"; - } + } return "unknown"; } +/* save the current state from the lexer */ void lexer_save(struct lexer *lexer, struct lexer_state *state) { state->x = lexer->x; @@ -371,3 +425,18 @@ void lexer_load(struct lexer *lexer, const struct lexer_state *state) lexer->peek = state->peek; fseek(lexer->file, state->offset, SEEK_SET); } + +void token_free(struct token *token) +{ + switch (token->type) { + case TOK_REG: + case TOK_IDENT: + case TOK_LABEL: + case TOK_STRING: + case TOK_DIRECTIVE: + if (token->string.str) + free(token->string.str); + break; + default: + } +} |