#include #include #include #include #include "lex.h" #include "parse.h" /// /// Token Functions /// either get a token, peek a token, /// or assert a token was returned /// /* get the next token from the lexer */ static int next_token(struct parser *parser, struct token *tok) { // return peeked first if (parser->peek.type != TOK_EOF) { if (tok != NULL) *tok = parser->peek; else token_free(&parser->peek); parser->peek.type = TOK_EOF; return M_SUCCESS; } // get next token struct token token; if (lexer_next(&parser->lexer, &token)) return M_ERROR; // return value if given pointer // else free if (tok != NULL) { *tok = token; } else { token_free(&token); } return M_SUCCESS; } /* peek the next token from the lexer */ static int peek_token(struct parser *parser, struct token *tok) { // if we dont have a saved token // get the next one if (parser->peek.type == TOK_EOF) { if (next_token(parser, &parser->peek)) return M_ERROR; } // return it if we were given // a pointer if (tok != NULL) *tok = parser->peek; return M_SUCCESS; } /* get the next token from the lexer, and assert its of type */ static int assert_token(struct parser *parser, enum token_type type, struct token *tok) { // get next token struct token token; if (next_token(parser, &token)) return M_ERROR; // assert its of type if (token.type != type) { ERROR_POS(token, "expected a token of type '%s', got '%s'", token_str(type), token_str(token.type)); token_free(&token); return M_ERROR; } // return value if given pointer // else free if (tok != NULL) { *tok = token; } else { token_free(&token); } return M_SUCCESS; } /* get the next token from the lexer, and assert its of type NL */ static int assert_eol(struct parser *parser) { struct token token; if (next_token(parser, &token)) return M_ERROR; if (token.type != TOK_NL && token.type != TOK_EOF) { ERROR_POS(token, "expected a new line or end of file, got '%s'", token_str(token.type)); return M_ERROR; } token_free(&token); return M_SUCCESS; } /* peek the next token and return SUCCESS on eol */ static int peek_eol(struct parser *parser) { struct token token; if (peek_token(parser, &token)) return M_ERROR; int res = (token.type == TOK_NL || token.type == TOK_EOF) ? M_SUCCESS : M_ERROR; return res; } /// /// PARSER FUNCTIONS /// parses each type of expression /// static int parse_directive_whb(struct parser *parser, void *data, uint32_t *res, size_t length, size_t max_size) { struct token token; int len = 0; while (1) { if (peek_eol(parser) == M_SUCCESS) break; if (assert_token(parser, TOK_NUMBER, &token)) return M_ERROR; if ((uint64_t)token.number > max_size) { ERROR_POS(token, "number cannot exceed max size of %zu", max_size); return M_ERROR; } if (len >= MAX_ARG_LENGTH) { ERROR_POS(token, "exceeded max argument length for " "directives"); return M_ERROR; } // BUG: does this only work on little endian??? memcpy((uint8_t *) data + (len++ * length), &token.number, max_size); } *res = len; return M_SUCCESS; } static int parse_immd(struct parser *parser, uint16_t *num) { struct token token; if (assert_token(parser, TOK_NUMBER, &token)) return M_ERROR; // TOK_NUMBER does not need to be freed *num = token.number; return M_SUCCESS; } static int parse_ident(struct parser *parser, struct string *ident) { struct token token; if (assert_token(parser, TOK_IDENT, &token)) return M_ERROR; string_move(ident, &token.string); return M_SUCCESS; } static int parse_string(struct parser *parser, struct string *string) { struct token token; if (assert_token(parser, TOK_STRING, &token)) return M_ERROR; string_move(string, &token.string); return M_SUCCESS; } /* parses a directive */ static int parse_directive(struct parser *parser, struct string *name, struct expr_directive *expr) { #define CHK(n) if (strcmp(name->str, #n) == 0) CHK(.align) { expr->type = EXPR_DIRECTIVE_ALIGN; return parse_immd(parser, &expr->align); } else CHK(.space) { expr->type = EXPR_DIRECTIVE_SPACE; return parse_immd(parser, &expr->space); } else CHK(.word) { expr->type = EXPR_DIRECTIVE_WORD; return parse_directive_whb(parser, expr->words, &expr->len, sizeof(uint32_t), UINT32_MAX); } else CHK(.half) { expr->type = EXPR_DIRECTIVE_HALF; return parse_directive_whb(parser, expr->halfs, &expr->len, sizeof(uint16_t), UINT16_MAX); } else CHK(.byte) { expr->type = EXPR_DIRECTIVE_BYTE; return parse_directive_whb(parser, expr->bytes, &expr->len, sizeof(uint8_t), UINT8_MAX); } else CHK(.extern) { expr->type = EXPR_DIRECTIVE_EXTERN; return parse_ident(parser, &expr->label); } else CHK(.globl) { expr->type = EXPR_DIRECTIVE_GLOBL; return parse_ident(parser, &expr->label); } else CHK(.ascii) { expr->type = EXPR_DIRECTIVE_ASCII; return parse_string(parser, &expr->string); } else CHK(.asciiz) { expr->type = EXPR_DIRECTIVE_ASCIIZ; return parse_string(parser, &expr->string); } else { expr->type = EXPR_DIRECTIVE_SECTION; string_move(&expr->section, name); return M_SUCCESS; } #undef CHK } static int parse_constant(struct parser *parser, struct string *name, struct expr_const *constant) { if (assert_token(parser, TOK_EQUAL, NULL)) return M_ERROR; struct token token; if (assert_token(parser, TOK_NUMBER, &token)) return M_ERROR; string_move(&constant->name, name); constant->num = token.number; return M_SUCCESS; } static int parse_offset(struct parser *parser, struct expr_ins_arg *arg, uint64_t immd) { // the immediate has already been parsed // now parse (REG) if (assert_token(parser, TOK_LPAREN, NULL)) return M_ERROR; struct token token; if (assert_token(parser, TOK_REG, &token)) return M_ERROR; // set values string_move(&arg->offset.reg, &token.string); arg->offset.immd = immd; if (assert_token(parser, TOK_RPAREN, NULL)) { string_free(&arg->offset.reg); return M_ERROR; } return M_SUCCESS; } static int parse_instruction_arg(struct parser *parser, struct expr_ins_arg *arg) { // allowed token matches: // // register: // REG // // label: // IDENT // // immediate: // IMMD // // offset: // (REG) // IMMD(REG) struct token token; if (peek_token(parser, &token)) return M_ERROR; // if its a left paren, were parsing // an offset if (token.type == TOK_LPAREN) { arg->type = EXPR_INS_ARG_OFFSET; return parse_offset(parser, arg, 0); } // token must now be either a number (immediate) // register, or label,,, // ... take ownership of the next token if (next_token(parser, &token)) return M_ERROR; // if its a register... return // there are no other pathways if (token.type == TOK_REG) { arg->type = EXPR_INS_ARG_REGISTER; string_move(&arg->reg, &token.string); return M_SUCCESS; } // if it is a label... return // therea are no other pathways if (token.type == TOK_IDENT) { arg->type = EXPR_INS_ARG_LABEL; string_move(&arg->label, &token.string); return M_SUCCESS; } // now it must be a number... // throw a error if its now if (token.type != TOK_NUMBER) { ERROR_POS(token, "expected number, got %s", token_str(token.type)); token_free(&token); return M_ERROR; } uint64_t immd = (uint64_t)token.number; // now if the next token is a lparen // parse offset, else return immd if (peek_token(parser, &token)) return M_ERROR; if (token.type == TOK_LPAREN) { arg->type = EXPR_INS_ARG_OFFSET; return parse_offset(parser, arg, immd); } else { arg->type = EXPR_INS_ARG_IMMEDIATE; arg->immd = immd; return M_SUCCESS; } } static int parse_instruction(struct parser *parser, struct string *name, struct expr_ins *ins) { int len = 0; if (peek_eol(parser) == M_SUCCESS) goto skip_args; while (1) { if (len >= MAX_ARG_LENGTH) { ERROR_POS(parser->lexer, "reached max argument length"); return M_ERROR; } if (parse_instruction_arg(parser, &ins->args[len++])) return M_ERROR; if (peek_eol(parser) == M_SUCCESS) break; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; } skip_args: string_move(&ins->name, name); ins->args_len = len; return M_SUCCESS; } /* gets the next value from the parser */ int parser_next(struct parser *parser, struct expr *expr) { // the next token being looked at struct token token = { .type = TOK_NL }; // the result to return int res = M_SUCCESS; // skip all new lines while (1) { if (next_token(parser, &token)) return M_ERROR; if (token.type != TOK_NL) break; token_free(&token); } expr->line_no = parser->lexer.y; expr->byte_start = token.off; expr->byte_end = token.off; // if EOF, return M_EOF if (token.type == TOK_EOF) return M_EOF; // when a ident ends with a colon // parse a lebel else if (token.type == TOK_LABEL) { expr->type = EXPR_LABEL; // label now owns string string_move(&expr->label, &token.string); } // when a ident starts with a dot // parse a directive else if (token.type == TOK_DIRECTIVE) { expr->type = EXPR_DIRECTIVE; res = parse_directive(parser, &token.string, &expr->directive); } // peek the next token: // 1. = means parse constant // 2. else parse instruction else { if (token.type != TOK_IDENT) { ERROR_POS(token, "expected ident, got %s", token_str(token.type)); token_free(&token); return M_ERROR; } struct token peek; if (peek_token(parser, &peek)) { token_free(&token); return M_ERROR; } if (peek.type == TOK_EQUAL) { expr->type = EXPR_CONSTANT; res = parse_constant(parser, &token.string, &expr->constant); } else { expr->type = EXPR_INS; res = parse_instruction(parser, &token.string, &expr->instruction); } } // update byte end for expr expr->byte_end = ftell(parser->lexer.file); // free tokens token_free(&token); // everything must end in a new line if (res == M_SUCCESS && assert_eol(parser)) return M_ERROR; return res; } int parser_init(const char *file, struct parser *parser) { parser->peek.type = TOK_EOF; if (lexer_init(file, &parser->lexer)) return M_ERROR; return M_SUCCESS; } void parser_free(struct parser *parser) { token_free(&parser->peek); lexer_free(&parser->lexer); } static inline void expr_directive_free(struct expr_directive *dir) { switch (dir->type) { case EXPR_DIRECTIVE_SECTION: string_free(&dir->section); break; case EXPR_DIRECTIVE_EXTERN: case EXPR_DIRECTIVE_GLOBL: string_free(&dir->label); break; case EXPR_DIRECTIVE_ASCII: case EXPR_DIRECTIVE_ASCIIZ: string_free(&dir->string); break; default: } } static inline void expr_ins_arg_free(struct expr_ins_arg *arg) { switch (arg->type) { case EXPR_INS_ARG_REGISTER: string_free(&arg->reg); break; case EXPR_INS_ARG_IMMEDIATE: break; case EXPR_INS_ARG_LABEL: string_free(&arg->label); break; case EXPR_INS_ARG_OFFSET: string_free(&arg->offset.reg); break; } } void expr_free(struct expr *expr) { switch (expr->type) { case EXPR_DIRECTIVE: expr_directive_free(&expr->directive); break; case EXPR_CONSTANT: string_free(&expr->constant.name); break; case EXPR_INS: string_free(&expr->instruction.name); for (uint32_t i = 0; i < expr->instruction.args_len; i++) expr_ins_arg_free(&expr->instruction.args[i]); break; case EXPR_LABEL: string_free(&expr->label); break; } }