diff options
Diffstat (limited to '')
-rw-r--r-- | masm/parse.c | 1459 |
1 files changed, 333 insertions, 1126 deletions
diff --git a/masm/parse.c b/masm/parse.c index dbe6ade..b36aa1e 100644 --- a/masm/parse.c +++ b/masm/parse.c @@ -1,1326 +1,533 @@ #include <mlimits.h> -#include <merror.h> #include <stdint.h> -#include <stdio.h> -#include <string.h> -#include <elf.h> +#include <merror.h> +#include <stddef.h> -#include "parse.h" #include "lex.h" -#include "mips.h" +#include "parse.h" -#define B16(x) (x) -#define B32(x) (x) +/// +/// Token Functions +/// either get a token, peek a token, +/// or assert a token was returned +/// +/* get the next token from the lexer */ static int next_token(struct parser *parser, struct token *tok) { + // return peeked first if (parser->peek.type != TOK_EOF) { if (tok != NULL) *tok = parser->peek; + else + token_free(&parser->peek); + parser->peek.type = TOK_EOF; return M_SUCCESS; } + + // get next token struct token token; - if (lexer_next(parser->lexer, &token)) + if (lexer_next(&parser->lexer, &token)) return M_ERROR; - if (tok != NULL) + + // return value if given pointer + // else free + if (tok != NULL) { *tok = token; + } else { + token_free(&token); + } + return M_SUCCESS; } - +/* peek the next token from the lexer */ static int peek_token(struct parser *parser, struct token *tok) { + // if we dont have a saved token + // get the next one if (parser->peek.type == TOK_EOF) { if (next_token(parser, &parser->peek)) return M_ERROR; } + + // return it if we were given + // a pointer if (tok != NULL) *tok = parser->peek; + return M_SUCCESS; } - +/* get the next token from the lexer, and assert its of type <type> */ static int assert_token(struct parser *parser, enum token_type type, struct token *tok) { + // get next token struct token token; if (next_token(parser, &token)) return M_ERROR; + + // assert its of type <type> if (token.type != type) { ERROR_POS(token, "expected a token of type '%s', got '%s'", token_str(type), token_str(token.type)); + token_free(&token); return M_ERROR; } - if (tok != NULL) + + // return value if given pointer + // else free + if (tok != NULL) { *tok = token; + } else { + token_free(&token); + } + return M_SUCCESS; } +/* get the next token from the lexer, and assert its of type NL */ static int assert_eol(struct parser *parser) { struct token token; if (next_token(parser, &token)) return M_ERROR; if (token.type != TOK_NL && token.type != TOK_EOF) { - ERROR_POS(token, "expected a new line or end of file"); + ERROR_POS(token, "expected a new line or end of file, got '%s'", + token_str(token.type)); return M_ERROR; } + token_free(&token); return M_SUCCESS; } -/* each instruction has a given parse format - * internal to the parser */ -enum mips_parse_format { - // register type: rs, rt, td - MIPS_PARSE_R, - // register type: rs, rt - MIPS_PARSE_R2, - // register type: rd - MIPS_PARSE_RD, - // register type: rs - MIPS_PARSE_RS, - // imeediate type: rs, rt, immd - MIPS_PARSE_I, - // jump type: offset - MIPS_PARSE_J, - // offset 16b type: offset - MIPS_PARSE_O16, - // offset 26b type: offset - MIPS_PARSE_O26, - // breanch equal type: rs, rt, offset - MIPS_PARSE_BE, - // branch zero type: rs, offset - MIPS_PARSE_BZ, - // store and load: rt, offset(base) - MIPS_PARSE_SL, - // store and load immediate: rt, immediate - MIPS_PARSE_SLI, - // shift: rd, rt, sa - MIPS_PARSE_S, - // shift variable: rd, rt, rs - MIPS_PARSE_SV, - // none: - MIPS_PARSE_NONE, -}; - -#define FORMAT(ins, format) \ - [MIPS_INS_##ins] = MIPS_PARSE_##format, \ - -const enum mips_parse_format mips_parse_formats[] = { - FORMAT(ADD, R) - FORMAT(ADDI, I) - FORMAT(ADDIU, I) - FORMAT(ADDU, R) - FORMAT(AND, R) - FORMAT(ANDI, I) - FORMAT(BAL, O16) - FORMAT(BALC, O26) - FORMAT(BC, O26) - FORMAT(BEQ, BE) - FORMAT(BEQL, BE) - FORMAT(BGEZ, BZ) - FORMAT(BGEZAL, BZ) - FORMAT(BGEZALL, BZ) - FORMAT(BGEZL, BZ) - FORMAT(BGTZ, BZ) - FORMAT(BGTZL, BZ) - FORMAT(BLEZ, BZ) - FORMAT(BLEZL, BZ) - FORMAT(BLTZ, BZ) - FORMAT(BLTZAL, BZ) - FORMAT(BLTZALL, BZ) - FORMAT(BLTZL, BZ) - FORMAT(BNE, BE) - FORMAT(BNEL, BE) - FORMAT(DIV, R) - FORMAT(MOD, R) - FORMAT(DIVU, R) - FORMAT(MODU, R) - FORMAT(J, J) - FORMAT(JAL, J) - FORMAT(JALR, RS) // TODO: handle rd - FORMAT(JALX, J) - FORMAT(JR, RS) - FORMAT(LB, SL) - FORMAT(LBU, SL) - FORMAT(LH, SL) - FORMAT(LHU, SL) - FORMAT(LUI, SLI) - FORMAT(LW, SL) - FORMAT(MFHI, RD) - FORMAT(MFLO, RD) - FORMAT(MTHI, RS) - FORMAT(MTLO, RS) - FORMAT(MUL, R) - FORMAT(MUH, R) - FORMAT(MULU, R) - FORMAT(MUHU, R) - FORMAT(SB, SL) - FORMAT(SH, SL) - FORMAT(SW, SL) - FORMAT(SLL, S) - FORMAT(SLLV, SV) - FORMAT(SLT, R) - FORMAT(SLTI, I) - FORMAT(SLTIU, I) - FORMAT(SLTU, R) - FORMAT(SRA, S) - FORMAT(SRAV, SV) - FORMAT(SRL, S) - FORMAT(SRLV, SV) - FORMAT(SYSCALL, NONE) - FORMAT(OR, R) - FORMAT(ORI, I) - FORMAT(NOR, R) - FORMAT(SUB, R) - FORMAT(SUBU, R) - FORMAT(XOR, R) - FORMAT(XORI, I) -}; - -#undef FORMAT - -#define MAX5 (1 << 5) -#define MAX16 (1 << 16) -#define MAX26 (1 << 25) -#define MAX32 (1 << 31) - -static int get_reference(struct parser *parser, uint64_t *offset, - struct reference *ref, unsigned char type) +/* peek the next token and return SUCCESS on eol */ +static int peek_eol(struct parser *parser) { struct token token; - - if (next_token(parser, &token)) - return M_ERROR; - - if (token.type == TOK_NUMBER) { - - *offset = token.number; - return M_SUCCESS; - } - - if (token.type != TOK_IDENT) { - ERROR_POS(token, "unexpected token of type '%s'", - token_str(token.type)); - return M_ERROR; - } - - strcpy(ref->name, token.text); - ref->type = type; - ref->addend = 0; - - // return zero for now - *offset = 0; - return M_SUCCESS; -} - -static int get_offset(struct parser *parser, int32_t *offset, - struct reference *ref) -{ - uint64_t off; - if (get_reference(parser, &off, ref, R_MIPS_PC16)) - return M_ERROR; - - if (off % 4) { - ERROR_POS((*parser->lexer), "cannot use offset of '%ld', must " - "be divisble by four", off); - return M_ERROR; - } - - if (off > MAX16) { - ERROR("offset '%d' cannot be larger than 16 bits", off); - return M_ERROR; - } - - *offset = off; - return M_SUCCESS; -} - -static int get_offset_26(struct parser *parser, int32_t *offset, - struct reference *ref) -{ - uint64_t off; - if (get_reference(parser, &off, ref, R_MIPS_PC26_S2)) - return M_ERROR; - - if (off % 4) { - ERROR_POS((*parser->lexer), "cannot use offset of '%ld', must " - "be divisble by four", off); - return M_ERROR; - } - - if (off > MAX26) { - ERROR("offset '%d' cannot be larger than 26 bits", off); - return M_ERROR; - } - - *offset = off; - return M_SUCCESS; -} - -static int get_target(struct parser *parser, uint32_t *offset, - struct reference *ref) -{ - uint64_t off; - if (get_reference(parser, &off, ref, R_MIPS_26)) - return M_ERROR; - - if (off > MAX26) { - ERROR("target '%d' cannot be larger than 26 bits", off); + if (peek_token(parser, &token)) return M_ERROR; - } - - *offset = off; - return M_SUCCESS; + int res = (token.type == TOK_NL || token.type == TOK_EOF) ? + M_SUCCESS : M_ERROR; + return res; } -static int get_instruction(const char *ident, struct mips_instruction *res) -{ - for (int i = 0; i < __MIPS_INS_LEN; i++) { - struct mips_instruction ins = - mips_instructions[i]; - if (strcasecmp(ident, ins.name) == 0) { - if (res != NULL) - *res = ins; - return M_SUCCESS; - } - } - return M_ERROR; -} +/// +/// PARSER FUNCTIONS +/// parses each type of expression +/// -static int parse_register(struct parser *parser, enum mips_register *reg) +static int parse_directive_whb(struct parser *parser, void *data, uint32_t *res, + size_t length, size_t max_size) { struct token token; - if (assert_token(parser, TOK_REG, &token)) - return M_ERROR; - - int len = strlen(token.text); - int c0 = len > 0 ? token.text[0] : '\0', - c1 = len > 1 ? token.text[1] : '\0', - c2 = len > 2 ? token.text[2] : '\0', - c3 = len > 3 ? token.text[3] : '\0'; - - // $zero - if (c0 == 'z') { - if (c1 == 'e' && c2 == 'r' && c3 == 'o') { - *reg = MIPS_REG_ZERO; - return M_SUCCESS; - } - } - - // $a0-a3 $at - else if (c0 == 'a') { - if (c1 == 't') { - *reg = MIPS_REG_AT; - return M_SUCCESS; - } - if (c1 >= '0' && c1 <= '3') { - *reg = MIPS_REG_A0; - *reg += c1 - '0'; - return M_SUCCESS; - } - } - - // $v0-v1 - else if (c0 == 'v') { - if (c1 >= '0' && c1 <= '1') { - *reg = MIPS_REG_V0; - *reg += c1 - '0'; - return M_SUCCESS; - } - } - - // $t0-t9 - else if (c0 == 't') { - if (c1 >= '0' && c1 <= '7') { - *reg = MIPS_REG_T0; - *reg += c1 - '0'; - return M_SUCCESS; - } - // reg T8-T9 are not in order with T0-T7 - if (c1 >= '8' && c1 <= '9') { - *reg = MIPS_REG_T8; - *reg += c1 - '8'; - return M_SUCCESS; - } - } - - // $s0-s7 $sp - else if (c0 == 's') { - if (c1 >= '0' && c1 <= '7') { - *reg = MIPS_REG_S0; - *reg += c1 - '0'; - return M_SUCCESS; - } - if (c1 == 'p') { - *reg = MIPS_REG_SP; - return M_SUCCESS; - } - } + int len = 0; - // $k0-k1 - else if (c0 == 'k') { - if (c1 >= '0' && c1 <= '1') { - *reg = MIPS_REG_K0; - *reg += c1 - '0'; - return M_SUCCESS; - } - } + while (1) { + if (peek_eol(parser) == M_SUCCESS) + break; - // $gp - else if (c0 == 'g') { - if (c1 == 'p') { - *reg = MIPS_REG_GP; - return M_SUCCESS; - } - } + if (assert_token(parser, TOK_NUMBER, &token)) + return M_ERROR; - // $fp - else if (c0 == 'f') { - if (c1 == 'p') { - *reg = MIPS_REG_FP; - return M_SUCCESS; + if ((uint64_t)token.number > max_size) { + ERROR_POS(token, "number cannot exceed max size of %zu", + max_size); + return M_ERROR; } - } - // $rp - else if (c0 == 'r') { - if (c1 == 'a') { - *reg = MIPS_REG_RA; - return M_SUCCESS; + if (len >= MAX_ARG_LENGTH) { + ERROR_POS(token, "exceeded max argument length for " + "directives"); + return M_ERROR; } - } - // $0-31 (non aliased register names) - else if (c0 >= '0' && c0 <= '9') { - int i = c0 - '0'; - if (c1 >= '0' && c1 <= '9') { - i *= 10; - i += c1 - '0'; - } - if (i <= 31) { - *reg = i; - return M_SUCCESS; - } + // BUG: does this only work on little endian??? + memcpy((uint8_t *) data + (len++ * length), &token.number, + max_size); } - ERROR_POS(token, "unknown register $%s", token.text); - return M_ERROR; -} - -static int get_reg_offset(struct parser *parser, - struct ins_expr *expr) -{ - struct token token; - enum mips_register reg; - - struct mips_instruction *fi = &expr->ins[0]; - struct mips_instruction *si = &expr->ins[1]; // possibly pseudo - struct reference *fr = &expr->ref[0]; - struct reference *sr = &expr->ref[1]; - - expr->ins_len = 1; - fr->type = R_MIPS_NONE; - -// ============================================= - - // defaults - fi->data.rs = MIPS_REG_ZERO; - fi->data.immd = 0; - - if (peek_token(parser, &token)) - return M_ERROR; - - if (token.type == TOK_IDENT) - goto label; - else if (token.type == TOK_LPAREN) - goto reg; - else - goto off; - -// ============================================= - -label: - - next_token(parser, &token); - - expr->ins_len = 2; - - // move over first instruction to add in a LUI - *si = *fi; - si->data.rs = MIPS_REG_AT; - si->data.offset = 0; - - // update LUI - *fi = mips_instructions[MIPS_INS_LUI]; - fi->data.rt = MIPS_REG_AT; - fi->data.immd = 0; - - // update references - strcpy(fr->name, token.text); - fr->type = R_MIPS_HI16; - fr->addend = 0; - strcpy(sr->name, token.text); - sr->type = R_MIPS_LO16; - sr->addend = 0; - - goto end; - -// ============================================= - -off: - - if (assert_token(parser, TOK_NUMBER, &token)) - return M_ERROR; - fi->data.immd = B16(token.number); - - if (peek_token(parser, &token)) - return M_ERROR; - - if (token.type == TOK_LPAREN) - goto reg; - else - goto end; - -// ============================================= - -reg: - if (assert_token(parser, TOK_LPAREN, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - fi->data.rs = reg; - - if (assert_token(parser, TOK_RPAREN, NULL)) - return M_ERROR; - -// ============================================= -end: - if (peek_token(parser, &token)) - return M_ERROR; - + *res = len; return M_SUCCESS; } -static int parse_number(struct parser *parser, uint32_t *n, uint32_t max) +static int parse_immd(struct parser *parser, uint16_t *num) { struct token token; if (assert_token(parser, TOK_NUMBER, &token)) return M_ERROR; - if (max && token.number > max) { - ERROR_POS(token, "number cannot be larger than '%d'", max); - return M_ERROR; - } - *n = token.number; - return M_SUCCESS; -} - -static int parse_instruction_r(struct parser *parser, - struct mips_instruction *ins) -{ - // format: rs, rt, rd - enum mips_register reg; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rd = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rs = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rt = reg; - - return M_SUCCESS; -} - -static int parse_instruction_r2(struct parser *parser, - struct mips_instruction *ins) -{ - // format: rs, rt - enum mips_register reg; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rs = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rt = reg; - - return M_SUCCESS; -} - -static int parse_instruction_rs(struct parser *parser, - struct mips_instruction *ins) -{ - // format: rs - enum mips_register reg; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rs = reg; + // TOK_NUMBER does not need to be freed + *num = token.number; return M_SUCCESS; } -static int parse_instruction_rd(struct parser *parser, - struct mips_instruction *ins) +static int parse_ident(struct parser *parser, struct string *ident) { - // format: rd - enum mips_register reg; - - if (parse_register(parser, ®)) + struct token token; + if (assert_token(parser, TOK_IDENT, &token)) return M_ERROR; - ins->data.rd = reg; - + string_move(ident, &token.string); return M_SUCCESS; } -static int parse_instruction_i(struct parser *parser, - struct mips_instruction *ins) +static int parse_string(struct parser *parser, struct string *string) { - // format: rs, rt, immd - enum mips_register reg; struct token token; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rt = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rs = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (assert_token(parser, TOK_NUMBER, &token)) - return M_ERROR; - - if (token.number >= MAX16) + if (assert_token(parser, TOK_STRING, &token)) return M_ERROR; - ins->data.immd = B16(token.number); - + string_move(string, &token.string); return M_SUCCESS; } -static int parse_instruction_offset(struct parser *parser, - uint32_t max, - struct mips_instruction *ins, - struct reference *ref) +/* parses a directive */ +static int parse_directive(struct parser *parser, struct string *name, + struct expr_directive *expr) { - int32_t n; + #define CHK(n) if (strcmp(name->str, #n) == 0) - switch (max) { - case MAX26: - if (get_offset_26(parser, &n, ref)) - return M_ERROR; - ins->data.offs26 = B32(n); - break; - case MAX16: - if (get_offset(parser, &n, ref)) - return M_ERROR; - ins->data.offset = B16(n); - break; - default: - return M_ERROR; + CHK(.align) { + expr->type = EXPR_DIRECTIVE_ALIGN; + return parse_immd(parser, &expr->align); + } else CHK(.space) { + expr->type = EXPR_DIRECTIVE_SPACE; + return parse_immd(parser, &expr->space); + } else CHK(.word) { + expr->type = EXPR_DIRECTIVE_WORD; + return parse_directive_whb(parser, expr->words, &expr->len, + sizeof(uint32_t), UINT32_MAX); + } else CHK(.half) { + expr->type = EXPR_DIRECTIVE_HALF; + return parse_directive_whb(parser, expr->halfs, &expr->len, + sizeof(uint16_t), UINT16_MAX); + } else CHK(.byte) { + expr->type = EXPR_DIRECTIVE_BYTE; + return parse_directive_whb(parser, expr->bytes, &expr->len, + sizeof(uint8_t), UINT8_MAX); + } else CHK(.extern) { + expr->type = EXPR_DIRECTIVE_EXTERN; + return parse_ident(parser, &expr->label); + } else CHK(.globl) { + expr->type = EXPR_DIRECTIVE_GLOBL; + return parse_ident(parser, &expr->label); + } else CHK(.ascii) { + expr->type = EXPR_DIRECTIVE_ASCII; + return parse_string(parser, &expr->string); + } else CHK(.asciiz) { + expr->type = EXPR_DIRECTIVE_ASCIIZ; + return parse_string(parser, &expr->string); + } else { + expr->type = EXPR_DIRECTIVE_SECTION; + string_move(&expr->section, name); + return M_SUCCESS; } - return M_SUCCESS; -} - -static int parse_instruction_j(struct parser *parser, - struct mips_instruction *ins, - struct reference *ref) -{ - uint32_t n; - if (get_target(parser, &n, ref) || n > MAX26) - return M_ERROR; - ins->data.target = n; - - return M_SUCCESS; -} - -static int parse_instruction_branch_equal(struct parser *parser, - struct mips_instruction *ins, - struct reference *ref) -{ - enum mips_register reg; - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rs = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rt = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - int32_t off; - if (get_offset(parser, &off, ref)) - return M_ERROR; - ins->data.offset = B16(off); - - return M_SUCCESS; -} - -static int parse_instruction_branch(struct parser *parser, - struct mips_instruction *ins, - struct reference *ref) -{ - enum mips_register reg; - int32_t n; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rs = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (get_offset(parser, &n, ref)) - return M_ERROR; - ins->data.offset = B16(n); - - return M_SUCCESS; -} - -static int parse_instruction_sl(struct parser *parser, - struct ins_expr *expr) -{ - enum mips_register reg; - struct mips_instruction *ins = &expr->ins[0]; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rt = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (get_reg_offset(parser, expr)) - return M_ERROR; - - return M_SUCCESS; + #undef CHK } -static int parse_instruction_sli(struct parser *parser, - struct mips_instruction *ins) +static int parse_constant(struct parser *parser, struct string *name, + struct expr_const *constant) { - enum mips_register reg; - struct token token; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rt = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX16) + if (assert_token(parser, TOK_EQUAL, NULL)) return M_ERROR; - ins->data.immd = B16(token.number); - return M_SUCCESS; -} - -static int parse_instruction_s(struct parser *parser, - struct mips_instruction *ins) -{ - enum mips_register reg; struct token token; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rd = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rt = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX5) + if (assert_token(parser, TOK_NUMBER, &token)) return M_ERROR; - ins->data.shamt = token.number; + string_move(&constant->name, name); + constant->num = token.number; return M_SUCCESS; } -static int parse_instruction_sv(struct parser *parser, - struct mips_instruction *ins) +static int parse_offset(struct parser *parser, + struct expr_ins_arg *arg, + uint64_t immd) { - enum mips_register reg; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rd = reg; + // the immediate has already been parsed + // now parse (REG) - if (assert_token(parser, TOK_COMMA, NULL)) + if (assert_token(parser, TOK_LPAREN, NULL)) return M_ERROR; - if (parse_register(parser, ®)) + struct token token; + if (assert_token(parser, TOK_REG, &token)) return M_ERROR; - ins->data.rt = reg; - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; + // set values + string_move(&arg->offset.reg, &token.string); + arg->offset.immd = immd; - if (parse_register(parser, ®)) + if (assert_token(parser, TOK_RPAREN, NULL)) { + string_free(&arg->offset.reg); return M_ERROR; - ins->data.rs = reg; + } return M_SUCCESS; } -static int parse_pseudo_li(struct parser *parser, struct ins_expr *expr) +static int parse_instruction_arg(struct parser *parser, + struct expr_ins_arg *arg) { - enum mips_register reg; - uint32_t immd; - - if (parse_register(parser, ®)) - return M_ERROR; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; + // allowed token matches: + // + // register: + // REG + // + // label: + // IDENT + // + // immediate: + // IMMD + // + // offset: + // (REG) + // IMMD(REG) - if (parse_number(parser, &immd, MAX16)) - return M_ERROR; - - expr->ins_len = 1; - expr->ins[0] = mips_instructions[MIPS_INS_ORI]; - expr->ins[0].data.rt = reg; - expr->ins[0].data.rs = MIPS_REG_ZERO; - expr->ins[0].data.immd = B16(immd); - expr->ref[0].type = R_MIPS_NONE; - - return M_SUCCESS; -} - -static int parse_pseudo_la(struct parser *parser, struct ins_expr *expr) -{ - enum mips_register reg; struct token token; - - uint16_t hi = 0, lo = 0; - - if (parse_register(parser, ®)) + if (peek_token(parser, &token)) return M_ERROR; - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; + // if its a left paren, were parsing + // an offset + if (token.type == TOK_LPAREN) { + arg->type = EXPR_INS_ARG_OFFSET; + return parse_offset(parser, arg, 0); + } + // token must now be either a number (immediate) + // register, or label,,, + // ... take ownership of the next token if (next_token(parser, &token)) return M_ERROR; - if (token.type == TOK_IDENT) { - expr->ref[0].type = R_MIPS_HI16; - expr->ref[0].addend = 0; - strcpy(expr->ref[0].name, token.text); - expr->ref[1].type = R_MIPS_LO16; - expr->ref[1].addend = 0; - strcpy(expr->ref[1].name, token.text); - } else if (token.type == TOK_NUMBER && token.number > MAX32) { - hi = token.number >> 16; - lo = token.number & 0x0000ffff; - expr->ref[0].type = R_MIPS_NONE; - expr->ref[1].type = R_MIPS_NONE; - } else { - return M_ERROR; + // if its a register... return + // there are no other pathways + if (token.type == TOK_REG) { + arg->type = EXPR_INS_ARG_REGISTER; + string_move(&arg->reg, &token.string); + return M_SUCCESS; } - expr->ins_len = 2; - expr->ins[0] = mips_instructions[MIPS_INS_LUI]; - expr->ins[0].data.rt = reg; - expr->ins[0].data.immd = B16(hi); - expr->ins[1] = mips_instructions[MIPS_INS_ADDI]; - expr->ins[1].data.rt = reg; - expr->ins[1].data.rs = reg; - expr->ins[1].data.immd = B16(lo); - - return M_SUCCESS; -} - -static int parse_pseudo_move(struct parser *parser, struct ins_expr *expr) -{ - enum mips_register rd, rs; - - if (parse_register(parser, &rd)) - return M_ERROR; + // if it is a label... return + // therea are no other pathways + if (token.type == TOK_IDENT) { + arg->type = EXPR_INS_ARG_LABEL; + string_move(&arg->label, &token.string); + return M_SUCCESS; + } - if (assert_token(parser, TOK_COMMA, NULL)) + // now it must be a number... + // throw a error if its now + if (token.type != TOK_NUMBER) { + ERROR_POS(token, "expected number, got %s", + token_str(token.type)); + token_free(&token); return M_ERROR; + } - if (parse_register(parser, &rs)) + uint64_t immd = (uint64_t)token.number; + // now if the next token is a lparen + // parse offset, else return immd + if (peek_token(parser, &token)) return M_ERROR; - expr->ins_len = 1; - expr->ins[0] = mips_instructions[MIPS_INS_OR]; - expr->ins[0].data.rs = rs; - expr->ins[0].data.rt = MIPS_REG_ZERO; - expr->ins[0].data.rd = rd; - expr->ref[0].type = R_MIPS_NONE; - - return M_SUCCESS; -} - -static int parse_pseudo_nop(struct parser *parser, struct ins_expr *expr) -{ - (void) parser; - - expr->ins_len = 1; - expr->ins[0] = mips_instructions[MIPS_INS_SLL]; - expr->ref[0].type = R_MIPS_NONE; - - return M_SUCCESS; -} - -static int parse_pseudo_instruction(struct parser *parser, - struct ins_expr *expr, - struct token ident) -{ - // disablle logging in the logging - // module - extern int log_disabled; - log_disabled = 1; - - int res = M_ERROR; - - struct lexer_state state; - lexer_save(parser->lexer, &state); - - #define CHK(name) if (strcmp(ident.text, #name) == 0) - - CHK(li) - res = parse_pseudo_li(parser, expr); - else CHK(la) - res = parse_pseudo_la(parser, expr); - else CHK(move) - res = parse_pseudo_move(parser, expr); - else CHK(nop) - res = parse_pseudo_nop(parser, expr); - - #undef CHK - - if (res) { - // reset on fail - lexer_load(parser->lexer, &state); - expr->ins[0].data.raw = 0; - expr->ins[1].data.raw = 0; - expr->ref[0] = (struct reference) {0}; - expr->ref[1] = (struct reference) {0}; + if (token.type == TOK_LPAREN) { + arg->type = EXPR_INS_ARG_OFFSET; + return parse_offset(parser, arg, immd); + } else { + arg->type = EXPR_INS_ARG_IMMEDIATE; + arg->immd = immd; + return M_SUCCESS; } - log_disabled = 0; - return res; } -static int parse_instruction(struct parser *parser, - struct ins_expr *expr, - struct token ident) +static int parse_instruction(struct parser *parser, struct string *name, + struct expr_ins *ins) { - struct mips_instruction instruction; - enum mips_parse_format format; - int res = M_SUCCESS; - - if (parse_pseudo_instruction(parser, expr, ident) == M_SUCCESS) - return M_SUCCESS; - - if (get_instruction(ident.text, &instruction)) { - ERROR_POS(ident, "unknown instruction '%s'", ident.text); - return M_ERROR; - } - - struct mips_instruction *ins = &expr->ins[0]; - struct reference *ref = &expr->ref[0]; - - // this will only ever generate one instruction - expr->ins_len = 1; - *ins = instruction; - ref->type = R_MIPS_NONE; - - format = mips_parse_formats[instruction.type]; - switch (format) { - case MIPS_PARSE_R: - res = parse_instruction_r(parser, ins); - break; - case MIPS_PARSE_R2: - res = parse_instruction_r2(parser, ins); - break; - case MIPS_PARSE_RS: - res = parse_instruction_rs(parser, ins); - break; - case MIPS_PARSE_RD: - res = parse_instruction_rd(parser, ins); - break; - case MIPS_PARSE_I: - res = parse_instruction_i(parser, ins); - break; - case MIPS_PARSE_J: - res = parse_instruction_j(parser, ins, ref); - break; - case MIPS_PARSE_O16: - res = parse_instruction_offset(parser, MAX16, ins, ref); - break; - case MIPS_PARSE_O26: - res = parse_instruction_offset(parser, MAX26, ins, ref); - break; - case MIPS_PARSE_BE: - res = parse_instruction_branch_equal(parser, ins, ref); - break; - case MIPS_PARSE_BZ: - res = parse_instruction_branch(parser, ins, ref); - break; - case MIPS_PARSE_SL: - res = parse_instruction_sl(parser, expr); - break; - case MIPS_PARSE_SLI: - res = parse_instruction_sli(parser, ins); - break; - case MIPS_PARSE_S: - res = parse_instruction_s(parser, ins); - break; - case MIPS_PARSE_SV: - res = parse_instruction_sv(parser, ins); - break; - case MIPS_PARSE_NONE: - res = M_SUCCESS; - break; - } + int len = 0; - if (res == M_SUCCESS && assert_eol(parser)) - return M_ERROR; + if (peek_eol(parser) == M_SUCCESS) + goto skip_args; - return res; -} + while (1) { + if (len >= MAX_ARG_LENGTH) { + ERROR_POS(parser->lexer, + "reached max argument length"); + return M_ERROR; + } -static int parse_directive_align(struct parser *parser, - struct mips_directive *directive) -{ - struct token token; - if (assert_token(parser, TOK_NUMBER, &token)) - return M_ERROR; + if (parse_instruction_arg(parser, &ins->args[len++])) + return M_ERROR; - if (token.number < 0) { - ERROR_POS(token, "cannot align negative"); - return M_ERROR; - } + if (peek_eol(parser) == M_SUCCESS) + break; - if (token.number > MAX16) { - ERROR_POS(token, "cannot align more than 65kb"); - return M_ERROR; + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; } - directive->type = MIPS_DIRECTIVE_ALIGN; - directive->align = token.number; +skip_args: + string_move(&ins->name, name); + ins->args_len = len; return M_SUCCESS; } -static int parse_directive_space(struct parser *parser, - struct mips_directive *directive) +/* gets the next value from the parser */ +int parser_next(struct parser *parser, struct expr *expr) { - struct token token; - if (assert_token(parser, TOK_NUMBER, &token)) - return M_ERROR; + // the next token being looked at + struct token token = { + .type = TOK_NL + }; - if (token.number < 0) { - ERROR_POS(token, "cannot reserve negative"); - return M_ERROR; - } + // the result to return + int res = M_SUCCESS; - if (token.number > MAX16) { - ERROR_POS(token, "cannot reserve more than 65kb"); - return M_ERROR; + // skip all new lines + while (1) { + if (next_token(parser, &token)) + return M_ERROR; + if (token.type != TOK_NL) + break; + token_free(&token); } - directive->type = MIPS_DIRECTIVE_SPACE; - directive->space = token.number; - - return M_SUCCESS; -} + expr->line_no = parser->lexer.y; + expr->byte_start = token.off; + expr->byte_end = token.off; -static int parse_directive_whb(struct parser *parser, - struct mips_directive *directive, - enum mips_directive_type type) -{ - struct token token; - uint32_t size = 0; - uint32_t len = 0; + // if EOF, return M_EOF + if (token.type == TOK_EOF) + return M_EOF; - switch (type) { - case MIPS_DIRECTIVE_WORD: - size = UINT32_MAX; - break; - case MIPS_DIRECTIVE_HALF: - size = UINT16_MAX; - break; - case MIPS_DIRECTIVE_BYTE: - size = UINT8_MAX; - break; - default: + // when a ident ends with a colon + // parse a lebel + else if (token.type == TOK_LABEL) { + expr->type = EXPR_LABEL; + // label now owns string + string_move(&expr->label, &token.string); } - directive->type = type; - - while (1) { - if (assert_token(parser, TOK_NUMBER, &token)) - return M_ERROR; + // when a ident starts with a dot + // parse a directive + else if (token.type == TOK_DIRECTIVE) { + expr->type = EXPR_DIRECTIVE; + res = parse_directive(parser, &token.string, &expr->directive); + } - if (len >= MAX_ARG_LENGTH) { - ERROR_POS(token, "directives cannot be longer than " - "%d arguments", MAX_ARG_LENGTH); + // peek the next token: + // 1. = means parse constant + // 2. else parse instruction + else { + if (token.type != TOK_IDENT) { + ERROR_POS(token, "expected ident, got %s", + token_str(token.type)); + token_free(&token); return M_ERROR; } - if (token.number > size) { - ERROR_POS(token, "number cannot execede max size of: " - "%d", size); + struct token peek; + if (peek_token(parser, &peek)) { + token_free(&token); return M_ERROR; } - switch (type) { - case MIPS_DIRECTIVE_WORD: - directive->words[len++] = token.number; - - break; - case MIPS_DIRECTIVE_HALF: - directive->halfs[len++] = token.number; - break; - case MIPS_DIRECTIVE_BYTE: - directive->bytes[len++] = token.number; - break; - default: - } - - if (peek_token(parser, &token)) - return M_ERROR; - - if (token.type == TOK_COMMA) { - next_token(parser, NULL); - continue; + if (peek.type == TOK_EQUAL) { + expr->type = EXPR_CONSTANT; + res = parse_constant(parser, &token.string, + &expr->constant); + } else { + expr->type = EXPR_INS; + res = parse_instruction(parser, &token.string, + &expr->instruction); } - - break; } - directive->len = len; + // update byte end for expr + expr->byte_end = ftell(parser->lexer.file); - return M_SUCCESS; -} - -static int parse_directive_extern(struct parser *parser, - struct mips_directive *directive) -{ - struct token token; - if (assert_token(parser, TOK_IDENT, &token)) - return M_ERROR; - - directive->type = MIPS_DIRECTIVE_EXTERN; - strcpy(directive->name, token.text); + // free tokens + token_free(&token); - return M_SUCCESS; -} - -static int parse_directive_globl(struct parser *parser, - struct mips_directive *directive) -{ - struct token token; - if (assert_token(parser, TOK_IDENT, &token)) + // everything must end in a new line + if (res == M_SUCCESS && assert_eol(parser)) return M_ERROR; - directive->type = MIPS_DIRECTIVE_GLOBL; - strcpy(directive->name, token.text); - - return M_SUCCESS; + return res; } -static int parse_directive_ascii(struct parser *parser, - struct mips_directive *directive, - enum mips_directive_type type) +int parser_init(const char *file, struct parser *parser) { - struct token token; - if (assert_token(parser, TOK_STRING, &token)) + parser->peek.type = TOK_EOF; + if (lexer_init(file, &parser->lexer)) return M_ERROR; - - directive->type = type; - strcpy(directive->name, token.text); - - return M_SUCCESS; -} - -static int parse_section(struct mips_directive *directive, - char name[MAX_LEX_LENGTH]) -{ - directive->type = MIPS_DIRECTIVE_SECTION; - strcpy(directive->name, name); - return M_SUCCESS; } -static int parse_directive(struct parser *parser, - struct mips_directive *directive) -{ - struct token token; - if (assert_token(parser, TOK_DIRECTIVE, &token)) - return M_ERROR; - - // .align n - if (strcmp(token.text, "align") == 0) - return parse_directive_align(parser, directive); - else if (strcmp(token.text, "space") == 0) - return parse_directive_space(parser, directive); - else if (strcmp(token.text, "word") == 0) - return parse_directive_whb(parser, directive, - MIPS_DIRECTIVE_WORD); - else if (strcmp(token.text, "half") == 0) - return parse_directive_whb(parser, directive, - MIPS_DIRECTIVE_HALF); - else if (strcmp(token.text, "byte") == 0) - return parse_directive_whb(parser, directive, - MIPS_DIRECTIVE_BYTE); - else if (strcmp(token.text, "extern") == 0) - return parse_directive_extern(parser, directive); - else if (strcmp(token.text, "globl") == 0) - return parse_directive_globl(parser, directive); - else if (strcmp(token.text, "ascii") == 0) - return parse_directive_ascii(parser, directive, - MIPS_DIRECTIVE_ASCII); - else if (strcmp(token.text, "asciiz") == 0) - return parse_directive_ascii(parser, directive, - MIPS_DIRECTIVE_ASCIIZ); - else - return parse_section(directive, token.text); -} -static int parse_constant(struct parser *parser, struct const_expr *expr, - struct token ident) +void parser_free(struct parser *parser) { - struct token number; - - if (assert_token(parser, TOK_EQUAL, NULL)) - return M_ERROR; - - if (assert_token(parser, TOK_NUMBER, &number)) - return M_ERROR; - - strcpy(expr->name,ident.text); - expr->value = number.number; - - return M_SUCCESS; + token_free(&parser->peek); + lexer_free(&parser->lexer); } -static int parser_handle_ident(struct parser *parser, struct expr *expr) +static inline void expr_directive_free(struct expr_directive *dir) { - struct token ident; - struct token peek; - - if (assert_token(parser, TOK_IDENT, &ident)) - return M_ERROR; - - if (peek_token(parser, &peek)) - return M_ERROR; - - if (peek.type == TOK_EQUAL) { - expr->type = EXPR_CONSTANT; - return parse_constant(parser, &expr->constant, ident); - } else { - expr->type = EXPR_INS; - return parse_instruction(parser, &expr->ins, ident); + switch (dir->type) { + case EXPR_DIRECTIVE_SECTION: + string_free(&dir->section); + break; + case EXPR_DIRECTIVE_EXTERN: + case EXPR_DIRECTIVE_GLOBL: + string_free(&dir->label); + break; + case EXPR_DIRECTIVE_ASCII: + case EXPR_DIRECTIVE_ASCIIZ: + string_free(&dir->string); + break; + default: } } - -static int parse_label(struct parser *parser, - struct expr *expr) -{ - struct token token; - - if (assert_token(parser, TOK_LABEL, &token)) - return M_ERROR; - strcpy(expr->label, token.text); - - return M_SUCCESS; -} - - -int parser_next(struct parser *parser, struct expr *expr) +static inline void expr_ins_arg_free(struct expr_ins_arg *arg) { - struct token token; - int res = M_SUCCESS; - -again: - if (peek_token(parser, &token)) - return M_ERROR; - - switch (token.type) { - case TOK_NL: - next_token(parser, NULL); - goto again; - - case TOK_EOF: - res = M_EOF; - break; - - case TOK_LABEL: - expr->type = EXPR_LABEL; - res = parse_label(parser, expr); - break; - - case TOK_DIRECTIVE: - expr->type = EXPR_DIRECTIVE; - res = parse_directive(parser, &expr->directive); - break; - - case TOK_IDENT: - res = parser_handle_ident(parser, expr); - break; - - default: - ERROR_POS(token, "unexpected token '%s'", - token_str(token.type)); - return M_ERROR; - + switch (arg->type) { + case EXPR_INS_ARG_REGISTER: + string_free(&arg->reg); + break; + case EXPR_INS_ARG_IMMEDIATE: + break; + case EXPR_INS_ARG_LABEL: + string_free(&arg->label); + break; + case EXPR_INS_ARG_OFFSET: + string_free(&arg->offset.reg); + break; } - - return res; -} - -int parser_init(struct lexer *lexer, struct parser *parser) -{ - parser->lexer = lexer; - parser->peek.type = TOK_EOF; - return M_SUCCESS; } - -void parser_free(struct parser *parser) +void expr_free(struct expr *expr) { - (void) parser; + switch (expr->type) { + case EXPR_DIRECTIVE: + expr_directive_free(&expr->directive); + break; + case EXPR_CONSTANT: + string_free(&expr->constant.name); + break; + case EXPR_INS: + string_free(&expr->instruction.name); + for (uint32_t i = 0; i < expr->instruction.args_len; i++) + expr_ins_arg_free(&expr->instruction.args[i]); + break; + case EXPR_LABEL: + string_free(&expr->label); + break; + } } |