#include #include #include #include #include #include #include "parse_mips32.h" #include "parse.h" #include "mlimits.h" #include "parse.h" #include "lex.h" /* each instruction has a given parse format * internal to the parser */ enum mips32_parse_format { // register type: rs, rt, td MIPS32_PARSE_R, // register type: rs, rt MIPS32_PARSE_R2, // register type: rd MIPS32_PARSE_RD, // register type: rs MIPS32_PARSE_RS, // imeediate type: rs, rt, immd MIPS32_PARSE_I, // jump type: offset MIPS32_PARSE_J, // jump type: register MIPS32_PARSE_JR, // offset 16b type: offset MIPS32_PARSE_O16, // offset 26b type: offset MIPS32_PARSE_O26, // breanch equal type: rs, rt, offset MIPS32_PARSE_BE, // branch zero type: rs, offset MIPS32_PARSE_BZ, // store and load: rt, offset(base) MIPS32_PARSE_SL, // store and load immediate: rt, immediate MIPS32_PARSE_SLI, // shift: rd, rt, sa MIPS32_PARSE_S, // shift variable: rd, rt, rs MIPS32_PARSE_SV, // none: MIPS32_PARSE_NONE, }; #define FORMAT(ins, format) \ [MIPS32_INS_##ins] = MIPS32_PARSE_##format, \ const enum mips32_parse_format mips32_parse_formats[] = { FORMAT(ADD, R) FORMAT(ADDI, I) FORMAT(ADDIU, I) FORMAT(ADDU, R) FORMAT(AND, R) FORMAT(ANDI, I) FORMAT(BAL, O16) FORMAT(BALC, O26) FORMAT(BC, O26) FORMAT(BEQ, BE) FORMAT(BEQL, BE) FORMAT(BGEZ, BZ) FORMAT(BGEZAL, BZ) FORMAT(BGEZALL, BZ) FORMAT(BGEZL, BZ) FORMAT(BGTZ, BZ) FORMAT(BGTZL, BZ) FORMAT(BLEZ, BZ) FORMAT(BLEZL, BZ) FORMAT(BLTZ, BZ) FORMAT(BLTZAL, BZ) FORMAT(BLTZALL, BZ) FORMAT(BLTZL, BZ) FORMAT(BNE, BE) FORMAT(BNEL, BE) FORMAT(DDIV, R2) FORMAT(DDIVU, R2) FORMAT(DIV, R2) FORMAT(DIVU, R2) FORMAT(J, J) FORMAT(JAL, J) FORMAT(JALR, JR) // TODO: handle rd FORMAT(JALX, J) FORMAT(JR, JR) FORMAT(LB, SL) FORMAT(LBU, SL) FORMAT(LH, SL) FORMAT(LHU, SL) FORMAT(LUI, SLI) FORMAT(LW, SL) FORMAT(LWL, SL) FORMAT(LWR, SL) FORMAT(MFHI, RD) FORMAT(MFLO, RD) FORMAT(MTHI, RS) FORMAT(MTLO, RS) FORMAT(MULT, R2) FORMAT(MULTU, R2) FORMAT(SB, SL) FORMAT(SH, SL) FORMAT(SW, SL) FORMAT(SWL, SL) FORMAT(SLL, S) FORMAT(SLLV, SV) FORMAT(SLT, R) FORMAT(SLTI, I) FORMAT(SLTIU, I) FORMAT(SLTU, R) FORMAT(SRA, S) FORMAT(SRAV, SV) FORMAT(SRL, S) FORMAT(SRLV, SV) FORMAT(SYSCALL, NONE) FORMAT(OR, R) FORMAT(ORI, I) FORMAT(NOR, R) FORMAT(SUB, R) FORMAT(SUBU, R) FORMAT(XOR, R) FORMAT(XORI, I) }; #undef FORMAT #define MAX5 32 #define MAX16 65536 #define MAX26 67108864 static int get_reference(struct parser *parser, uint32_t *offset, enum reference_type type) { struct token token; if (next_token(parser, &token)) return M_ERROR; if (token.type == TOK_NUMBER) { *offset = token.number; return M_SUCCESS; } if (token.type != TOK_IDENT) { ERROR_POS(token, "unexpected token of type '%s'", token_str(token.type)); return M_ERROR; } struct reference reference = { .section = parser->sec_tbl.current, .index = parser->sec_tbl.current->count, .type = type, }; strcpy(reference.name, token.text); if (reftbl_push(&parser->ref_tbl, reference)) return M_ERROR; *offset = 0; return M_SUCCESS; } static int get_offset(struct parser *parser, uint32_t *offset) { return get_reference(parser, offset, REF_OFFESET); } static int get_target(struct parser *parser, uint32_t *offset) { return get_reference(parser, offset, REF_TARGET); } static int get_instruction(const char *ident, struct mips32_instruction *res) { for (int i = 0; i < __MIPS32_INS_LEN; i++) { struct mips32_instruction ins = mips32_instructions[i]; if (strcasecmp(ident, ins.name) == 0) { if (res != NULL) *res = ins; return M_SUCCESS; } } return M_ERROR; } static int is_instruction(const char *ident) { return get_instruction(ident, NULL); } static int parse_register(struct parser *parser, enum mips32_register *reg) { struct token token; if (assert_token(parser, TOK_REG, &token)) return M_ERROR; int len = strlen(token.text); int c0 = len > 0 ? token.text[0] : '\0', c1 = len > 1 ? token.text[1] : '\0', c2 = len > 2 ? token.text[2] : '\0', c3 = len > 3 ? token.text[3] : '\0'; // $zero if (c0 == 'z') { if (c1 == 'e' && c2 == 'r' && c3 == 'o') { *reg = MIPS32_REG_ZERO; return M_SUCCESS; } } // $a0-a3 $at else if (c0 == 'a') { if (c1 == 't') { *reg = MIPS32_REG_AT; return M_SUCCESS; } if (c1 >= '0' && c1 <= '3') { *reg = MIPS32_REG_A0; *reg += c1 - '0'; return M_SUCCESS; } } // $v0-v1 else if (c0 == 'v') { if (c1 >= '0' && c1 <= '1') { *reg = MIPS32_REG_V0; *reg += c1 - '0'; return M_SUCCESS; } } // $t0-t9 else if (c0 == 't') { if (c1 >= '0' && c1 <= '7') { *reg = MIPS32_REG_T0; *reg += c1 - '0'; return M_SUCCESS; } // reg T8-T9 are not in order with T0-T7 if (c1 >= '8' && c1 <= '9') { *reg = MIPS32_REG_T8; *reg += c1 - '8'; return M_SUCCESS; } } // $s0-s7 $sp else if (c0 == 's') { if (c1 >= '0' && c1 <= '7') { *reg = MIPS32_REG_S0; *reg += c1 - '0'; return M_SUCCESS; } if (c1 == 'p') { *reg = MIPS32_REG_SP; return M_SUCCESS; } } // $k0-k1 else if (c0 == 'k') { if (c1 >= '0' && c1 <= '1') { *reg = MIPS32_REG_K0; *reg += c1 - '0'; return M_SUCCESS; } } // $gp else if (c0 == 'g') { if (c1 == 'p') { *reg = MIPS32_REG_GP; return M_SUCCESS; } } // $fp else if (c0 == 'f') { if (c1 == 'p') { *reg = MIPS32_REG_FP; return M_SUCCESS; } } // $rp else if (c0 == 'r') { if (c1 == 'p') { *reg = MIPS32_REG_RA; return M_SUCCESS; } } // $0-31 (non aliased register names) else if (c0 >= '0' && c0 <= '9') { int i = c0 - '0'; if (c1 >= '0' && c1 <= '9') { i *= 10; i += c1 - '0'; } if (i <= 31) { *reg = i; return M_SUCCESS; } } ERROR_POS(token, "unknown register $%s", token.text); return M_ERROR; } static int parse_instruction_r(struct parser *parser, struct mips32_instruction *ins) { // format: rs, rt, rd enum mips32_register reg; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rd = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rs = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rt = reg; return M_SUCCESS; } static int parse_instruction_r2(struct parser *parser, struct mips32_instruction *ins) { // format: rs, rt enum mips32_register reg; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rs = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rt = reg; return M_SUCCESS; } static int parse_instruction_rs(struct parser *parser, struct mips32_instruction *ins) { // format: rs enum mips32_register reg; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rs = reg; return M_SUCCESS; } static int parse_instruction_rd(struct parser *parser, struct mips32_instruction *ins) { // format: rd enum mips32_register reg; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rd = reg; return M_SUCCESS; } static int parse_instruction_i(struct parser *parser, struct mips32_instruction *ins) { // format: rs, rt, immd enum mips32_register reg; struct token token; if (parse_register(parser, ®)) return M_ERROR; ins->I_data.rt = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; ins->I_data.rs = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (assert_token(parser, TOK_NUMBER, &token)) return M_ERROR; if (token.number >= MAX16) return M_ERROR; ins->I_data.immd = token.number; return M_SUCCESS; } static int parse_instruction_offset(struct parser *parser, uint32_t max, struct mips32_instruction *ins) { uint32_t n; if (get_offset(parser, &n) || n > max) return M_ERROR; switch (max) { case MAX26: ins->J_data.target = n; break; case MAX16: ins->B_data.offset = n; break; } return M_SUCCESS; } static int parse_instruction_j(struct parser *parser, struct mips32_instruction *ins) { uint32_t n; if (get_target(parser, &n) || n > MAX26) return M_ERROR; ins->J_data.target = n; return M_SUCCESS; } static int parse_instruction_jr(struct parser *parser, struct mips32_instruction *ins) { uint32_t n; if (get_target(parser, &n) || n > MAX26) return M_ERROR; ins->J_data.target = n; return M_SUCCESS; } static int parse_instruction_branch_equal(struct parser *parser, struct mips32_instruction *ins) { enum mips32_register reg; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rs = reg; return M_SUCCESS; } static int parse_instruction_branch(struct parser *parser, struct mips32_instruction *ins) { enum mips32_register reg; uint32_t n; if (parse_register(parser, ®)) return M_ERROR; ins->B_data.rs = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (get_offset(parser, &n) || n > MAX16) return M_ERROR; ins->B_data.offset = n; return M_SUCCESS; } static int parse_instruction_sl(struct parser *parser, struct mips32_instruction *ins) { enum mips32_register reg; uint32_t offset = 0; struct token token; if (parse_register(parser, ®)) return M_ERROR; ins->I_data.rt = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (peek_token(parser, &token)) return M_ERROR; if (token.type != TOK_LPAREN) if (get_offset(parser, &offset)) return M_ERROR; ins->I_data.immd = offset; if (peek_token(parser, &token)) return M_ERROR; if (token.type == TOK_NL) { ins->I_data.rs = MIPS32_REG_ZERO; return M_SUCCESS; } if (assert_token(parser, TOK_LPAREN, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; ins->I_data.rs = reg; if (assert_token(parser, TOK_RPAREN, NULL)) return M_ERROR; return M_SUCCESS; } static int parse_instruction_sli(struct parser *parser, struct mips32_instruction *ins) { enum mips32_register reg; struct token token; if (parse_register(parser, ®)) return M_ERROR; ins->I_data.rt = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX16) return M_ERROR; ins->I_data.immd = token.number; return M_SUCCESS; } static int parse_instruction_s(struct parser *parser, struct mips32_instruction *ins) { enum mips32_register reg; struct token token; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rd = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rt = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX5) return M_ERROR; ins->R_data.shamt = token.number; return M_SUCCESS; } static int parse_instruction_sv(struct parser *parser, struct mips32_instruction *ins) { enum mips32_register reg; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rd = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rt = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rs = reg; return M_SUCCESS; } static int parse_instruction(struct parser *parser, union mips_instruction *ins, struct token ident) { struct mips32_instruction instruction; enum mips32_parse_format format; int res = M_SUCCESS; if (get_instruction(ident.text, &instruction)) { ERROR_POS(ident, "unknown instruction '%s'", ident.text); return M_ERROR; } ins->mips32 = instruction; format = mips32_parse_formats[instruction.type]; switch (format) { case MIPS32_PARSE_R: res = parse_instruction_r(parser, &ins->mips32); break; case MIPS32_PARSE_R2: res = parse_instruction_r2(parser, &ins->mips32); break; case MIPS32_PARSE_RS: res = parse_instruction_rs(parser, &ins->mips32); break; case MIPS32_PARSE_RD: res = parse_instruction_rd(parser, &ins->mips32); break; case MIPS32_PARSE_I: res = parse_instruction_i(parser, &ins->mips32); break; case MIPS32_PARSE_J: res = parse_instruction_j(parser, &ins->mips32); break; case MIPS32_PARSE_JR: res = parse_instruction_jr(parser, &ins->mips32); break; case MIPS32_PARSE_O16: res = parse_instruction_offset(parser, MAX16, &ins->mips32); break; case MIPS32_PARSE_O26: res = parse_instruction_offset(parser, MAX26, &ins->mips32); break; case MIPS32_PARSE_BE: res = parse_instruction_branch_equal(parser, &ins->mips32); break; case MIPS32_PARSE_BZ: res = parse_instruction_branch(parser, &ins->mips32); break; case MIPS32_PARSE_SL: res = parse_instruction_sl(parser, &ins->mips32); break; case MIPS32_PARSE_SLI: res = parse_instruction_sli(parser, &ins->mips32); break; case MIPS32_PARSE_S: res = parse_instruction_s(parser, &ins->mips32); break; case MIPS32_PARSE_SV: res = parse_instruction_sv(parser, &ins->mips32); break; case MIPS32_PARSE_NONE: res = M_SUCCESS; break; } if (res == M_SUCCESS && assert_eol(parser)) return M_ERROR; return res; } static int parse_directive_align(struct parser *parser, struct mips32_directive *directive) { struct token token; if (assert_token(parser, TOK_NUMBER, &token)) return M_ERROR; if (token.number < 0) { ERROR_POS(token, "cannot align negative"); return M_ERROR; } if (token.number > MAX16) { ERROR_POS(token, "cannot align more than 65kb"); return M_ERROR; } directive->type = MIPS32_DIRECTIVE_ALIGN; directive->align = token.number; return M_SUCCESS; } static int parse_directive_space(struct parser *parser, struct mips32_directive *directive) { struct token token; if (assert_token(parser, TOK_NUMBER, &token)) return M_ERROR; if (token.number < 0) { ERROR_POS(token, "cannot reserve negative"); return M_ERROR; } if (token.number > MAX16) { ERROR_POS(token, "cannot reserve more than 65kb"); return M_ERROR; } directive->type = MIPS32_DIRECTIVE_SPACE; directive->space = token.number; return M_SUCCESS; } static int parse_directive_whb(struct parser *parser, struct mips32_directive *directive, enum mips32_directive_type type) { struct token token; uint32_t size = 0; uint32_t len = 0; switch (type) { case MIPS32_DIRECTIVE_WORD: size = UINT32_MAX; break; case MIPS32_DIRECTIVE_HALF: size = UINT16_MAX; break; case MIPS32_DIRECTIVE_BYTE: size = UINT8_MAX; break; default: } directive->type = type; while (1) { if (assert_token(parser, TOK_NUMBER, &token)) return M_ERROR; if (len >= MAX_ARG_LENGTH) { ERROR_POS(token, "directives cannot be longer than " "%d arguments", MAX_ARG_LENGTH); return M_ERROR; } if (token.number > size) { ERROR_POS(token, "number cannot execede max size of: " "%d", size); return M_ERROR; } switch (type) { case MIPS32_DIRECTIVE_WORD: directive->words[len++] = token.number; break; case MIPS32_DIRECTIVE_HALF: directive->halfs[len++] = token.number; break; case MIPS32_DIRECTIVE_BYTE: directive->bytes[len++] = token.number; break; default: } if (peek_token(parser, &token)) return M_ERROR; if (token.type == TOK_COMMA) { next_token(parser, NULL); continue; } break; } return M_SUCCESS; } static int parse_section(struct parser *parser, struct mips32_directive *directive, char name[MAX_LEX_LENGTH]) { directive->type = MIPS32_DIRECTIVE_SECTION; strcpy(directive->name, name); struct section *sec; if (sectbl_get(&parser->sec_tbl, &sec, name) == M_SUCCESS) { parser->sec_tbl.current = sec; return M_SUCCESS; } if (sectbl_alloc(&parser->sec_tbl, &sec, name)) return M_ERROR; parser->sec_tbl.current = sec; return M_SUCCESS; } static int parse_directive(struct parser *parser, union mips_directive *directive) { struct token token; if (assert_token(parser, TOK_DIRECTIVE, &token)) return M_ERROR; // .align n if (strcmp(token.text, "align") == 0) return parse_directive_align(parser, &directive->mips32); else if (strcmp(token.text, "space") == 0) return parse_directive_space(parser, &directive->mips32); else if (strcmp(token.text, "word") == 0) return parse_directive_whb(parser, &directive->mips32, MIPS32_DIRECTIVE_WORD); else if (strcmp(token.text, "half") == 0) return parse_directive_whb(parser, &directive->mips32, MIPS32_DIRECTIVE_HALF); else if (strcmp(token.text, "byte") == 0) return parse_directive_whb(parser, &directive->mips32, MIPS32_DIRECTIVE_BYTE); else return parse_section(parser, &directive->mips32, token.text); } int mips32_parser_init(struct lexer *lexer, struct parser *parser) { if (parser_init(lexer, parser)) return M_ERROR; parser->parse_instruction = parse_instruction; parser->is_instruction = is_instruction; parser->parse_directive = parse_directive; return M_SUCCESS; } void mips32_parser_free(struct parser *parser) { parser_free(parser); }