#include #include #include #include #include "parse.h" #include "lex.h" static int next_token(struct parser *parser, struct token *tok) { if (parser->peek.type != TOK_EOF) { if (tok != NULL) *tok = parser->peek; parser->peek.type = TOK_EOF; return M_SUCCESS; } struct token token; if (lexer_next(parser->lexer, &token)) return M_ERROR; if (tok != NULL) *tok = token; return M_SUCCESS; } static int peek_token(struct parser *parser, struct token *tok) { if (parser->peek.type == TOK_EOF) { if (next_token(parser, &parser->peek)) return M_ERROR; } if (tok != NULL) *tok = parser->peek; return M_SUCCESS; } static int assert_token(struct parser *parser, enum token_type type, struct token *tok) { struct token token; if (next_token(parser, &token)) return M_ERROR; if (token.type != type) { ERROR_POS(token, "expected a token of type '%s', got '%s'", token_str(type), token_str(token.type)); return M_ERROR; } if (tok != NULL) *tok = token; return M_SUCCESS; } static int assert_eol(struct parser *parser) { struct token token; if (next_token(parser, &token)) return M_ERROR; if (token.type != TOK_NL && token.type != TOK_EOF) { ERROR_POS(token, "expected a new line or end of file"); return M_ERROR; } return M_SUCCESS; } /* each instruction has a given parse format * internal to the parser */ enum mips_parse_format { // register type: rs, rt, td MIPS_PARSE_R, // register type: rs, rt MIPS_PARSE_R2, // register type: rd MIPS_PARSE_RD, // register type: rs MIPS_PARSE_RS, // imeediate type: rs, rt, immd MIPS_PARSE_I, // jump type: offset MIPS_PARSE_J, // jump type: register MIPS_PARSE_JR, // offset 16b type: offset MIPS_PARSE_O16, // offset 26b type: offset MIPS_PARSE_O26, // breanch equal type: rs, rt, offset MIPS_PARSE_BE, // branch zero type: rs, offset MIPS_PARSE_BZ, // store and load: rt, offset(base) MIPS_PARSE_SL, // store and load immediate: rt, immediate MIPS_PARSE_SLI, // shift: rd, rt, sa MIPS_PARSE_S, // shift variable: rd, rt, rs MIPS_PARSE_SV, // none: MIPS_PARSE_NONE, }; #define FORMAT(ins, format) \ [MIPS_INS_##ins] = MIPS_PARSE_##format, \ const enum mips_parse_format mips_parse_formats[] = { FORMAT(ADD, R) FORMAT(ADDI, I) FORMAT(ADDIU, I) FORMAT(ADDU, R) FORMAT(AND, R) FORMAT(ANDI, I) FORMAT(BAL, O16) FORMAT(BALC, O26) FORMAT(BC, O26) FORMAT(BEQ, BE) FORMAT(BEQL, BE) FORMAT(BGEZ, BZ) FORMAT(BGEZAL, BZ) FORMAT(BGEZALL, BZ) FORMAT(BGEZL, BZ) FORMAT(BGTZ, BZ) FORMAT(BGTZL, BZ) FORMAT(BLEZ, BZ) FORMAT(BLEZL, BZ) FORMAT(BLTZ, BZ) FORMAT(BLTZAL, BZ) FORMAT(BLTZALL, BZ) FORMAT(BLTZL, BZ) FORMAT(BNE, BE) FORMAT(BNEL, BE) FORMAT(DDIV, R2) FORMAT(DDIVU, R2) FORMAT(DIV, R2) FORMAT(DIVU, R2) FORMAT(J, J) FORMAT(JAL, J) FORMAT(JALR, JR) // TODO: handle rd FORMAT(JALX, J) FORMAT(JR, JR) FORMAT(LB, SL) FORMAT(LBU, SL) FORMAT(LH, SL) FORMAT(LHU, SL) FORMAT(LUI, SLI) FORMAT(LW, SL) FORMAT(LWL, SL) FORMAT(LWR, SL) FORMAT(MFHI, RD) FORMAT(MFLO, RD) FORMAT(MTHI, RS) FORMAT(MTLO, RS) FORMAT(MULT, R2) FORMAT(MULTU, R2) FORMAT(SB, SL) FORMAT(SH, SL) FORMAT(SW, SL) FORMAT(SWL, SL) FORMAT(SLL, S) FORMAT(SLLV, SV) FORMAT(SLT, R) FORMAT(SLTI, I) FORMAT(SLTIU, I) FORMAT(SLTU, R) FORMAT(SRA, S) FORMAT(SRAV, SV) FORMAT(SRL, S) FORMAT(SRLV, SV) FORMAT(SYSCALL, NONE) FORMAT(OR, R) FORMAT(ORI, I) FORMAT(NOR, R) FORMAT(SUB, R) FORMAT(SUBU, R) FORMAT(XOR, R) FORMAT(XORI, I) }; #undef FORMAT #define MAX5 32 #define MAX16 65536 #define MAX26 67108864 static int get_reference(struct parser *parser, uint32_t *offset, struct reference *ref, enum reference_type type) { struct token token; if (next_token(parser, &token)) return M_ERROR; if (token.type == TOK_NUMBER) { *offset = token.number; return M_SUCCESS; } if (token.type != TOK_IDENT) { ERROR_POS(token, "unexpected token of type '%s'", token_str(token.type)); return M_ERROR; } strcpy(ref->name, token.text); ref->type = type; ref->addend = 0; // return zero for now *offset = 0; return M_SUCCESS; } static int get_offset(struct parser *parser, uint32_t *offset, struct reference *ref) { return get_reference(parser, offset, ref, REF_OFFESET); } static int get_target(struct parser *parser, uint32_t *offset, struct reference *ref) { return get_reference(parser, offset, ref, REF_TARGET); } static int get_instruction(const char *ident, struct mips_instruction *res) { for (int i = 0; i < __MIPS_INS_LEN; i++) { struct mips_instruction ins = mips_instructions[i]; if (strcasecmp(ident, ins.name) == 0) { if (res != NULL) *res = ins; return M_SUCCESS; } } return M_ERROR; } static int parse_register(struct parser *parser, enum mips_register *reg) { struct token token; if (assert_token(parser, TOK_REG, &token)) return M_ERROR; int len = strlen(token.text); int c0 = len > 0 ? token.text[0] : '\0', c1 = len > 1 ? token.text[1] : '\0', c2 = len > 2 ? token.text[2] : '\0', c3 = len > 3 ? token.text[3] : '\0'; // $zero if (c0 == 'z') { if (c1 == 'e' && c2 == 'r' && c3 == 'o') { *reg = MIPS_REG_ZERO; return M_SUCCESS; } } // $a0-a3 $at else if (c0 == 'a') { if (c1 == 't') { *reg = MIPS_REG_AT; return M_SUCCESS; } if (c1 >= '0' && c1 <= '3') { *reg = MIPS_REG_A0; *reg += c1 - '0'; return M_SUCCESS; } } // $v0-v1 else if (c0 == 'v') { if (c1 >= '0' && c1 <= '1') { *reg = MIPS_REG_V0; *reg += c1 - '0'; return M_SUCCESS; } } // $t0-t9 else if (c0 == 't') { if (c1 >= '0' && c1 <= '7') { *reg = MIPS_REG_T0; *reg += c1 - '0'; return M_SUCCESS; } // reg T8-T9 are not in order with T0-T7 if (c1 >= '8' && c1 <= '9') { *reg = MIPS_REG_T8; *reg += c1 - '8'; return M_SUCCESS; } } // $s0-s7 $sp else if (c0 == 's') { if (c1 >= '0' && c1 <= '7') { *reg = MIPS_REG_S0; *reg += c1 - '0'; return M_SUCCESS; } if (c1 == 'p') { *reg = MIPS_REG_SP; return M_SUCCESS; } } // $k0-k1 else if (c0 == 'k') { if (c1 >= '0' && c1 <= '1') { *reg = MIPS_REG_K0; *reg += c1 - '0'; return M_SUCCESS; } } // $gp else if (c0 == 'g') { if (c1 == 'p') { *reg = MIPS_REG_GP; return M_SUCCESS; } } // $fp else if (c0 == 'f') { if (c1 == 'p') { *reg = MIPS_REG_FP; return M_SUCCESS; } } // $rp else if (c0 == 'r') { if (c1 == 'p') { *reg = MIPS_REG_RA; return M_SUCCESS; } } // $0-31 (non aliased register names) else if (c0 >= '0' && c0 <= '9') { int i = c0 - '0'; if (c1 >= '0' && c1 <= '9') { i *= 10; i += c1 - '0'; } if (i <= 31) { *reg = i; return M_SUCCESS; } } ERROR_POS(token, "unknown register $%s", token.text); return M_ERROR; } static int parse_instruction_r(struct parser *parser, struct mips_instruction *ins) { // format: rs, rt, rd enum mips_register reg; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rd = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rs = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rt = reg; return M_SUCCESS; } static int parse_instruction_r2(struct parser *parser, struct mips_instruction *ins) { // format: rs, rt enum mips_register reg; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rs = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rt = reg; return M_SUCCESS; } static int parse_instruction_rs(struct parser *parser, struct mips_instruction *ins) { // format: rs enum mips_register reg; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rs = reg; return M_SUCCESS; } static int parse_instruction_rd(struct parser *parser, struct mips_instruction *ins) { // format: rd enum mips_register reg; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rd = reg; return M_SUCCESS; } static int parse_instruction_i(struct parser *parser, struct mips_instruction *ins) { // format: rs, rt, immd enum mips_register reg; struct token token; if (parse_register(parser, ®)) return M_ERROR; ins->I_data.rt = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; ins->I_data.rs = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (assert_token(parser, TOK_NUMBER, &token)) return M_ERROR; if (token.number >= MAX16) return M_ERROR; ins->I_data.immd = token.number; return M_SUCCESS; } static int parse_instruction_offset(struct parser *parser, uint32_t max, struct mips_instruction *ins, struct reference *ref) { uint32_t n; if (get_offset(parser, &n, ref) || n > max) return M_ERROR; switch (max) { case MAX26: ins->J_data.target = n; break; case MAX16: ins->B_data.offset = n; break; } return M_SUCCESS; } static int parse_instruction_j(struct parser *parser, struct mips_instruction *ins, struct reference *ref) { uint32_t n; if (get_target(parser, &n, ref) || n > MAX26) return M_ERROR; ins->J_data.target = n; return M_SUCCESS; } static int parse_instruction_jr(struct parser *parser, struct mips_instruction *ins, struct reference *ref) { uint32_t n; if (get_target(parser, &n, ref) || n > MAX26) return M_ERROR; ins->J_data.target = n; return M_SUCCESS; } static int parse_instruction_branch_equal(struct parser *parser, struct mips_instruction *ins) { enum mips_register reg; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rs = reg; return M_SUCCESS; } static int parse_instruction_branch(struct parser *parser, struct mips_instruction *ins, struct reference *ref) { enum mips_register reg; uint32_t n; if (parse_register(parser, ®)) return M_ERROR; ins->B_data.rs = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (get_offset(parser, &n, ref) || n > MAX16) return M_ERROR; ins->B_data.offset = n; return M_SUCCESS; } static int parse_instruction_sl(struct parser *parser, struct mips_instruction *ins, struct reference *ref) { enum mips_register reg; uint32_t offset = 0; struct token token; if (parse_register(parser, ®)) return M_ERROR; ins->I_data.rt = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (peek_token(parser, &token)) return M_ERROR; if (token.type != TOK_LPAREN) if (get_offset(parser, &offset, ref)) return M_ERROR; ins->I_data.immd = offset; if (peek_token(parser, &token)) return M_ERROR; if (token.type == TOK_NL) { ins->I_data.rs = MIPS_REG_ZERO; return M_SUCCESS; } if (assert_token(parser, TOK_LPAREN, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; ins->I_data.rs = reg; if (assert_token(parser, TOK_RPAREN, NULL)) return M_ERROR; return M_SUCCESS; } static int parse_instruction_sli(struct parser *parser, struct mips_instruction *ins) { enum mips_register reg; struct token token; if (parse_register(parser, ®)) return M_ERROR; ins->I_data.rt = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX16) return M_ERROR; ins->I_data.immd = token.number; return M_SUCCESS; } static int parse_instruction_s(struct parser *parser, struct mips_instruction *ins) { enum mips_register reg; struct token token; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rd = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rt = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX5) return M_ERROR; ins->R_data.shamt = token.number; return M_SUCCESS; } static int parse_instruction_sv(struct parser *parser, struct mips_instruction *ins) { enum mips_register reg; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rd = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rt = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; ins->R_data.rs = reg; return M_SUCCESS; } static int parse_instruction(struct parser *parser, struct ins_expr *expr, struct token ident) { struct mips_instruction instruction; enum mips_parse_format format; int res = M_SUCCESS; if (get_instruction(ident.text, &instruction)) { ERROR_POS(ident, "unknown instruction '%s'", ident.text); return M_ERROR; } struct mips_instruction *ins = &expr->ins[0]; struct reference *ref = &expr->ref[0]; // this will only ever generate one instruction expr->ins_len = 1; *ins = instruction; ref->type = REF_NONE; format = mips_parse_formats[instruction.type]; switch (format) { case MIPS_PARSE_R: res = parse_instruction_r(parser, ins); break; case MIPS_PARSE_R2: res = parse_instruction_r2(parser, ins); break; case MIPS_PARSE_RS: res = parse_instruction_rs(parser, ins); break; case MIPS_PARSE_RD: res = parse_instruction_rd(parser, ins); break; case MIPS_PARSE_I: res = parse_instruction_i(parser, ins); break; case MIPS_PARSE_J: res = parse_instruction_j(parser, ins, ref); break; case MIPS_PARSE_JR: res = parse_instruction_jr(parser, ins, ref); break; case MIPS_PARSE_O16: res = parse_instruction_offset(parser, MAX16, ins, ref); break; case MIPS_PARSE_O26: res = parse_instruction_offset(parser, MAX26, ins, ref); break; case MIPS_PARSE_BE: res = parse_instruction_branch_equal(parser, ins); break; case MIPS_PARSE_BZ: res = parse_instruction_branch(parser, ins, ref); break; case MIPS_PARSE_SL: res = parse_instruction_sl(parser, ins, ref); break; case MIPS_PARSE_SLI: res = parse_instruction_sli(parser, ins); break; case MIPS_PARSE_S: res = parse_instruction_s(parser, ins); break; case MIPS_PARSE_SV: res = parse_instruction_sv(parser, ins); break; case MIPS_PARSE_NONE: res = M_SUCCESS; break; } if (res == M_SUCCESS && assert_eol(parser)) return M_ERROR; return res; } static int parse_directive_align(struct parser *parser, struct mips_directive *directive) { struct token token; if (assert_token(parser, TOK_NUMBER, &token)) return M_ERROR; if (token.number < 0) { ERROR_POS(token, "cannot align negative"); return M_ERROR; } if (token.number > MAX16) { ERROR_POS(token, "cannot align more than 65kb"); return M_ERROR; } directive->type = MIPS_DIRECTIVE_ALIGN; directive->align = token.number; return M_SUCCESS; } static int parse_directive_space(struct parser *parser, struct mips_directive *directive) { struct token token; if (assert_token(parser, TOK_NUMBER, &token)) return M_ERROR; if (token.number < 0) { ERROR_POS(token, "cannot reserve negative"); return M_ERROR; } if (token.number > MAX16) { ERROR_POS(token, "cannot reserve more than 65kb"); return M_ERROR; } directive->type = MIPS_DIRECTIVE_SPACE; directive->space = token.number; return M_SUCCESS; } static int parse_directive_whb(struct parser *parser, struct mips_directive *directive, enum mips_directive_type type) { struct token token; uint32_t size = 0; uint32_t len = 0; switch (type) { case MIPS_DIRECTIVE_WORD: size = UINT32_MAX; break; case MIPS_DIRECTIVE_HALF: size = UINT16_MAX; break; case MIPS_DIRECTIVE_BYTE: size = UINT8_MAX; break; default: } directive->type = type; while (1) { if (assert_token(parser, TOK_NUMBER, &token)) return M_ERROR; if (len >= MAX_ARG_LENGTH) { ERROR_POS(token, "directives cannot be longer than " "%d arguments", MAX_ARG_LENGTH); return M_ERROR; } if (token.number > size) { ERROR_POS(token, "number cannot execede max size of: " "%d", size); return M_ERROR; } switch (type) { case MIPS_DIRECTIVE_WORD: directive->words[len++] = token.number; break; case MIPS_DIRECTIVE_HALF: directive->halfs[len++] = token.number; break; case MIPS_DIRECTIVE_BYTE: directive->bytes[len++] = token.number; break; default: } if (peek_token(parser, &token)) return M_ERROR; if (token.type == TOK_COMMA) { next_token(parser, NULL); continue; } break; } directive->len = len; return M_SUCCESS; } static int parse_directive_extern(struct parser *parser, struct mips_directive *directive) { struct token token; if (assert_token(parser, TOK_IDENT, &token)) return M_ERROR; directive->type = MIPS_DIRECTIVE_EXTERN; strcpy(directive->name, token.text); return M_SUCCESS; } static int parse_directive_globl(struct parser *parser, struct mips_directive *directive) { struct token token; if (assert_token(parser, TOK_IDENT, &token)) return M_ERROR; directive->type = MIPS_DIRECTIVE_GLOBL; strcpy(directive->name, token.text); return M_SUCCESS; } static int parse_directive_ascii(struct parser *parser, struct mips_directive *directive, enum mips_directive_type type) { struct token token; if (assert_token(parser, TOK_STRING, &token)) return M_ERROR; directive->type = type; strcpy(directive->name, token.text); return M_SUCCESS; } static int parse_section(struct mips_directive *directive, char name[MAX_LEX_LENGTH]) { directive->type = MIPS_DIRECTIVE_SECTION; strcpy(directive->name, name); return M_SUCCESS; } static int parse_directive(struct parser *parser, struct mips_directive *directive) { struct token token; if (assert_token(parser, TOK_DIRECTIVE, &token)) return M_ERROR; // .align n if (strcmp(token.text, "align") == 0) return parse_directive_align(parser, directive); else if (strcmp(token.text, "space") == 0) return parse_directive_space(parser, directive); else if (strcmp(token.text, "word") == 0) return parse_directive_whb(parser, directive, MIPS_DIRECTIVE_WORD); else if (strcmp(token.text, "half") == 0) return parse_directive_whb(parser, directive, MIPS_DIRECTIVE_HALF); else if (strcmp(token.text, "byte") == 0) return parse_directive_whb(parser, directive, MIPS_DIRECTIVE_BYTE); else if (strcmp(token.text, "extern") == 0) return parse_directive_extern(parser, directive); else if (strcmp(token.text, "globl") == 0) return parse_directive_globl(parser, directive); else if (strcmp(token.text, "ascii") == 0) return parse_directive_ascii(parser, directive, MIPS_DIRECTIVE_ASCII); else if (strcmp(token.text, "asciiz") == 0) return parse_directive_ascii(parser, directive, MIPS_DIRECTIVE_ASCIIZ); else return parse_section(directive, token.text); } static int parse_constant(struct parser *parser, struct const_expr *expr, struct token ident) { struct token number; if (assert_token(parser, TOK_EQUAL, NULL)) return M_ERROR; if (assert_token(parser, TOK_NUMBER, &number)) return M_ERROR; strcpy(expr->name,ident.text); expr->value = number.number; return M_SUCCESS; } static int parser_handle_ident(struct parser *parser, struct expr *expr) { struct token ident; struct token peek; if (assert_token(parser, TOK_IDENT, &ident)) return M_ERROR; if (peek_token(parser, &peek)) return M_ERROR; if (peek.type == TOK_EQUAL) { expr->type = EXPR_CONSTANT; return parse_constant(parser, &expr->constant, ident); } else { expr->type = EXPR_INS; return parse_instruction(parser, &expr->ins, ident); } } static int parse_label(struct parser *parser, struct expr *expr) { struct token token; if (assert_token(parser, TOK_LABEL, &token)) return M_ERROR; strcpy(expr->label, token.text); return M_SUCCESS; } int parser_next(struct parser *parser, struct expr *expr) { struct token token; int res = M_SUCCESS; again: if (peek_token(parser, &token)) return M_ERROR; switch (token.type) { case TOK_NL: next_token(parser, NULL); goto again; case TOK_EOF: res = M_EOF; break; case TOK_LABEL: expr->type = EXPR_LABEL; res = parse_label(parser, expr); break; case TOK_DIRECTIVE: expr->type = EXPR_DIRECTIVE; res = parse_directive(parser, &expr->directive); break; case TOK_IDENT: res = parser_handle_ident(parser, expr); break; default: ERROR_POS(token, "unexpected token '%s'", token_str(token.type)); return M_ERROR; } return res; } int parser_init(struct lexer *lexer, struct parser *parser) { parser->lexer = lexer; parser->peek.type = TOK_EOF; return M_SUCCESS; } void parser_free(struct parser *parser) { (void) parser; }