diff options
Diffstat (limited to '')
-rw-r--r-- | masm/parse.c | 946 |
1 files changed, 932 insertions, 14 deletions
diff --git a/masm/parse.c b/masm/parse.c
index 452045b..ccabf41 100644
--- a/masm/parse.c
+++ b/masm/parse.c
@@ -6,7 +6,7 @@
 #include "parse.h"
 #include "lex.h"
 
-int next_token(struct parser *parser, struct token *tok)
+static int next_token(struct parser *parser, struct token *tok)
 {
 	if (parser->peek.type != TOK_EOF) {
 		if (tok != NULL)
@@ -23,7 +23,7 @@ int next_token(struct parser *parser, struct token *tok)
 }
 
 
-int peek_token(struct parser *parser, struct token *tok)
+static int peek_token(struct parser *parser, struct token *tok)
 {
 	if (parser->peek.type == TOK_EOF) {
 		if (next_token(parser, &parser->peek))
@@ -35,7 +35,7 @@ int peek_token(struct parser *parser, struct token *tok)
 }
 
 
-int assert_token(struct parser *parser, enum token_type type,
+static int assert_token(struct parser *parser, enum token_type type,
 		 struct token *tok)
 {
 	struct token token;
@@ -51,7 +51,7 @@ int assert_token(struct parser *parser, enum token_type type,
 	return M_SUCCESS;
 }
 
-int assert_eol(struct parser *parser)
+static int assert_eol(struct parser *parser)
 {
 	struct token token;
 	if (next_token(parser, &token))
@@ -63,6 +63,930 @@ int assert_eol(struct parser *parser)
 	return M_SUCCESS;
 }
 
+/* each instruction has a given parse format
+ * internal to the parser */
+enum mips_parse_format {
+	// register type: rd, rs, rt
+	MIPS_PARSE_R,
+	// register type: rs, rt
+	MIPS_PARSE_R2,
+	// register type: rd
+	MIPS_PARSE_RD,
+	// register type: rs
+	MIPS_PARSE_RS,
+	// immediate type: rt, rs, immd
+	MIPS_PARSE_I,
+	// jump type: target
+	MIPS_PARSE_J,
+	// jump type: register
+	MIPS_PARSE_JR,
+	// offset 16b type: offset
+	MIPS_PARSE_O16,
+	// offset 26b type: offset
+	MIPS_PARSE_O26,
+	// branch equal type: rs, rt, offset
+	MIPS_PARSE_BE,
+	// branch zero type: rs, offset
+	MIPS_PARSE_BZ,
+	// store and load: rt, offset(base)
+	MIPS_PARSE_SL,
+	// store and load immediate: rt, immediate
+	MIPS_PARSE_SLI,
+	// shift: rd, rt, sa
+	MIPS_PARSE_S,
+	// shift variable: rd, rt, rs
+	MIPS_PARSE_SV,
+	// none:
+	MIPS_PARSE_NONE,
+};
+
+#define FORMAT(ins, format) \
+	[MIPS_INS_##ins] = MIPS_PARSE_##format,
+
+/* parse format of every instruction, indexed by MIPS_INS_* type;
+ * an instruction with no entry below defaults to 0 (MIPS_PARSE_R) */
+const enum mips_parse_format mips_parse_formats[] = {
+	FORMAT(ADD, R)
+	FORMAT(ADDI, I)
+	FORMAT(ADDIU, I)
+	FORMAT(ADDU, R)
+	FORMAT(AND, R)
+	FORMAT(ANDI, I)
+	FORMAT(BAL, O16)
+	FORMAT(BALC, O26)
+	FORMAT(BC, O26)
+	FORMAT(BEQ, BE)
+	FORMAT(BEQL, BE)
+	FORMAT(BGEZ, BZ)
+	FORMAT(BGEZAL, BZ)
+	FORMAT(BGEZALL, BZ)
+	FORMAT(BGEZL, BZ)
+	FORMAT(BGTZ, BZ)
+	FORMAT(BGTZL, BZ)
+	FORMAT(BLEZ, BZ)
+	FORMAT(BLEZL, BZ)
+	FORMAT(BLTZ, BZ)
+	FORMAT(BLTZAL, BZ)
+	FORMAT(BLTZALL, BZ)
+	FORMAT(BLTZL, BZ)
+	FORMAT(BNE, BE)
+	FORMAT(BNEL, BE)
+	FORMAT(DDIV, R2)
+	FORMAT(DDIVU, R2)
+	FORMAT(DIV, R2)
+	FORMAT(DIVU, R2)
+	FORMAT(J, J)
+	FORMAT(JAL, J)
+	FORMAT(JALR, JR) // TODO: handle rd
+	FORMAT(JALX, J)
+	FORMAT(JR, JR)
+	FORMAT(LB, SL)
+	FORMAT(LBU, SL)
+	FORMAT(LH, SL)
+	FORMAT(LHU, SL)
+	FORMAT(LUI, SLI)
+	FORMAT(LW, SL)
+	FORMAT(LWL, SL)
+	FORMAT(LWR, SL)
+	FORMAT(MFHI, RD)
+	FORMAT(MFLO, RD)
+	FORMAT(MTHI, RS)
+	FORMAT(MTLO, RS)
+	FORMAT(MULT, R2)
+	FORMAT(MULTU, R2)
+	FORMAT(SB, SL)
+	FORMAT(SH, SL)
+	FORMAT(SW, SL)
+	FORMAT(SWL, SL)
+	FORMAT(SLL, S)
+	FORMAT(SLLV, SV)
+	FORMAT(SLT, R)
+	FORMAT(SLTI, I)
+	FORMAT(SLTIU, I)
+	FORMAT(SLTU, R)
+	FORMAT(SRA, S)
+	FORMAT(SRAV, SV)
+	FORMAT(SRL, S)
+	FORMAT(SRLV, SV)
+	FORMAT(SYSCALL, NONE)
+	FORMAT(OR, R)
+	FORMAT(ORI, I)
+	FORMAT(NOR, R)
+	FORMAT(SUB, R)
+	FORMAT(SUBU, R)
+	FORMAT(XOR, R)
+	FORMAT(XORI, I)
+};
+
+#undef FORMAT
+
+/* exclusive upper bounds (2^n) of the 5, 16 and 26 bit instruction fields */
+#define MAX5  32
+#define MAX16 65536
+#define MAX26 67108864
+
+/* read a number or an identifier; a number is stored in offset directly,
+ * an identifier is recorded in ref (with the given type) and offset is
+ * zeroed for now */
+static int get_reference(struct parser *parser, uint32_t *offset,
+			 struct reference *ref, enum reference_type type)
+{
+	struct token token;
+
+	if (next_token(parser, &token))
+		return M_ERROR;
+
+	if (token.type == TOK_NUMBER) {
+		*offset = token.number;
+		return M_SUCCESS;
+	}
+
+	if (token.type != TOK_IDENT) {
+		ERROR_POS(token, "unexpected token of type '%s'",
+			  token_str(token.type));
+		return M_ERROR;
+	}
+
+	// NOTE(review): assumes ref->name can hold any token text - confirm
+	strcpy(ref->name, token.text);
+	ref->type = type;
+	ref->addend = 0;
+
+	// return zero for now
+	*offset = 0;
+	return M_SUCCESS;
+}
+
+/* get_reference for a REF_OFFESET reference */
+static int get_offset(struct parser *parser, uint32_t *offset,
+		      struct reference *ref)
+{
+	return get_reference(parser, offset, ref, REF_OFFESET);
+}
+
+/* get_reference for a REF_TARGET reference */
+static int get_target(struct parser *parser, uint32_t *offset,
+		      struct reference *ref)
+{
+	return get_reference(parser, offset, ref, REF_TARGET);
+}
+
+/* look up an instruction by its (case insensitive) mnemonic and copy its
+ * template into res when res is not NULL; M_ERROR if there is no match */
+static int get_instruction(const char *ident, struct mips_instruction *res)
+{
+	for (int i = 0; i < __MIPS_INS_LEN; i++) {
+		// compare in place, only the match gets copied
+		const struct mips_instruction *ins = &mips_instructions[i];
+
+		if (strcasecmp(ident, ins->name) != 0)
+			continue;
+
+		if (res != NULL)
+			*res = *ins;
+		return M_SUCCESS;
+	}
+	return M_ERROR;
+}
+
+/* parse a register token into reg; the token text holds the register
+ * name without the leading '$', either an alias (zero, at, v0, ...) or
+ * a plain number 0-31, and has to match a name exactly */
+static int parse_register(struct parser *parser, enum mips_register *reg)
+{
+	struct token token;
+	if (assert_token(parser, TOK_REG, &token))
+		return M_ERROR;
+
+	size_t len = strlen(token.text);
+	int c0 = len > 0 ? token.text[0] : '\0',
+	    c1 = len > 1 ? token.text[1] : '\0';
+
+	// $zero
+	if (strcmp(token.text, "zero") == 0) {
+		*reg = MIPS_REG_ZERO;
+		return M_SUCCESS;
+	}
+
+	// every other name is one or two characters long; anything
+	// longer (e.g. $a0x or $123) must not match on its prefix
+	if (len > 2)
+		goto unknown;
+
+	// $a0-a3 $at
+	if (c0 == 'a') {
+		if (c1 == 't') {
+			*reg = MIPS_REG_AT;
+			return M_SUCCESS;
+		}
+		if (c1 >= '0' && c1 <= '3') {
+			*reg = MIPS_REG_A0;
+			*reg += c1 - '0';
+			return M_SUCCESS;
+		}
+	}
+
+	// $v0-v1
+	else if (c0 == 'v') {
+		if (c1 >= '0' && c1 <= '1') {
+			*reg = MIPS_REG_V0;
+			*reg += c1 - '0';
+			return M_SUCCESS;
+		}
+	}
+
+	// $t0-t9
+	else if (c0 == 't') {
+		if (c1 >= '0' && c1 <= '7') {
+			*reg = MIPS_REG_T0;
+			*reg += c1 - '0';
+			return M_SUCCESS;
+		}
+		// reg T8-T9 are not in order with T0-T7
+		if (c1 >= '8' && c1 <= '9') {
+			*reg = MIPS_REG_T8;
+			*reg += c1 - '8';
+			return M_SUCCESS;
+		}
+	}
+
+	// $s0-s7 $sp
+	else if (c0 == 's') {
+		if (c1 >= '0' && c1 <= '7') {
+			*reg = MIPS_REG_S0;
+			*reg += c1 - '0';
+			return M_SUCCESS;
+		}
+		if (c1 == 'p') {
+			*reg = MIPS_REG_SP;
+			return M_SUCCESS;
+		}
+	}
+
+	// $k0-k1
+	else if (c0 == 'k') {
+		if (c1 >= '0' && c1 <= '1') {
+			*reg = MIPS_REG_K0;
+			*reg += c1 - '0';
+			return M_SUCCESS;
+		}
+	}
+
+	// $gp
+	else if (c0 == 'g') {
+		if (c1 == 'p') {
+			*reg = MIPS_REG_GP;
+			return M_SUCCESS;
+		}
+	}
+
+	// $fp
+	else if (c0 == 'f') {
+		if (c1 == 'p') {
+			*reg = MIPS_REG_FP;
+			return M_SUCCESS;
+		}
+	}
+
+	// $ra ($rp stays accepted, it used to be the only spelling)
+	else if (c0 == 'r') {
+		if (c1 == 'a' || c1 == 'p') {
+			*reg = MIPS_REG_RA;
+			return M_SUCCESS;
+		}
+	}
+
+	// $0-31 (non aliased register names)
+	else if (c0 >= '0' && c0 <= '9') {
+		int i = c0 - '0';
+		if (len == 2) {
+			// a second character has to be a digit as well
+			if (c1 < '0' || c1 > '9')
+				goto unknown;
+			i = i * 10 + (c1 - '0');
+		}
+		if (i <= 31) {
+			*reg = i;
+			return M_SUCCESS;
+		}
+	}
+
+unknown:
+	ERROR_POS(token, "unknown register $%s", token.text);
+	return M_ERROR;
+}
+
+/* the parse_instruction_* helpers below each read the operands of one
+ * parse format into ins (and ref, for operands that may name a label);
+ * the end of the line is checked afterwards by parse_instruction */
+
+static int parse_instruction_r(struct parser *parser,
+			       struct mips_instruction *ins)
+{
+	// format: rd, rs, rt
+	enum mips_register reg;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->R_data.rd = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->R_data.rs = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->R_data.rt = reg;
+
+	return M_SUCCESS;
+}
+
+static int parse_instruction_r2(struct parser *parser,
+				struct mips_instruction *ins)
+{
+	// format: rs, rt
+	enum mips_register reg;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->R_data.rs = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->R_data.rt = reg;
+
+	return M_SUCCESS;
+}
+
+static int parse_instruction_rs(struct parser *parser,
+				struct mips_instruction *ins)
+{
+	// format: rs
+	enum mips_register reg;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->R_data.rs = reg;
+
+	return M_SUCCESS;
+}
+
+static int parse_instruction_rd(struct parser *parser,
+				struct mips_instruction *ins)
+{
+	// format: rd
+	enum mips_register reg;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->R_data.rd = reg;
+
+	return M_SUCCESS;
+}
+
+static int parse_instruction_i(struct parser *parser,
+			       struct mips_instruction *ins)
+{
+	// format: rt, rs, immd
+	enum mips_register reg;
+	struct token token;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->I_data.rt = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->I_data.rs = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (assert_token(parser, TOK_NUMBER, &token))
+		return M_ERROR;
+
+	if (token.number >= MAX16)
+		return M_ERROR;
+	ins->I_data.immd = token.number;
+
+	return M_SUCCESS;
+}
+
+static int parse_instruction_offset(struct parser *parser,
+				    uint32_t max,
+				    struct mips_instruction *ins,
+				    struct reference *ref)
+{
+	// format: offset, which has to be below max (MAX16 or MAX26)
+	uint32_t n;
+	if (get_offset(parser, &n, ref) || n >= max)
+		return M_ERROR;
+
+	switch (max) {
+	case MAX26:
+		ins->J_data.target = n;
+		break;
+	case MAX16:
+		ins->B_data.offset = n;
+		break;
+	}
+
+	return M_SUCCESS;
+}
+
+static int parse_instruction_j(struct parser *parser,
+			       struct mips_instruction *ins,
+			       struct reference *ref)
+{
+	// format: target
+	uint32_t n;
+	if (get_target(parser, &n, ref) || n >= MAX26)
+		return M_ERROR;
+	ins->J_data.target = n;
+
+	return M_SUCCESS;
+}
+
+static int parse_instruction_jr(struct parser *parser,
+				struct mips_instruction *ins)
+{
+	// format: rs (the register holding the jump target)
+	enum mips_register reg;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->R_data.rs = reg;
+
+	return M_SUCCESS;
+}
+
+static int parse_instruction_branch_equal(struct parser *parser,
+					  struct mips_instruction *ins,
+					  struct reference *ref)
+{
+	// format: rs, rt, offset
+	// NOTE(review): uses I_data for all three fields, assuming it
+	// shares its layout with the branch encoding - confirm
+	enum mips_register reg;
+	uint32_t n;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->I_data.rs = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->I_data.rt = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (get_offset(parser, &n, ref) || n >= MAX16)
+		return M_ERROR;
+	ins->I_data.immd = n;
+
+	return M_SUCCESS;
+}
+
+static int parse_instruction_branch(struct parser *parser,
+				    struct mips_instruction *ins,
+				    struct reference *ref)
+{
+	// format: rs, offset
+	enum mips_register reg;
+	uint32_t n;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->B_data.rs = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (get_offset(parser, &n, ref) || n >= MAX16)
+		return M_ERROR;
+	ins->B_data.offset = n;
+
+	return M_SUCCESS;
+}
+
+static int parse_instruction_sl(struct parser *parser,
+				struct mips_instruction *ins,
+				struct reference *ref)
+{
+	// format: rt, offset(base) where either the offset or the
+	// (base) part may be left out
+	enum mips_register reg;
+	uint32_t offset = 0;
+	struct token token;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->I_data.rt = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (peek_token(parser, &token))
+		return M_ERROR;
+
+	// no offset in front of the parenthesis means an offset of zero
+	if (token.type != TOK_LPAREN) {
+		if (get_offset(parser, &offset, ref))
+			return M_ERROR;
+	}
+	ins->I_data.immd = offset;
+
+	if (peek_token(parser, &token))
+		return M_ERROR;
+
+	// no base register means the offset is relative to $zero
+	if (token.type == TOK_NL) {
+		ins->I_data.rs = MIPS_REG_ZERO;
+		return M_SUCCESS;
+	}
+
+	if (assert_token(parser, TOK_LPAREN, NULL))
+		return M_ERROR;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->I_data.rs = reg;
+
+	if (assert_token(parser, TOK_RPAREN, NULL))
+		return M_ERROR;
+
+	return M_SUCCESS;
+}
+
+static int parse_instruction_sli(struct parser *parser,
+				 struct mips_instruction *ins)
+{
+	// format: rt, immediate
+	enum mips_register reg;
+	struct token token;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->I_data.rt = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (assert_token(parser, TOK_NUMBER, &token) || token.number >= MAX16)
+		return M_ERROR;
+	ins->I_data.immd = token.number;
+
+	return M_SUCCESS;
+}
+
+static int parse_instruction_s(struct parser *parser,
+			       struct mips_instruction *ins)
+{
+	// format: rd, rt, sa
+	enum mips_register reg;
+	struct token token;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->R_data.rd = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->R_data.rt = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (assert_token(parser, TOK_NUMBER, &token))
+		return M_ERROR;
+
+	// the shift amount is a 5 bit field, so only 0-31 fit
+	if (token.number < 0 || token.number >= MAX5)
+		return M_ERROR;
+	ins->R_data.shamt = token.number;
+
+	return M_SUCCESS;
+}
+
+static int parse_instruction_sv(struct parser *parser,
+				struct mips_instruction *ins)
+{
+	// format: rd, rt, rs
+	enum mips_register reg;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->R_data.rd = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->R_data.rt = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->R_data.rs = reg;
+
+	return M_SUCCESS;
+}
+
+/* parse the instruction named by ident together with its operands and
+ * the end of the line into expr, as a single instruction */
+static int parse_instruction(struct parser *parser,
+			     struct ins_expr *expr,
+			     struct token ident)
+{
+	struct mips_instruction instruction;
+	enum mips_parse_format format;
+	int res = M_SUCCESS;
+
+	if (get_instruction(ident.text, &instruction)) {
+		ERROR_POS(ident, "unknown instruction '%s'", ident.text);
+		return M_ERROR;
+	}
+
+	struct mips_instruction *ins = &expr->ins[0];
+	struct reference *ref = &expr->ref[0];
+
+	// this will only ever generate one instruction
+	expr->ins_len = 1;
+	*ins = instruction;
+	ref->type = REF_NONE;
+
+	format = mips_parse_formats[instruction.type];
+	switch (format) {
+	case MIPS_PARSE_R:
+		res = parse_instruction_r(parser, ins);
+		break;
+	case MIPS_PARSE_R2:
+		res = parse_instruction_r2(parser, ins);
+		break;
+	case MIPS_PARSE_RS:
+		res = parse_instruction_rs(parser, ins);
+		break;
+	case MIPS_PARSE_RD:
+		res = parse_instruction_rd(parser, ins);
+		break;
+	case MIPS_PARSE_I:
+		res = parse_instruction_i(parser, ins);
+		break;
+	case MIPS_PARSE_J:
+		res = parse_instruction_j(parser, ins, ref);
+		break;
+	case MIPS_PARSE_JR:
+		res = parse_instruction_jr(parser, ins);
+		break;
+	case MIPS_PARSE_O16:
+		res = parse_instruction_offset(parser, MAX16, ins, ref);
+		break;
+	case MIPS_PARSE_O26:
+		res = parse_instruction_offset(parser, MAX26, ins, ref);
+		break;
+	case MIPS_PARSE_BE:
+		res = parse_instruction_branch_equal(parser, ins, ref);
+		break;
+	case MIPS_PARSE_BZ:
+		res = parse_instruction_branch(parser, ins, ref);
+		break;
+	case MIPS_PARSE_SL:
+		res = parse_instruction_sl(parser, ins, ref);
+		break;
+	case MIPS_PARSE_SLI:
+		res = parse_instruction_sli(parser, ins);
+		break;
+	case MIPS_PARSE_S:
+		res = parse_instruction_s(parser, ins);
+		break;
+	case MIPS_PARSE_SV:
+		res = parse_instruction_sv(parser, ins);
+		break;
+	case MIPS_PARSE_NONE:
+		res = M_SUCCESS;
+		break;
+	}
+
+	if (res == M_SUCCESS && assert_eol(parser))
+		return M_ERROR;
+
+	return res;
+}
+
+
+/* .align n, with n between 0 and MAX16 */
+static int parse_directive_align(struct parser *parser,
+				 struct mips_directive *directive)
+{
+	struct token token;
+	if (assert_token(parser, TOK_NUMBER, &token))
+		return M_ERROR;
+
+	if (token.number < 0) {
+		ERROR_POS(token, "cannot align negative");
+		return M_ERROR;
+	}
+
+	if (token.number > MAX16) {
+		ERROR_POS(token, "cannot align more than 65kb");
+		return M_ERROR;
+	}
+
+	directive->type = MIPS_DIRECTIVE_ALIGN;
+	directive->align = token.number;
+
+	return M_SUCCESS;
+}
+
+/* .space n, with n between 0 and MAX16 */
+static int parse_directive_space(struct parser *parser,
+				 struct mips_directive *directive)
+{
+	struct token token;
+	if (assert_token(parser, TOK_NUMBER, &token))
+		return M_ERROR;
+
+	if (token.number < 0) {
+		ERROR_POS(token, "cannot reserve negative");
+		return M_ERROR;
+	}
+
+	if (token.number > MAX16) {
+		ERROR_POS(token, "cannot reserve more than 65kb");
+		return M_ERROR;
+	}
+
+	directive->type = MIPS_DIRECTIVE_SPACE;
+	directive->space = token.number;
+
+	return M_SUCCESS;
+}
+
+/* .word, .half and .byte: a comma separated list of numbers that each
+ * have to fit the element size, at most MAX_ARG_LENGTH of them */
+static int parse_directive_whb(struct parser *parser,
+			       struct mips_directive *directive,
+			       enum mips_directive_type type)
+{
+	struct token token;
+	uint32_t size = 0;
+	uint32_t len = 0;
+
+	switch (type) {
+	case MIPS_DIRECTIVE_WORD:
+		size = UINT32_MAX;
+		break;
+	case MIPS_DIRECTIVE_HALF:
+		size = UINT16_MAX;
+		break;
+	case MIPS_DIRECTIVE_BYTE:
+		size = UINT8_MAX;
+		break;
+	default:
+		// a label needs a statement after it before C23
+		break;
+	}
+
+	directive->type = type;
+
+	while (1) {
+		if (assert_token(parser, TOK_NUMBER, &token))
+			return M_ERROR;
+
+		if (len >= MAX_ARG_LENGTH) {
+			ERROR_POS(token, "directives cannot be longer than "
+				  "%d arguments", MAX_ARG_LENGTH);
+			return M_ERROR;
+		}
+
+		if (token.number > size) {
+			// size is unsigned, %d showed UINT32_MAX as -1
+			ERROR_POS(token, "number cannot execede max size of: "
+				  "%u", (unsigned) size);
+			return M_ERROR;
+		}
+
+		switch (type) {
+		case MIPS_DIRECTIVE_WORD:
+			directive->words[len++] = token.number;
+			break;
+		case MIPS_DIRECTIVE_HALF:
+			directive->halfs[len++] = token.number;
+			break;
+		case MIPS_DIRECTIVE_BYTE:
+			directive->bytes[len++] = token.number;
+			break;
+		default:
+			break;
+		}
+
+		if (peek_token(parser, &token))
+			return M_ERROR;
+
+		// stop at the first token that does not continue the list
+		if (token.type != TOK_COMMA)
+			break;
+
+		if (next_token(parser, NULL))
+			return M_ERROR;
+	}
+
+	directive->len = len;
+
+	return M_SUCCESS;
+}
+
+/* .extern name */
+static int parse_directive_extern(struct parser *parser,
+				  struct mips_directive *directive)
+{
+	struct token token;
+	if (assert_token(parser, TOK_IDENT, &token))
+		return M_ERROR;
+
+	directive->type = MIPS_DIRECTIVE_EXTERN;
+	strcpy(directive->name, token.text);
+
+	return M_SUCCESS;
+}
+
+/* .globl name */
+static int parse_directive_globl(struct parser *parser,
+				 struct mips_directive *directive)
+{
+	struct token token;
+	if (assert_token(parser, TOK_IDENT, &token))
+		return M_ERROR;
+
+	directive->type = MIPS_DIRECTIVE_GLOBL;
+	strcpy(directive->name, token.text);
+
+	return M_SUCCESS;
+}
+
+/* any other directive: its text is stored as a section name */
+static int parse_section(struct mips_directive *directive,
+			 char name[MAX_LEX_LENGTH])
+{
+	directive->type = MIPS_DIRECTIVE_SECTION;
+	strcpy(directive->name, name);
+
+	return M_SUCCESS;
+}
+
+/* parse a directive token and its arguments into directive; a name
+ * that is not a known directive is treated as a section name */
+static int parse_directive(struct parser *parser,
+			   struct mips_directive *directive)
+{
+	struct token token;
+	if (assert_token(parser, TOK_DIRECTIVE, &token))
+		return M_ERROR;
+
+	// dispatch on the directive name (token text has no leading dot)
+	if (strcmp(token.text, "align") == 0)
+		return parse_directive_align(parser, directive);
+	else if (strcmp(token.text, "space") == 0)
+		return parse_directive_space(parser, directive);
+	else if (strcmp(token.text, "word") == 0)
+		return parse_directive_whb(parser, directive,
+					   MIPS_DIRECTIVE_WORD);
+	else if (strcmp(token.text, "half") == 0)
+		return parse_directive_whb(parser, directive,
+					   MIPS_DIRECTIVE_HALF);
+	else if (strcmp(token.text, "byte") == 0)
+		return parse_directive_whb(parser, directive,
+					   MIPS_DIRECTIVE_BYTE);
+	else if (strcmp(token.text, "extern") == 0)
+		return parse_directive_extern(parser, directive);
+	else if (strcmp(token.text, "globl") == 0)
+		return parse_directive_globl(parser, directive);
+	else
+		return parse_section(directive, token.text);
+}
+
 static int parse_constant(struct parser *parser, struct const_expr *expr,
 			  struct token ident)
 {
@@ -96,7 +1020,7 @@ static int parser_handle_ident(struct parser *parser, struct expr *expr)
 		return parse_constant(parser, &expr->constant, ident);
 	} else {
 		expr->type = EXPR_INS;
-		return parser->parse_instruction(parser, &expr->ins, ident);
+		return parse_instruction(parser, &expr->ins, ident);
 	}
 }
 
@@ -108,7 +1032,7 @@ static int parse_label(struct parser *parser,
 	if (assert_token(parser, TOK_LABEL, &token))
 		return M_ERROR;
 
-	strcpy(expr->text, token.text);
+	strcpy(expr->label, token.text);
 
 	return M_SUCCESS;
 }
@@ -139,8 +1063,7 @@ again:
 
 	case TOK_DIRECTIVE:
 		expr->type = EXPR_DIRECTIVE;
-		res = parser->parse_directive(parser,
-				&expr->directive);
+		res = parse_directive(parser, &expr->directive);
 		break;
 
 	case TOK_IDENT:
@@ -161,16 +1084,11 @@ int parser_init(struct lexer *lexer, struct parser *parser)
 {
 	parser->lexer = lexer;
 	parser->peek.type = TOK_EOF;
-	if (sectbl_init(&parser->sec_tbl))
-		return M_ERROR;
-	if (reftbl_init(&parser->ref_tbl))
-		return M_ERROR;
 	return M_SUCCESS;
 }
 
 
 void parser_free(struct parser *parser)
 {
-	sectbl_free(&parser->sec_tbl);
-	reftbl_free(&parser->ref_tbl);
+	(void) parser;
 }
|