diff options
author | Freya Murphy <freya@freyacat.org> | 2024-09-13 11:11:18 -0400 |
---|---|---|
committer | Freya Murphy <freya@freyacat.org> | 2024-09-13 11:11:18 -0400 |
commit | df4a225ccf79dd9f5fa3faef4fd68ae87471f0ca (patch) | |
tree | 44629775a57e229024f56d087ab5d1aebed978d3 /masm | |
parent | dont upload those :p (diff) | |
download | mips-df4a225ccf79dd9f5fa3faef4fd68ae87471f0ca.tar.gz mips-df4a225ccf79dd9f5fa3faef4fd68ae87471f0ca.tar.bz2 mips-df4a225ccf79dd9f5fa3faef4fd68ae87471f0ca.zip |
better
Diffstat (limited to '')
-rw-r--r-- | masm/asm.c | 28 | ||||
-rw-r--r-- | masm/lex.c | 32 | ||||
-rw-r--r-- | masm/lex.h | 13 | ||||
-rw-r--r-- | masm/parse.c | 417 | ||||
-rw-r--r-- | masm/parse.h | 12 |
5 files changed, 386 insertions, 116 deletions
@@ -247,24 +247,15 @@ static int handle_ins(struct assembler *assembler, if (sec_push(sec, entry)) return M_ERROR; - unsigned char type = 0; - switch (ref->type) { - case REF_NONE: + if (ref->type == R_MIPS_NONE) continue; - case REF_OFFESET: - type = R_MIPS_PC16; - break; - case REF_TARGET: - type = R_MIPS_26; - break; - } size_t symidx; if (find_symbol_or_stub(assembler, ref->name, NULL, &symidx)) return M_ERROR; Elf32_Rela rel = { - .r_info = htonl(ELF32_R_INFO(symidx, type)), + .r_info = htonl(ELF32_R_INFO(symidx, ref->type)), .r_addend = htonl(ref->addend), .r_offset = htonl(sec_index(sec, secidx + i)), }; @@ -543,7 +534,7 @@ static void update_offsets(struct assembler *assembler, Elf32_Ehdr *ehdr) } // sections - size_t v_addr = 0x00400000; + size_t v_addr = 0; for (uint32_t i = 0; i < assembler->sectab.len; i++) { struct section *sec = &assembler->sectab.sections[i]; uint32_t idx = sec->shdr_idx; @@ -631,11 +622,14 @@ static int write_file(struct assembler *assembler, Elf32_Ehdr *ehdr, for (uint32_t j = 0; j < sec->len; j++) { struct section_entry *entry = &sec->entries[j]; size_t size = entry->size; - fwrite(&entry->data, size, 1, out); - while(size % sec->alignment) { - uint8_t zero = 0; - fwrite(&zero, 1, 1, out); - size++; + size_t zeros = size % sec->alignment;; + if (entry->type != ENT_NO_DATA) + fwrite(&entry->data, size, 1, out); + else + zeros += size; + while(zeros) { + fputc(0, out); + zeros--; } } } @@ -149,6 +149,14 @@ static int lex_number(struct lexer *lexer, int64_t *n) { int64_t number = 0; int base = 10; + int neg = 0; + + // check if negative + if (lex_peek(lexer) == '-') { + lex_next(lexer); + neg = 1; + } + // skip all leading zeros, they dont do anything. // this also allows us to directly check for 0b, 0o, and 0x @@ -200,6 +208,9 @@ static int lex_number(struct lexer *lexer, int64_t *n) number += n; } + if (neg) + number = -number; + *n = number; return M_SUCCESS; } @@ -226,7 +237,8 @@ again: // use label to avoid whitespace recursion case ';': case '#': skip_comment(lexer); - goto again; + token->type = TOK_NL; + break; case ' ': case '\t': // skip white space @@ -267,6 +279,7 @@ again: // use label to avoid whitespace recursion lex_next(lexer); res = lex_string(lexer, token->text); break; + case '-': case '0': case '1': case '2': @@ -341,3 +354,20 @@ char *token_str(enum token_type type) } return "unknown"; } + +void lexer_save(struct lexer *lexer, struct lexer_state *state) +{ + state->x = lexer->x; + state->y = lexer->y; + state->peek = lexer->peek; + state->offset = ftell(lexer->file); +} + +/* load a different state into a lexer */ +void lexer_load(struct lexer *lexer, const struct lexer_state *state) +{ + lexer->x = state->x; + lexer->y = state->y; + lexer->peek = state->peek; + fseek(lexer->file, state->offset, SEEK_SET); +} @@ -14,6 +14,13 @@ struct lexer { int y; }; +struct lexer_state { + long offset; + int peek; + int x; + int y; +}; + enum token_type { TOK_IDENT, TOK_REG, @@ -52,4 +59,10 @@ int lexer_next(struct lexer *lexer, struct token *token); /* token type to string */ char *token_str(enum token_type); +/* save the state of a lexer */ +void lexer_save(struct lexer *lexer, struct lexer_state *state); + +/* load a different state into a lexer */ +void lexer_load(struct lexer *lexer, const struct lexer_state *state); + #endif /* __LEX_H__ */ diff --git a/masm/parse.c b/masm/parse.c index d0a1f33..221ef6d 100644 --- a/masm/parse.c +++ b/masm/parse.c @@ -1,10 +1,14 @@ #include <mlimits.h> #include <merror.h> +#include <netinet/in.h> +#include <stdint.h> #include <stdio.h> #include <string.h> +#include <elf.h> #include "parse.h" #include "lex.h" +#include "mips.h" static int next_token(struct parser *parser, struct token *tok) { @@ -78,8 +82,6 @@ enum mips_parse_format { MIPS_PARSE_I, // jump type: offset MIPS_PARSE_J, - // jump type: register - MIPS_PARSE_JR, // offset 16b type: offset MIPS_PARSE_O16, // offset 26b type: offset @@ -129,15 +131,15 @@ const enum mips_parse_format mips_parse_formats[] = { FORMAT(BLTZL, BZ) FORMAT(BNE, BE) FORMAT(BNEL, BE) - FORMAT(DDIV, R2) - FORMAT(DDIVU, R2) - FORMAT(DIV, R2) - FORMAT(DIVU, R2) + FORMAT(DIV, R) + FORMAT(MOD, R) + FORMAT(DIVU, R) + FORMAT(MODU, R) FORMAT(J, J) FORMAT(JAL, J) - FORMAT(JALR, JR) // TODO: handle rd + FORMAT(JALR, RS) // TODO: handle rd FORMAT(JALX, J) - FORMAT(JR, JR) + FORMAT(JR, RS) FORMAT(LB, SL) FORMAT(LBU, SL) FORMAT(LH, SL) @@ -150,8 +152,11 @@ const enum mips_parse_format mips_parse_formats[] = { FORMAT(MFLO, RD) FORMAT(MTHI, RS) FORMAT(MTLO, RS) + FORMAT(MUL, R) + FORMAT(MUH, R) + FORMAT(MULU, R) + FORMAT(MUHU, R) FORMAT(MULT, R2) - FORMAT(MULTU, R2) FORMAT(SB, SL) FORMAT(SH, SL) FORMAT(SW, SL) @@ -178,12 +183,13 @@ const enum mips_parse_format mips_parse_formats[] = { #undef FORMAT -#define MAX5 32 -#define MAX16 65536 -#define MAX26 67108864 +#define MAX5 (1 << 5) +#define MAX16 (1 << 16) +#define MAX26 (1 << 25) +#define MAX32 (1 << 31) -static int get_reference(struct parser *parser, uint32_t *offset, - struct reference *ref, enum reference_type type) +static int get_reference(struct parser *parser, uint64_t *offset, + struct reference *ref, unsigned char type) { struct token token; @@ -191,6 +197,7 @@ static int get_reference(struct parser *parser, uint32_t *offset, return M_ERROR; if (token.type == TOK_NUMBER) { + *offset = token.number; return M_SUCCESS; } @@ -210,16 +217,64 @@ static int get_reference(struct parser *parser, uint32_t *offset, return M_SUCCESS; } -static int get_offset(struct parser *parser, uint32_t *offset, +static int get_offset(struct parser *parser, int32_t *offset, + struct reference *ref) +{ + uint64_t off; + if (get_reference(parser, &off, ref, R_MIPS_PC16)) + return M_ERROR; + + if (off % 4) { + ERROR_POS((*parser->lexer), "cannot use offset of '%ld', must " + "be divisble by four", off); + return M_ERROR; + } + + if (off > MAX16) { + ERROR("offset '%d' cannot be larger than 16 bits", off); + return M_ERROR; + } + + *offset = off; + return M_SUCCESS; +} + +static int get_offset_26(struct parser *parser, int32_t *offset, struct reference *ref) { - return get_reference(parser, offset, ref, REF_OFFESET); + uint64_t off; + if (get_reference(parser, &off, ref, R_MIPS_PC26_S2)) + return M_ERROR; + + if (off % 4) { + ERROR_POS((*parser->lexer), "cannot use offset of '%ld', must " + "be divisble by four", off); + return M_ERROR; + } + + if (off > MAX26) { + ERROR("offset '%d' cannot be larger than 26 bits", off); + return M_ERROR; + } + + *offset = off; + return M_SUCCESS; } static int get_target(struct parser *parser, uint32_t *offset, struct reference *ref) { - return get_reference(parser, offset, ref, REF_TARGET); + uint64_t off; + if (get_reference(parser, &off, ref, R_MIPS_26)) + return M_ERROR; + + if (off > MAX26) { + ERROR("target '%d' cannot be larger than 26 bits", off); + return M_ERROR; + } + + *offset = off; + return M_SUCCESS; } static int get_instruction(const char *ident, struct mips_instruction *res) @@ -333,7 +388,7 @@ static int parse_register(struct parser *parser, enum mips_register *reg) // $rp else if (c0 == 'r') { - if (c1 == 'p') { + if (c1 == 'a') { *reg = MIPS_REG_RA; return M_SUCCESS; } @@ -356,6 +411,77 @@ static int parse_register(struct parser *parser, enum mips_register *reg) return M_ERROR; } +static int get_reg_offset(struct parser *parser, struct mips_instruction *ins, + struct reference *ref) +{ + struct token token; + enum mips_register reg; + +// ============================================= + + // defaults + ins->data.rs = MIPS_REG_ZERO; + ins->data.immd = 0; + + if (peek_token(parser, &token)) + return M_ERROR; + + if (token.type == TOK_LPAREN) + goto reg; + else + goto off; + +// ============================================= + +off: + + uint64_t immd; + if (get_reference(parser, &immd, ref, R_MIPS_16)) + return M_ERROR; + ins->data.offset = htons(immd); + + if (peek_token(parser, &token)) + return M_ERROR; + + if (token.type == TOK_LPAREN) + goto reg; + else + goto end; + +// ============================================= + +reg: + if (assert_token(parser, TOK_LPAREN, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->data.rs = reg; + + if (assert_token(parser, TOK_RPAREN, NULL)) + return M_ERROR; + +// ============================================= +end: + if (peek_token(parser, &token)) + return M_ERROR; + + return M_SUCCESS; +} + +static int parse_number(struct parser *parser, uint32_t *n, uint32_t max) +{ + struct token token; + if (assert_token(parser, TOK_NUMBER, &token)) + return M_ERROR; + if (max && token.number > max) { + ERROR_POS(token, "number cannot be larger than '%d'", max); + return M_ERROR; + } + *n = token.number; + return M_SUCCESS; +} + static int parse_instruction_r(struct parser *parser, struct mips_instruction *ins) { @@ -364,21 +490,21 @@ static int parse_instruction_r(struct parser *parser, if (parse_register(parser, ®)) return M_ERROR; - ins->data.R.rd = reg; + ins->data.rd = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; - ins->data.R.rs = reg; + ins->data.rs = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; - ins->data.R.rt = reg; + ins->data.rt = reg; return M_SUCCESS; } @@ -391,14 +517,14 @@ static int parse_instruction_r2(struct parser *parser, if (parse_register(parser, ®)) return M_ERROR; - ins->data.R.rs = reg; + ins->data.rs = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; - ins->data.R.rt = reg; + ins->data.rt = reg; return M_SUCCESS; } @@ -411,7 +537,7 @@ static int parse_instruction_rs(struct parser *parser, if (parse_register(parser, ®)) return M_ERROR; - ins->data.R.rs = reg; + ins->data.rs = reg; return M_SUCCESS; } @@ -424,7 +550,7 @@ static int parse_instruction_rd(struct parser *parser, if (parse_register(parser, ®)) return M_ERROR; - ins->data.R.rd = reg; + ins->data.rd = reg; return M_SUCCESS; } @@ -438,14 +564,14 @@ static int parse_instruction_i(struct parser *parser, if (parse_register(parser, ®)) return M_ERROR; - ins->data.I.rt = reg; + ins->data.rt = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; - ins->data.I.rs = reg; + ins->data.rs = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; @@ -455,7 +581,7 @@ static int parse_instruction_i(struct parser *parser, if (token.number >= MAX16) return M_ERROR; - ins->data.I.immd = token.number; + ins->data.immd = htons(token.number); return M_SUCCESS; } @@ -465,17 +591,21 @@ static int parse_instruction_offset(struct parser *parser, struct mips_instruction *ins, struct reference *ref) { - uint32_t n; - if (get_offset(parser, &n, ref) || n > max) - return M_ERROR; + int32_t n; switch (max) { case MAX26: - ins->data.J.target = n; + if (get_offset_26(parser, &n, ref)) + return M_ERROR; + ins->data.offs26 = htonl(n); break; case MAX16: - ins->data.B.offset = n; + if (get_offset(parser, &n, ref)) + return M_ERROR; + ins->data.offset = htons(n); break; + default: + return M_ERROR; } return M_SUCCESS; @@ -488,30 +618,34 @@ static int parse_instruction_j(struct parser *parser, uint32_t n; if (get_target(parser, &n, ref) || n > MAX26) return M_ERROR; - ins->data.J.target = n; + ins->data.target = n; return M_SUCCESS; } -static int parse_instruction_jr(struct parser *parser, - struct mips_instruction *ins, - struct reference *ref) +static int parse_instruction_branch_equal(struct parser *parser, + struct mips_instruction *ins, + struct reference *ref) { - uint32_t n; - if (get_target(parser, &n, ref) || n > MAX26) + enum mips_register reg; + if (parse_register(parser, ®)) return M_ERROR; - ins->data.J.target = n; + ins->data.rs = reg; - return M_SUCCESS; -} + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; -static int parse_instruction_branch_equal(struct parser *parser, - struct mips_instruction *ins) -{ - enum mips_register reg; if (parse_register(parser, ®)) return M_ERROR; - ins->data.R.rs = reg; + ins->data.rt = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + int32_t off; + if (get_offset(parser, &off, ref)) + return M_ERROR; + ins->data.offset = htons(off); return M_SUCCESS; } @@ -521,18 +655,18 @@ static int parse_instruction_branch(struct parser *parser, struct reference *ref) { enum mips_register reg; - uint32_t n; + int32_t n; if (parse_register(parser, ®)) return M_ERROR; - ins->data.B.rs = reg; + ins->data.rs = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; - if (get_offset(parser, &n, ref) || n > MAX16) + if (get_offset(parser, &n, ref)) return M_ERROR; - ins->data.B.offset = n; + ins->data.offset = htons(n); return M_SUCCESS; } @@ -542,40 +676,15 @@ static int parse_instruction_sl(struct parser *parser, struct reference *ref) { enum mips_register reg; - uint32_t offset = 0; - struct token token; if (parse_register(parser, ®)) return M_ERROR; - ins->data.I.rt = reg; + ins->data.rt = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; - if (peek_token(parser, &token)) - return M_ERROR; - - if (token.type != TOK_LPAREN) - if (get_offset(parser, &offset, ref)) - return M_ERROR; - ins->data.I.immd = offset; - - if (peek_token(parser, &token)) - return M_ERROR; - - if (token.type == TOK_NL) { - ins->data.I.rs = MIPS_REG_ZERO; - return M_SUCCESS; - } - - if (assert_token(parser, TOK_LPAREN, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.I.rs = reg; - - if (assert_token(parser, TOK_RPAREN, NULL)) + if (get_reg_offset(parser, ins, ref)) return M_ERROR; return M_SUCCESS; @@ -589,14 +698,14 @@ static int parse_instruction_sli(struct parser *parser, if (parse_register(parser, ®)) return M_ERROR; - ins->data.I.rt = reg; + ins->data.rt = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX16) return M_ERROR; - ins->data.I.immd = token.number; + ins->data.immd = htons(token.number); return M_SUCCESS; } @@ -609,21 +718,21 @@ static int parse_instruction_s(struct parser *parser, if (parse_register(parser, ®)) return M_ERROR; - ins->data.R.rd = reg; + ins->data.rd = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; - ins->data.R.rt = reg; + ins->data.rt = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX5) return M_ERROR; - ins->data.R.shamt = token.number; + ins->data.shamt = token.number; return M_SUCCESS; } @@ -635,25 +744,151 @@ static int parse_instruction_sv(struct parser *parser, if (parse_register(parser, ®)) return M_ERROR; - ins->data.R.rd = reg; + ins->data.rd = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; - ins->data.R.rt = reg; + ins->data.rt = reg; if (assert_token(parser, TOK_COMMA, NULL)) return M_ERROR; if (parse_register(parser, ®)) return M_ERROR; - ins->data.R.rs = reg; + ins->data.rs = reg; return M_SUCCESS; } +static int parse_pseudo_li(struct parser *parser, struct ins_expr *expr) +{ + enum mips_register reg; + uint32_t immd; + + if (parse_register(parser, ®)) + return M_ERROR; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_number(parser, &immd, MAX16)) + return M_ERROR; + + expr->ins_len = 1; + expr->ins[0] = mips_instructions[MIPS_INS_ORI]; + expr->ins[0].data.rt = reg; + expr->ins[0].data.rs = MIPS_REG_ZERO; + expr->ins[0].data.immd = htons(immd); + expr->ref[0].type = R_MIPS_NONE; + + return M_SUCCESS; +} + +static int parse_pseudo_la(struct parser *parser, struct ins_expr *expr) +{ + enum mips_register reg; + struct token token; + + uint16_t hi = 0, lo = 0; + + if (parse_register(parser, ®)) + return M_ERROR; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (next_token(parser, &token)) + return M_ERROR; + + if (token.type == TOK_IDENT) { + expr->ref[0].type = R_MIPS_HI16; + expr->ref[0].addend = 0; + strcpy(expr->ref[0].name, token.text); + expr->ref[1].type = R_MIPS_LO16; + expr->ref[1].addend = 0; + strcpy(expr->ref[1].name, token.text); + } else if (token.type == TOK_NUMBER && token.number > MAX32) { + hi = token.number >> 16; + lo = token.number & 0x0000ffff; + expr->ref[0].type = R_MIPS_NONE; + expr->ref[1].type = R_MIPS_NONE; + } else { + return M_ERROR; + } + + expr->ins_len = 2; + expr->ins[0] = mips_instructions[MIPS_INS_LUI]; + expr->ins[0].data.rt = reg; + expr->ins[0].data.immd = htons(hi); + expr->ins[1] = mips_instructions[MIPS_INS_ORI]; + expr->ins[1].data.rt = reg; + expr->ins[1].data.rs = MIPS_REG_ZERO; + expr->ins[1].data.immd = htons(lo); + + return M_SUCCESS; +} + +static int parse_pseudo_move(struct parser *parser, struct ins_expr *expr) +{ + enum mips_register rd, rs; + + if (parse_register(parser, &rd)) + return M_ERROR; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_register(parser, &rs)) + return M_ERROR; + + expr->ins_len = 1; + expr->ins[0] = mips_instructions[MIPS_INS_OR]; + expr->ins[0].data.rs = rs; + expr->ins[0].data.rt = MIPS_REG_ZERO; + expr->ins[0].data.rd = rd; + expr->ref[0].type = R_MIPS_NONE; + + return M_SUCCESS; +} + +static int parse_pseudo_instruction(struct parser *parser, + struct ins_expr *expr, + struct token ident) +{ + // disablle logging in the logging + // module + extern int log_disabled; + log_disabled = 1; + + int res = M_ERROR; + + struct lexer_state state; + lexer_save(parser->lexer, &state); + + #define CHK(name) if (strcmp(ident.text, #name) == 0) + + CHK(li) + res = parse_pseudo_li(parser, expr); + else CHK(la) + res = parse_pseudo_la(parser, expr); + else CHK(move) + res = parse_pseudo_move(parser, expr); + + if (res) + lexer_load(parser->lexer, &state); + + expr->ins[0].data.raw = 0; + expr->ins[1].data.raw = 0; + expr->ref[0] = (struct reference) {0}; + expr->ref[1] = (struct reference) {0}; + + log_disabled = 0; + return res; +} + static int parse_instruction(struct parser *parser, struct ins_expr *expr, struct token ident) @@ -662,6 +897,9 @@ static int parse_instruction(struct parser *parser, enum mips_parse_format format; int res = M_SUCCESS; + if (parse_pseudo_instruction(parser, expr, ident) == M_SUCCESS) + return M_SUCCESS; + if (get_instruction(ident.text, &instruction)) { ERROR_POS(ident, "unknown instruction '%s'", ident.text); return M_ERROR; @@ -673,7 +911,7 @@ static int parse_instruction(struct parser *parser, // this will only ever generate one instruction expr->ins_len = 1; *ins = instruction; - ref->type = REF_NONE; + ref->type = R_MIPS_NONE; format = mips_parse_formats[instruction.type]; switch (format) { @@ -695,9 +933,6 @@ static int parse_instruction(struct parser *parser, case MIPS_PARSE_J: res = parse_instruction_j(parser, ins, ref); break; - case MIPS_PARSE_JR: - res = parse_instruction_jr(parser, ins, ref); - break; case MIPS_PARSE_O16: res = parse_instruction_offset(parser, MAX16, ins, ref); break; @@ -705,7 +940,7 @@ static int parse_instruction(struct parser *parser, res = parse_instruction_offset(parser, MAX26, ins, ref); break; case MIPS_PARSE_BE: - res = parse_instruction_branch_equal(parser, ins); + res = parse_instruction_branch_equal(parser, ins, ref); break; case MIPS_PARSE_BZ: res = parse_instruction_branch(parser, ins, ref); diff --git a/masm/parse.h b/masm/parse.h index 3052d51..9e0e928 100644 --- a/masm/parse.h +++ b/masm/parse.h @@ -37,14 +37,9 @@ struct mips_directive { }; }; -enum reference_type { - REF_NONE, - REF_OFFESET, - REF_TARGET, -}; - struct reference { - enum reference_type type; + // ELF relocate type + unsigned char type; /// symbol name char name[MAX_LEX_LENGTH]; @@ -91,7 +86,10 @@ struct expr { }; struct parser { + // the lexer + // *weak* ponter, we do not own this struct lexer *lexer; + // the last token peeked struct token peek; }; |