diff options
Diffstat (limited to '')
-rw-r--r-- | masm/gen.c | 812 |
1 files changed, 812 insertions, 0 deletions
diff --git a/masm/gen.c b/masm/gen.c new file mode 100644 index 0000000..13d2848 --- /dev/null +++ b/masm/gen.c @@ -0,0 +1,812 @@ +#include <stdlib.h> +#include <merror.h> +#include <melf.h> +#include <mips32.h> +#include <mips32r6.h> + +#include "tab.h" +#include "gen.h" +#include "parse.h" + +/// +/// section table +/// + +static void section_get_default_perm(struct section *sec, const char *name) +{ + #define __LEN 7 + static const struct perms { + char *name; + bool read; + bool write; + bool execute; + int alignment; + } defaults[__LEN] = { + {".text", true, false, true, 4}, + {".code", true, false, true, 4}, + {".data", true, true, false, 1}, + {".stack", true, true, false, 1}, + {".rodata", true, false, false, 1}, + {".bss", true, true, false, 1}, + {".robss", true, false, false, 1}, + }; + + for (int i = 0; i < __LEN; i++) { + const struct perms *p = &defaults[i]; + if (strcasecmp(name, p->name) != 0) + continue; + sec->read = p->read; + sec->write = p->write; + sec->execute = p->execute; + sec->align = p->alignment; + break; + } + +} + +static int section_get(struct generator *gen, struct section **res, + const struct string *const name) +{ + /// find the section if it exists + for (size_t i = 0; i < gen->sections_len; i++) { + struct section *sec = &gen->sections[i]; + if (sec->name.len != name->len) + continue; + if (strcmp(sec->name.str, name->str) != 0) + continue; + *res = sec; + return M_SUCCESS; + } + + /// allocate a new one if it doesnt + size_t size = gen->sections_size ? gen->sections_size * 2 : 8; + void *new = realloc(gen->sections, size * sizeof(struct section)); + if (new == NULL) { + PERROR("cannot realloc"); + return M_ERROR; + } + + gen->sections_size = size; + gen->sections = new; + + struct section *sec = &gen->sections[gen->sections_len++]; + + // alloc reftab + if (reftab_init(&sec->reftab)) + return M_ERROR; + + // copy name + if (string_clone(&sec->name, name)) + return M_ERROR; + + // set defaults + sec->len = 0; + sec->size = 0; + sec->align = 1; + sec->data = NULL; + sec->read = true; + sec->write = true; + sec->execute = false; + section_get_default_perm(sec, name->str); + + *res = sec; + return M_SUCCESS; +} + +static int section_extend(struct section *section, size_t space) +{ + size_t newlen = section->len + space; + if (newlen < section->size) + return M_SUCCESS; + + size_t size = section->size ? section->size * 2 + newlen : newlen * 2; + void *new = realloc(section->data, size); + if (new == NULL) { + PERROR("cannot realloc"); + return M_ERROR; + } + section->size = size; + section->data = new; + + return M_SUCCESS; +} + +static int section_push(struct section *section, void *data, size_t len) +{ + size_t newlen = section->len + len; + size_t zeros = newlen % section->align; + if (zeros) + zeros = section->align - zeros; + + if (section_extend(section, len + zeros)) + return M_ERROR; + + memset(section->data + section->len, 0, zeros); + memcpy(section->data + section->len + zeros, data, len); + section->len += len + zeros; + + return M_SUCCESS; +} + +static int section_zero(struct section *section, size_t len) +{ + size_t zeros = section->len % section->align; + if (zeros) + zeros = section->align - zeros; + + if (section_extend(section, len + zeros)) + return M_ERROR; + + memset(section->data + section->len, 0, len + zeros); + section->len += len + zeros; + + return M_SUCCESS; +} + +void section_free(struct section *section) +{ + reftab_free(§ion->reftab); + string_free(§ion->name); + free(section->data); +} + +/// +/// generation functions +/// + +static void print_curr_line(struct generator *gen, + const struct expr *const expr) +{ + int line = expr->line_no, + len = expr->byte_end - expr->byte_start, + nl = true, + c = EOF; + FILE *file = gen->parser.lexer.file; + + fseek(file, expr->byte_start, SEEK_SET); + + while (len--) { + c = getc(file); + if (c == EOF || c == '\0') + break; + if (nl) { + fprintf(stderr, "\t%d | ", line); + line++; + nl = false; + } + if (c == '\n') + nl = true; + putc(c, stderr); + } + +} + +static int gen_directive_whb(struct generator *gen, const void *data, + uint32_t count, uint32_t len) +{ + // TODO: endianess + for (uint32_t i = 0; i < count; i++) { + void *ptr = (char *) data + (len * i); + if (section_push(gen->current, ptr, len)) + return M_ERROR; + } + + return M_SUCCESS; +} + +static int gen_directive(struct generator *gen, + const struct expr *const e) +{ + const struct expr_directive *const expr = &e->directive; + int res = M_SUCCESS; + + switch (expr->type) { + case EXPR_DIRECTIVE_ALIGN: + if (expr->align < 1) { + ERROR("alignment cannot be zero"); + print_curr_line(gen, e); + return M_ERROR; + } + gen->current->align = expr->align; + break; + case EXPR_DIRECTIVE_SPACE: + res = section_zero(gen->current, expr->space); + break; + case EXPR_DIRECTIVE_WORD: + res = gen_directive_whb(gen, expr->words, expr->len, + sizeof(uint32_t)); + break; + case EXPR_DIRECTIVE_HALF: + res = gen_directive_whb(gen, expr->halfs, expr->len, + sizeof(uint16_t)); + break; + case EXPR_DIRECTIVE_BYTE: + res = gen_directive_whb(gen, expr->bytes, expr->len, + sizeof(uint8_t)); + break; + case EXPR_DIRECTIVE_SECTION: + res = section_get(gen, &gen->current, &expr->section); + break; + case EXPR_DIRECTIVE_EXTERN: { + struct symbol *sym; + res = symtab_find_or_stub(&gen->symtab, &sym, &expr->label); + if (res == M_SUCCESS) + sym->type = SYM_EXTERN; + break; + } + case EXPR_DIRECTIVE_GLOBL: { + struct symbol *sym; + res = symtab_find_or_stub(&gen->symtab, &sym, &expr->label); + if (res == M_SUCCESS) + sym->type = SYM_GLOBAL; + break; + } + case EXPR_DIRECTIVE_ASCII: + res = section_push(gen->current, expr->string.str, + expr->string.len - 1); + break; + case EXPR_DIRECTIVE_ASCIIZ: + res = section_push(gen->current, expr->string.str, + expr->string.len); + break; + } + + return res; +} + +static int gen_constant(struct generator *gen, struct expr_const *const expr) +{ + (void) gen; + (void) expr; + + ERROR("constants not yet implemented"); + return M_ERROR; +} + +static enum grammer_type get_gmr_type(const char *name, size_t *len) +{ + #define CHK(part, str) { \ + if (strncasecmp(str, name, strlen(str)) == 0) { \ + *len = strlen(str); \ + return GMR_ ##part; \ + }} \ + + CHK(RD, "rd") + CHK(RS, "rs") + CHK(RT, "rt") + CHK(IMMD, "immd") + CHK(OFFSET_BASE, "offset(base)") + CHK(OFFSET, "offset") + CHK(TARGET, "target") + CHK(HI, "hi") + CHK(LO, "lo") + + #undef CHK + + ERROR("!!! BUG: this should never hit !!!"); + exit(1); +} + +static int parse_register(enum mips32_register *reg, struct string *name) +{ + int len = name->len; + int c0 = len > 0 ? name->str[0] : '\0', + c1 = len > 1 ? name->str[1] : '\0', + c2 = len > 2 ? name->str[2] : '\0', + c3 = len > 3 ? name->str[3] : '\0'; + + // $zero + if (c0 == 'z') { + if (c1 == 'e' && c2 == 'r' && c3 == 'o') { + *reg = MIPS32_REG_ZERO; + return M_SUCCESS; + } + } + + // $a0-a3 $at + else if (c0 == 'a') { + if (c1 == 't') { + *reg = MIPS32_REG_AT; + return M_SUCCESS; + } + if (c1 >= '0' && c1 <= '3') { + *reg = MIPS32_REG_A0; + *reg += c1 - '0'; + return M_SUCCESS; + } + } + + // $v0-v1 + else if (c0 == 'v') { + if (c1 >= '0' && c1 <= '1') { + *reg = MIPS32_REG_V0; + *reg += c1 - '0'; + return M_SUCCESS; + } + } + + // $t0-t9 + else if (c0 == 't') { + if (c1 >= '0' && c1 <= '7') { + *reg = MIPS32_REG_T0; + *reg += c1 - '0'; + return M_SUCCESS; + } + // reg T8-T9 are not in order with T0-T7 + if (c1 >= '8' && c1 <= '9') { + *reg = MIPS32_REG_T8; + *reg += c1 - '8'; + return M_SUCCESS; + } + } + + // $s0-s7 $sp + else if (c0 == 's') { + if (c1 >= '0' && c1 <= '7') { + *reg = MIPS32_REG_S0; + *reg += c1 - '0'; + return M_SUCCESS; + } + if (c1 == 'p') { + *reg = MIPS32_REG_SP; + return M_SUCCESS; + } + } + + // $k0-k1 + else if (c0 == 'k') { + if (c1 >= '0' && c1 <= '1') { + *reg = MIPS32_REG_K0; + *reg += c1 - '0'; + return M_SUCCESS; + } + } + + // $gp + else if (c0 == 'g') { + if (c1 == 'p') { + *reg = MIPS32_REG_GP; + return M_SUCCESS; + } + } + + // $fp + else if (c0 == 'f') { + if (c1 == 'p') { + *reg = MIPS32_REG_FP; + return M_SUCCESS; + } + } + + // $rp + else if (c0 == 'r') { + if (c1 == 'a') { + *reg = MIPS32_REG_RA; + return M_SUCCESS; + } + } + + // $0-31 (non aliased register names) + else if (c0 >= '0' && c0 <= '9') { + int i = c0 - '0'; + if (c1 >= '0' && c1 <= '9') { + i *= 10; + i += c1 - '0'; + } + if (i <= 31) { + *reg = i; + return M_SUCCESS; + } + } + + ERROR("unknown register $%.*s", name->len, name->str); + return M_ERROR; +} + +static int gen_ins_read_state(struct generator *gen, + struct expr *const expr, + struct gen_ins_state *state, + struct mips32_grammer *grammer) +{ + char *ptr = grammer->grammer; + uint32_t argi = 0; + + // read values into state + while (*ptr != '\0') { + + if (argi >= expr->instruction.args_len) { + ERROR("not enough arguments passed"); + print_curr_line(gen, expr); + return M_ERROR; + } + struct expr_ins_arg *arg = &expr->instruction.args[argi++]; + + size_t skip; + switch (get_gmr_type(ptr, &skip)) { + case GMR_RD: + // rd + if (arg->type != EXPR_INS_ARG_REGISTER) { + ERROR("expected a register"); + print_curr_line(gen, expr); + return M_ERROR; + } + if (parse_register(&state->rd, &arg->reg)) { + print_curr_line(gen, expr); + return M_ERROR; + } + break; + case GMR_RS: + // rs + if (arg->type != EXPR_INS_ARG_REGISTER) { + ERROR("expected a register"); + print_curr_line(gen, expr); + return M_ERROR; + } + if (parse_register(&state->rs, &arg->reg)) { + print_curr_line(gen, expr); + return M_ERROR; + } + break; + case GMR_RT: + // rt + if (arg->type != EXPR_INS_ARG_REGISTER) { + ERROR("expected a register"); + print_curr_line(gen, expr); + return M_ERROR; + } + if (parse_register(&state->rt, &arg->reg)) { + print_curr_line(gen, expr); + return M_ERROR; + } + break; + case GMR_IMMD: + // immd + if (arg->type != EXPR_INS_ARG_IMMEDIATE) { + ERROR("expected an immediate"); + print_curr_line(gen, expr); + return M_ERROR; + } + state->immd = arg->immd; + break; + case GMR_OFFSET: + // offset + state->offset = 0; + if (arg->type == EXPR_INS_ARG_IMMEDIATE) + state->offset = arg->immd; + else if (arg->type == EXPR_INS_ARG_LABEL) + state->label = &arg->label; + else { + ERROR("invalid instruction"); + print_curr_line(gen, expr); + return M_ERROR; + } + break; + case GMR_OFFSET_BASE: + // offset(base) + if (arg->type != EXPR_INS_ARG_OFFSET) { + ERROR("expected an offset($base)"); + print_curr_line(gen, expr); + return M_ERROR; + } + state->offset = arg->offset.immd; + if (parse_register(&state->base, &arg->offset.reg)) { + print_curr_line(gen, expr); + return M_ERROR; + } + break; + case GMR_TARGET: + // target + state->target = 0; + if (arg->type == EXPR_INS_ARG_IMMEDIATE) + state->target = arg->immd; + else if (arg->type == EXPR_INS_ARG_LABEL) + state->label = &arg->label; + else { + ERROR("invalid instruction"); + print_curr_line(gen, expr); + return M_ERROR; + } + break; + default: + break; + } + + // skip entry + ptr += skip; + + // skip comma + if (*ptr == ',') { + ptr++; + continue; + } else if (*ptr == '\0') { + break; + } else { + ERROR("!! BUG3: invalid splitting char %c !!!", *ptr); + exit(1); + } + + } + + return M_SUCCESS; +} + +static int gen_ins_write_state( + struct generator *gen, + union mips32_instruction ins, // the instruction to modify + struct gen_ins_state *state, // the current read state + char *grammer) // the gramemr to parse +{ + char *ptr = grammer; + enum reference_type reftype = REF_NONE; + + // read values into state + while (*ptr != '\0') { + + // parse next dsl entry + size_t skip; + enum grammer_type gmr = get_gmr_type(ptr, &skip); + + // check for dsl hardcoded register argument + bool hardcoded = false; + enum mips32_register hard_reg; + if (*(ptr + skip) == '=') { + // parse argument + char *rptr = ptr + skip + 2; + hardcoded = true; + struct string regname; + string_bss(®name, rptr); + if (parse_register(&hard_reg, ®name)) { + ERROR("!!! BUG2: this should never hit !!!"); + exit(1); + } + } + + // skip till next comma + for (;*ptr != '\0' && *ptr != ','; ptr++); + if (*ptr == ',') + ptr++; + + switch (gmr) { + case GMR_RD: + ins.rd = hardcoded ? hard_reg : state->rd; + break; + case GMR_RS: + ins.rs = hardcoded ? hard_reg : state->rs; + break; + case GMR_RT: + ins.rt = hardcoded ? hard_reg : state->rt; + break; + case GMR_IMMD: + ins.immd = state->immd; + break; + case GMR_OFFSET: + ins.offset = state->offset; + reftype = REF_MIPS_16; + break; + case GMR_OFFSET_BASE: + ins.offset = state->offset; + ins.rs = state->base; + reftype = REF_MIPS_16; + break; + case GMR_TARGET: + ins.target = state->target; + reftype = REF_MIPS_26; + break; + case GMR_HI: + ins.immd = state->target >> 16; + reftype = REF_MIPS_HI16; + break; + case GMR_LO: + ins.immd = state->target & 0x0000FFFF; + reftype = REF_MIPS_LO16; + break; + } + } + + // get offset for reference (if needed) + uint32_t offset = gen->current->len; + size_t zeros = offset % gen->current->align; + if (zeros) + zeros = gen->current->align - zeros; + offset += zeros; + + // write instructon to section + uint32_t raw = B32(ins.raw); + if (section_push(gen->current, &raw, sizeof(uint32_t))) { + return M_ERROR; + } + + // create reference (if needed) + if (reftype != REF_NONE && state->label != NULL) { + struct symbol *sym; + + if (symtab_find_or_stub(&gen->symtab, &sym, state->label)) + return M_ERROR; + + struct reference ref = { + .type = reftype, + .symbol = sym, + .offset = offset + }; + + if (reftab_push(&gen->current->reftab, &ref)) { + return M_ERROR; + } + } + + return M_SUCCESS; +} + +static int gen_ins(struct generator *gen, struct expr *const expr) +{ + struct mips32_grammer *grammer = NULL; + for (uint32_t i = 0; i < gen->grammers_len; i++) { + struct mips32_grammer *temp = &gen->grammers[i]; + if (strcasecmp(temp->name, expr->instruction.name.str) != 0) + continue; + grammer = temp; + break; + } + + if (grammer == NULL) { + ERROR("unknown instruction"); + print_curr_line(gen, expr); + return M_ERROR; + } + + struct gen_ins_state state; + state.label = NULL; + + // read in the values from the parser + if (gen_ins_read_state(gen, expr, &state, grammer)) + return M_ERROR; + + // write the values into the instructions + // ...and then the sections + if (grammer->pseudo_len > 0) { + // write pseudo + for (int i = 0; i < grammer->pseudo_len; i++) { + union mips32_instruction ins = gen->instructions[ + grammer->pseudo_grammer[i].enum_index]; + if (gen_ins_write_state(gen, ins, &state, + grammer->pseudo_grammer[i].update)) + return M_ERROR; + } + } else { + // write real + union mips32_instruction ins + = gen->instructions[grammer->enum_index]; + if (gen_ins_write_state(gen, ins, &state, grammer->grammer)) + return M_ERROR; + } + + return M_SUCCESS; +} + +static int gen_label(struct generator *gen, struct string *const label) +{ + uint32_t offset = gen->current->len; + ptrdiff_t secidx = gen->current - gen->sections; + size_t zeros = offset % gen->current->align; + if (zeros) + zeros = gen->current->align - zeros; + offset += zeros; + + struct symbol *sym; + /* update existing symbol (if exists) */ + if (symtab_find(&gen->symtab, &sym, label->str) == M_SUCCESS) { + if (sym->secidx != SYM_SEC_STUB) { + // symbols that are not labeled stub are fully defined, + // it is a error to redefine them + ERROR("redefined symbol '%s'", label->str); + return M_ERROR; + } + sym->secidx = secidx; + sym->offset = offset; + /* create a new symbol */ + } else { + struct symbol new = { + .secidx = secidx, + .offset = offset, + .type = SYM_LOCAL, + }; + if (string_clone(&new.name, label)) + return M_ERROR; + if (symtab_push(&gen->symtab, &new)) { + string_free(&new.name); + return M_ERROR; + } + } + + return M_SUCCESS; +} + +/* run codegen */ +static int generate(struct generator *gen) +{ + struct expr expr; + int res = M_SUCCESS; + + // get the next expression + if ((res = parser_next(&gen->parser, &expr))) + return res; + + // if its not a segment directive + // (and we dont have a section) + // create the default + if (( + expr.type != EXPR_DIRECTIVE || + expr.directive.type != EXPR_DIRECTIVE_SECTION) && + gen->current == NULL) { + // create .data section + struct string temp = { + .str = ".data", + .len = 5, + .size = 5, + .allocated = false + }; + if (section_get(gen, &gen->current, &temp)) { + expr_free(&expr); + return M_ERROR; + } + } + + res = M_SUCCESS; + switch (expr.type) { + case EXPR_DIRECTIVE: + res = gen_directive(gen, &expr); + break; + case EXPR_CONSTANT: + res = gen_constant(gen, &expr.constant); + break; + case EXPR_INS: + res = gen_ins(gen, &expr); + break; + case EXPR_LABEL: + res = gen_label(gen, &expr.label); + break; + } + + expr_free(&expr); + return res; +} + +/* run codegen with the mips32r6 specification */ +int generate_mips32r6(struct generator *gen) +{ + gen->instructions_len = __MIPS32R6_INS_LEN; + gen->instructions = mips32r6_instructions; + gen->grammers_len = __MIPS32R6_GRAMMER_LEN; + gen->grammers = mips32r6_grammers; + + int res; + while (res = generate(gen), 1) { + if (res == M_ERROR) + return M_ERROR; + if (res == M_EOF) + break; + } + + return M_SUCCESS; +} + +int generator_init(const char *file, struct generator *gen) +{ + if (parser_init(file, &gen->parser)) + return M_ERROR; + if (symtab_init(&gen->symtab)) + return M_ERROR; + gen->sections = NULL; + gen->sections_len = 0; + gen->sections_size = 0; + return M_SUCCESS; +} + +void generator_free(struct generator *gen) +{ + parser_free(&gen->parser); + symtab_free(&gen->symtab); + for (size_t i = 0; i < gen->sections_len; i++) + section_free(&gen->sections[i]); + free(gen->sections); +} |