#include #include #include #include #include #include "tab.h" #include "gen.h" #include "parse.h" /// /// section table /// static void section_get_default_perm(struct section *sec, const char *name) { #define __LEN 7 static const struct perms { char *name; bool read; bool write; bool execute; int alignment; } defaults[__LEN] = { {".text", true, false, true, 4}, {".code", true, false, true, 4}, {".data", true, true, false, 1}, {".stack", true, true, false, 1}, {".rodata", true, false, false, 1}, {".bss", true, true, false, 1}, {".robss", true, false, false, 1}, }; for (int i = 0; i < __LEN; i++) { const struct perms *p = &defaults[i]; if (strcasecmp(name, p->name) != 0) continue; sec->read = p->read; sec->write = p->write; sec->execute = p->execute; sec->align = p->alignment; break; } } static int section_get(struct generator *gen, struct section **res, const struct string *const name) { /// find the section if it exists for (size_t i = 0; i < gen->sections_len; i++) { struct section *sec = &gen->sections[i]; if (sec->name.len != name->len) continue; if (strcmp(sec->name.str, name->str) != 0) continue; *res = sec; return M_SUCCESS; } /// allocate a new one if it doesnt size_t size = gen->sections_size ? gen->sections_size * 2 : 8; void *new = realloc(gen->sections, size * sizeof(struct section)); if (new == NULL) { PERROR("cannot realloc"); return M_ERROR; } gen->sections_size = size; gen->sections = new; struct section *sec = &gen->sections[gen->sections_len++]; // alloc reftab if (reftab_init(&sec->reftab)) return M_ERROR; // copy name if (string_clone(&sec->name, name)) return M_ERROR; // set defaults sec->len = 0; sec->size = 0; sec->align = 1; sec->data = NULL; sec->read = true; sec->write = true; sec->execute = false; section_get_default_perm(sec, name->str); *res = sec; return M_SUCCESS; } static int section_extend(struct section *section, size_t space) { size_t newlen = section->len + space; if (newlen < section->size) return M_SUCCESS; size_t size = section->size ? section->size * 2 + newlen : newlen * 2; void *new = realloc(section->data, size); if (new == NULL) { PERROR("cannot realloc"); return M_ERROR; } section->size = size; section->data = new; return M_SUCCESS; } static int section_push(struct section *section, void *data, size_t len) { size_t newlen = section->len + len; size_t zeros = newlen % section->align; if (zeros) zeros = section->align - zeros; if (section_extend(section, len + zeros)) return M_ERROR; memset(section->data + section->len, 0, zeros); memcpy(section->data + section->len + zeros, data, len); section->len += len + zeros; return M_SUCCESS; } static int section_zero(struct section *section, size_t len) { size_t zeros = section->len % section->align; if (zeros) zeros = section->align - zeros; if (section_extend(section, len + zeros)) return M_ERROR; memset(section->data + section->len, 0, len + zeros); section->len += len + zeros; return M_SUCCESS; } void section_free(struct section *section) { reftab_free(§ion->reftab); string_free(§ion->name); free(section->data); } /// /// generation functions /// static void print_curr_line(struct generator *gen, const struct expr *const expr) { int line = expr->line_no, len = expr->byte_end - expr->byte_start, nl = true, c = EOF; FILE *file = gen->parser.lexer.file; fseek(file, expr->byte_start, SEEK_SET); while (len--) { c = getc(file); if (c == EOF || c == '\0') break; if (nl) { fprintf(stderr, "\t%d | ", line); line++; nl = false; } if (c == '\n') nl = true; putc(c, stderr); } } static int gen_directive_whb(struct generator *gen, const void *data, uint32_t count, uint32_t len) { // TODO: endianess for (uint32_t i = 0; i < count; i++) { void *ptr = (char *) data + (len * i); if (section_push(gen->current, ptr, len)) return M_ERROR; } return M_SUCCESS; } static int gen_directive(struct generator *gen, const struct expr *const e) { const struct expr_directive *const expr = &e->directive; int res = M_SUCCESS; switch (expr->type) { case EXPR_DIRECTIVE_ALIGN: if (expr->align < 1) { ERROR("alignment cannot be zero"); print_curr_line(gen, e); return M_ERROR; } gen->current->align = expr->align; break; case EXPR_DIRECTIVE_SPACE: res = section_zero(gen->current, expr->space); break; case EXPR_DIRECTIVE_WORD: res = gen_directive_whb(gen, expr->words, expr->len, sizeof(uint32_t)); break; case EXPR_DIRECTIVE_HALF: res = gen_directive_whb(gen, expr->halfs, expr->len, sizeof(uint16_t)); break; case EXPR_DIRECTIVE_BYTE: res = gen_directive_whb(gen, expr->bytes, expr->len, sizeof(uint8_t)); break; case EXPR_DIRECTIVE_SECTION: res = section_get(gen, &gen->current, &expr->section); break; case EXPR_DIRECTIVE_EXTERN: { struct symbol *sym; res = symtab_find_or_stub(&gen->symtab, &sym, &expr->label); if (res == M_SUCCESS) sym->type = SYM_EXTERN; break; } case EXPR_DIRECTIVE_GLOBL: { struct symbol *sym; res = symtab_find_or_stub(&gen->symtab, &sym, &expr->label); if (res == M_SUCCESS) sym->type = SYM_GLOBAL; break; } case EXPR_DIRECTIVE_ASCII: res = section_push(gen->current, expr->string.str, expr->string.len - 1); break; case EXPR_DIRECTIVE_ASCIIZ: res = section_push(gen->current, expr->string.str, expr->string.len); break; } return res; } static int gen_constant(struct generator *gen, struct expr_const *const expr) { (void) gen; (void) expr; ERROR("constants not yet implemented"); return M_ERROR; } static enum grammer_type get_gmr_type(const char *name, size_t *len) { #define CHK(part, str) { \ if (strncasecmp(str, name, strlen(str)) == 0) { \ *len = strlen(str); \ return GMR_ ##part; \ }} \ CHK(RD, "rd") CHK(RS, "rs") CHK(RT, "rt") CHK(IMMD, "immd") CHK(OFFSET_BASE, "offset(base)") CHK(OFFSET, "offset") CHK(TARGET, "target") CHK(HI, "hi") CHK(LO, "lo") #undef CHK ERROR("!!! BUG: this should never hit !!!"); exit(1); } static int parse_register(enum mips32_register *reg, struct string *name) { int len = name->len; int c0 = len > 0 ? name->str[0] : '\0', c1 = len > 1 ? name->str[1] : '\0', c2 = len > 2 ? name->str[2] : '\0', c3 = len > 3 ? name->str[3] : '\0'; // $zero if (c0 == 'z') { if (c1 == 'e' && c2 == 'r' && c3 == 'o') { *reg = MIPS32_REG_ZERO; return M_SUCCESS; } } // $a0-a3 $at else if (c0 == 'a') { if (c1 == 't') { *reg = MIPS32_REG_AT; return M_SUCCESS; } if (c1 >= '0' && c1 <= '3') { *reg = MIPS32_REG_A0; *reg += c1 - '0'; return M_SUCCESS; } } // $v0-v1 else if (c0 == 'v') { if (c1 >= '0' && c1 <= '1') { *reg = MIPS32_REG_V0; *reg += c1 - '0'; return M_SUCCESS; } } // $t0-t9 else if (c0 == 't') { if (c1 >= '0' && c1 <= '7') { *reg = MIPS32_REG_T0; *reg += c1 - '0'; return M_SUCCESS; } // reg T8-T9 are not in order with T0-T7 if (c1 >= '8' && c1 <= '9') { *reg = MIPS32_REG_T8; *reg += c1 - '8'; return M_SUCCESS; } } // $s0-s7 $sp else if (c0 == 's') { if (c1 >= '0' && c1 <= '7') { *reg = MIPS32_REG_S0; *reg += c1 - '0'; return M_SUCCESS; } if (c1 == 'p') { *reg = MIPS32_REG_SP; return M_SUCCESS; } } // $k0-k1 else if (c0 == 'k') { if (c1 >= '0' && c1 <= '1') { *reg = MIPS32_REG_K0; *reg += c1 - '0'; return M_SUCCESS; } } // $gp else if (c0 == 'g') { if (c1 == 'p') { *reg = MIPS32_REG_GP; return M_SUCCESS; } } // $fp else if (c0 == 'f') { if (c1 == 'p') { *reg = MIPS32_REG_FP; return M_SUCCESS; } } // $rp else if (c0 == 'r') { if (c1 == 'a') { *reg = MIPS32_REG_RA; return M_SUCCESS; } } // $0-31 (non aliased register names) else if (c0 >= '0' && c0 <= '9') { int i = c0 - '0'; if (c1 >= '0' && c1 <= '9') { i *= 10; i += c1 - '0'; } if (i <= 31) { *reg = i; return M_SUCCESS; } } ERROR("unknown register $%.*s", name->len, name->str); return M_ERROR; } static int gen_ins_read_state(struct generator *gen, struct expr *const expr, struct gen_ins_state *state, struct mips32_grammer *grammer) { char *ptr = grammer->grammer; uint32_t argi = 0; // read values into state while (*ptr != '\0') { if (argi >= expr->instruction.args_len) { ERROR("not enough arguments passed"); print_curr_line(gen, expr); return M_ERROR; } struct expr_ins_arg *arg = &expr->instruction.args[argi++]; size_t skip; switch (get_gmr_type(ptr, &skip)) { case GMR_RD: // rd if (arg->type != EXPR_INS_ARG_REGISTER) { ERROR("expected a register"); print_curr_line(gen, expr); return M_ERROR; } if (parse_register(&state->rd, &arg->reg)) { print_curr_line(gen, expr); return M_ERROR; } break; case GMR_RS: // rs if (arg->type != EXPR_INS_ARG_REGISTER) { ERROR("expected a register"); print_curr_line(gen, expr); return M_ERROR; } if (parse_register(&state->rs, &arg->reg)) { print_curr_line(gen, expr); return M_ERROR; } break; case GMR_RT: // rt if (arg->type != EXPR_INS_ARG_REGISTER) { ERROR("expected a register"); print_curr_line(gen, expr); return M_ERROR; } if (parse_register(&state->rt, &arg->reg)) { print_curr_line(gen, expr); return M_ERROR; } break; case GMR_IMMD: // immd if (arg->type != EXPR_INS_ARG_IMMEDIATE) { ERROR("expected an immediate"); print_curr_line(gen, expr); return M_ERROR; } state->immd = arg->immd; break; case GMR_OFFSET: // offset state->offset = 0; if (arg->type == EXPR_INS_ARG_IMMEDIATE) state->offset = arg->immd; else if (arg->type == EXPR_INS_ARG_LABEL) state->label = &arg->label; else { ERROR("invalid instruction"); print_curr_line(gen, expr); return M_ERROR; } break; case GMR_OFFSET_BASE: // offset(base) if (arg->type != EXPR_INS_ARG_OFFSET) { ERROR("expected an offset($base)"); print_curr_line(gen, expr); return M_ERROR; } state->offset = arg->offset.immd; if (parse_register(&state->base, &arg->offset.reg)) { print_curr_line(gen, expr); return M_ERROR; } break; case GMR_TARGET: // target state->target = 0; if (arg->type == EXPR_INS_ARG_IMMEDIATE) state->target = arg->immd; else if (arg->type == EXPR_INS_ARG_LABEL) state->label = &arg->label; else { ERROR("invalid instruction"); print_curr_line(gen, expr); return M_ERROR; } break; default: break; } // skip entry ptr += skip; // skip comma if (*ptr == ',') { ptr++; continue; } else if (*ptr == '\0') { break; } else { ERROR("!! BUG3: invalid splitting char %c !!!", *ptr); exit(1); } } return M_SUCCESS; } static int gen_ins_write_state( struct generator *gen, union mips32_instruction ins, // the instruction to modify struct gen_ins_state *state, // the current read state char *grammer) // the gramemr to parse { char *ptr = grammer; enum reference_type reftype = REF_NONE; // read values into state while (*ptr != '\0') { // parse next dsl entry size_t skip; enum grammer_type gmr = get_gmr_type(ptr, &skip); // check for dsl hardcoded register argument bool hardcoded = false; enum mips32_register hard_reg; if (*(ptr + skip) == '=') { // parse argument char *rptr = ptr + skip + 2; hardcoded = true; struct string regname; string_bss(®name, rptr); if (parse_register(&hard_reg, ®name)) { ERROR("!!! BUG2: this should never hit !!!"); exit(1); } } // skip till next comma for (;*ptr != '\0' && *ptr != ','; ptr++); if (*ptr == ',') ptr++; switch (gmr) { case GMR_RD: ins.rd = hardcoded ? hard_reg : state->rd; break; case GMR_RS: ins.rs = hardcoded ? hard_reg : state->rs; break; case GMR_RT: ins.rt = hardcoded ? hard_reg : state->rt; break; case GMR_IMMD: ins.immd = state->immd; break; case GMR_OFFSET: ins.offset = state->offset; reftype = REF_MIPS_16; break; case GMR_OFFSET_BASE: ins.offset = state->offset; ins.rs = state->base; reftype = REF_MIPS_16; break; case GMR_TARGET: ins.target = state->target; reftype = REF_MIPS_26; break; case GMR_HI: ins.immd = state->target >> 16; reftype = REF_MIPS_HI16; break; case GMR_LO: ins.immd = state->target & 0x0000FFFF; reftype = REF_MIPS_LO16; break; } } // get offset for reference (if needed) uint32_t offset = gen->current->len; size_t zeros = offset % gen->current->align; if (zeros) zeros = gen->current->align - zeros; offset += zeros; // write instructon to section uint32_t raw = B32(ins.raw); if (section_push(gen->current, &raw, sizeof(uint32_t))) { return M_ERROR; } // create reference (if needed) if (reftype != REF_NONE && state->label != NULL) { struct symbol *sym; if (symtab_find_or_stub(&gen->symtab, &sym, state->label)) return M_ERROR; struct reference ref = { .type = reftype, .symbol = sym, .offset = offset }; if (reftab_push(&gen->current->reftab, &ref)) { return M_ERROR; } } return M_SUCCESS; } static int gen_ins(struct generator *gen, struct expr *const expr) { struct mips32_grammer *grammer = NULL; for (uint32_t i = 0; i < gen->grammers_len; i++) { struct mips32_grammer *temp = &gen->grammers[i]; if (strcasecmp(temp->name, expr->instruction.name.str) != 0) continue; grammer = temp; break; } if (grammer == NULL) { ERROR("unknown instruction"); print_curr_line(gen, expr); return M_ERROR; } struct gen_ins_state state; state.label = NULL; // read in the values from the parser if (gen_ins_read_state(gen, expr, &state, grammer)) return M_ERROR; // write the values into the instructions // ...and then the sections if (grammer->pseudo_len > 0) { // write pseudo for (int i = 0; i < grammer->pseudo_len; i++) { union mips32_instruction ins = gen->instructions[ grammer->pseudo_grammer[i].enum_index]; if (gen_ins_write_state(gen, ins, &state, grammer->pseudo_grammer[i].update)) return M_ERROR; } } else { // write real union mips32_instruction ins = gen->instructions[grammer->enum_index]; if (gen_ins_write_state(gen, ins, &state, grammer->grammer)) return M_ERROR; } return M_SUCCESS; } static int gen_label(struct generator *gen, struct string *const label) { uint32_t offset = gen->current->len; ptrdiff_t secidx = gen->current - gen->sections; size_t zeros = offset % gen->current->align; if (zeros) zeros = gen->current->align - zeros; offset += zeros; struct symbol *sym; /* update existing symbol (if exists) */ if (symtab_find(&gen->symtab, &sym, label->str) == M_SUCCESS) { if (sym->secidx != SYM_SEC_STUB) { // symbols that are not labeled stub are fully defined, // it is a error to redefine them ERROR("redefined symbol '%s'", label->str); return M_ERROR; } sym->secidx = secidx; sym->offset = offset; /* create a new symbol */ } else { struct symbol new = { .secidx = secidx, .offset = offset, .type = SYM_LOCAL, }; if (string_clone(&new.name, label)) return M_ERROR; if (symtab_push(&gen->symtab, &new)) { string_free(&new.name); return M_ERROR; } } return M_SUCCESS; } /* run codegen */ static int generate(struct generator *gen) { struct expr expr; int res = M_SUCCESS; // get the next expression if ((res = parser_next(&gen->parser, &expr))) return res; // if its not a segment directive // (and we dont have a section) // create the default if (( expr.type != EXPR_DIRECTIVE || expr.directive.type != EXPR_DIRECTIVE_SECTION) && gen->current == NULL) { // create .data section struct string temp = { .str = ".data", .len = 5, .size = 5, .allocated = false }; if (section_get(gen, &gen->current, &temp)) { expr_free(&expr); return M_ERROR; } } res = M_SUCCESS; switch (expr.type) { case EXPR_DIRECTIVE: res = gen_directive(gen, &expr); break; case EXPR_CONSTANT: res = gen_constant(gen, &expr.constant); break; case EXPR_INS: res = gen_ins(gen, &expr); break; case EXPR_LABEL: res = gen_label(gen, &expr.label); break; } expr_free(&expr); return res; } /* run codegen with the mips32r6 specification */ int generate_mips32r6(struct generator *gen) { gen->instructions_len = __MIPS32R6_INS_LEN; gen->instructions = mips32r6_instructions; gen->grammers_len = __MIPS32R6_GRAMMER_LEN; gen->grammers = mips32r6_grammers; int res; while (res = generate(gen), 1) { if (res == M_ERROR) return M_ERROR; if (res == M_EOF) break; } return M_SUCCESS; } int generator_init(const char *file, struct generator *gen) { if (parser_init(file, &gen->parser)) return M_ERROR; if (symtab_init(&gen->symtab)) return M_ERROR; gen->sections = NULL; gen->sections_len = 0; gen->sections_size = 0; return M_SUCCESS; } void generator_free(struct generator *gen) { parser_free(&gen->parser); symtab_free(&gen->symtab); for (size_t i = 0; i < gen->sections_len; i++) section_free(&gen->sections[i]); free(gen->sections); }