summaryrefslogtreecommitdiff
path: root/masm/gen.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--masm/gen.c812
1 files changed, 812 insertions, 0 deletions
diff --git a/masm/gen.c b/masm/gen.c
new file mode 100644
index 0000000..13d2848
--- /dev/null
+++ b/masm/gen.c
@@ -0,0 +1,812 @@
+#include <stdlib.h>
+#include <merror.h>
+#include <melf.h>
+#include <mips32.h>
+#include <mips32r6.h>
+
+#include "tab.h"
+#include "gen.h"
+#include "parse.h"
+
+///
+/// section table
+///
+
+/*
+ * Apply the built-in permissions and alignment for well-known section names.
+ *
+ * sec:  section whose read/write/execute/align fields are updated
+ * name: section name, compared case-insensitively against the table
+ *
+ * When name is not in the table, sec is left untouched, so the caller's
+ * own defaults stay in effect.
+ */
+static void section_get_default_perm(struct section *sec, const char *name)
+{
+    static const struct perms {
+        const char *name;
+        bool read;
+        bool write;
+        bool execute;
+        int alignment;
+    } defaults[] = {
+        {".text", true, false, true, 4},
+        {".code", true, false, true, 4},
+        {".data", true, true, false, 1},
+        {".stack", true, true, false, 1},
+        {".rodata", true, false, false, 1},
+        {".bss", true, true, false, 1},
+        {".robss", true, false, false, 1},
+    };
+    // derive the entry count from the table so the two cannot drift apart
+    const size_t count = sizeof(defaults) / sizeof(defaults[0]);
+
+    for (size_t i = 0; i < count; i++) {
+        const struct perms *p = &defaults[i];
+        if (strcasecmp(name, p->name) != 0)
+            continue;
+        sec->read = p->read;
+        sec->write = p->write;
+        sec->execute = p->execute;
+        sec->align = p->alignment;
+        break;
+    }
+}
+
+/*
+ * Look up a section by name, creating it if it does not exist yet.
+ *
+ * gen:  generator owning the section table
+ * res:  receives a pointer to the found or newly created section
+ * name: section name (exact, case-sensitive match)
+ *
+ * Returns M_SUCCESS, or M_ERROR when memory for the table, the reference
+ * table or the name copy cannot be obtained. On error *res is not written
+ * and the section count is unchanged.
+ */
+static int section_get(struct generator *gen, struct section **res,
+        const struct string *const name)
+{
+    /// find the section if it exists
+    for (size_t i = 0; i < gen->sections_len; i++) {
+        struct section *sec = &gen->sections[i];
+        if (sec->name.len != name->len)
+            continue;
+        if (strcmp(sec->name.str, name->str) != 0)
+            continue;
+        *res = sec;
+        return M_SUCCESS;
+    }
+
+    /// grow the table, but only once it is actually full
+    if (gen->sections_len >= gen->sections_size) {
+        size_t size = gen->sections_size ? gen->sections_size * 2 : 8;
+
+        // gen->current points into the table; remember its index before
+        // realloc so it can be re-derived if the table moves
+        bool rebase = gen->sections != NULL && gen->current != NULL;
+        ptrdiff_t curidx = rebase ? gen->current - gen->sections : 0;
+
+        void *new = realloc(gen->sections, size * sizeof(struct section));
+        if (new == NULL) {
+            PERROR("cannot realloc");
+            return M_ERROR;
+        }
+
+        gen->sections_size = size;
+        gen->sections = new;
+        if (rebase)
+            gen->current = &gen->sections[curidx];
+    }
+
+    // the slot only becomes part of the table once fully initialised
+    struct section *sec = &gen->sections[gen->sections_len];
+
+    // alloc reftab
+    if (reftab_init(&sec->reftab))
+        return M_ERROR;
+
+    // copy name
+    if (string_clone(&sec->name, name)) {
+        reftab_free(&sec->reftab);
+        return M_ERROR;
+    }
+
+    // set defaults
+    sec->len = 0;
+    sec->size = 0;
+    sec->align = 1;
+    sec->data = NULL;
+    sec->read = true;
+    sec->write = true;
+    sec->execute = false;
+    section_get_default_perm(sec, name->str);
+
+    gen->sections_len++;
+    *res = sec;
+    return M_SUCCESS;
+}
+
+/*
+ * Make sure the section buffer can hold `space` more bytes past its
+ * current length.
+ *
+ * Returns M_SUCCESS when the capacity is (now) sufficient, M_ERROR when
+ * the buffer could not be grown. section->len is not changed.
+ */
+static int section_extend(struct section *section, size_t space)
+{
+    size_t newlen = section->len + space;
+
+    // an exact fit needs no growth; this also keeps an empty section with
+    // a zero-byte request away from realloc(NULL, 0), which may return NULL
+    if (newlen <= section->size)
+        return M_SUCCESS;
+
+    size_t size = section->size ? section->size * 2 + newlen : newlen * 2;
+    void *new = realloc(section->data, size);
+    if (new == NULL) {
+        PERROR("cannot realloc");
+        return M_ERROR;
+    }
+    section->size = size;
+    section->data = new;
+
+    return M_SUCCESS;
+}
+
+/*
+ * Append `len` bytes from `data` to the section, first zero-padding the
+ * section so the data starts on a multiple of section->align.
+ *
+ * Returns M_SUCCESS, or M_ERROR when the buffer could not be grown.
+ */
+static int section_push(struct section *section, void *data, size_t len)
+{
+    // pad from the current write position so the data itself is aligned;
+    // section_zero and the label/reference offset calculations in this
+    // file round the current length up in the same way
+    size_t zeros = section->len % section->align;
+    if (zeros)
+        zeros = section->align - zeros;
+
+    if (section_extend(section, len + zeros))
+        return M_ERROR;
+
+    memset(section->data + section->len, 0, zeros);
+    memcpy(section->data + section->len + zeros, data, len);
+    section->len += len + zeros;
+
+    return M_SUCCESS;
+}
+
+/*
+ * Append `len` zero bytes to the section, preceded by whatever zero
+ * padding is needed to round the current length up to section->align.
+ *
+ * Returns M_SUCCESS, or M_ERROR when the buffer could not be grown.
+ */
+static int section_zero(struct section *section, size_t len)
+{
+    size_t rem = section->len % section->align;
+    size_t pad = rem ? section->align - rem : 0;
+    size_t total = len + pad;
+
+    if (section_extend(section, total))
+        return M_ERROR;
+
+    // padding and payload are both zeros, so one fill covers them
+    memset(section->data + section->len, 0, total);
+    section->len += total;
+
+    return M_SUCCESS;
+}
+
+/*
+ * Release everything a section owns: its reference table, its name and
+ * its data buffer. The struct section itself is not freed.
+ */
+void section_free(struct section *section)
+{
+ reftab_free(&section->reftab);
+ string_free(&section->name);
+ free(section->data);
+}
+
+///
+/// generation functions
+///
+
+/*
+ * Echo the source text of an expression to stderr, each line prefixed
+ * with a tab, its line number and " | ".
+ *
+ * The text is re-read from the lexer's file between expr->byte_start and
+ * expr->byte_end; note that this moves the file position of that stream.
+ */
+static void print_curr_line(struct generator *gen,
+        const struct expr *const expr)
+{
+    FILE *src = gen->parser.lexer.file;
+    int lineno = expr->line_no;
+    int remaining = expr->byte_end - expr->byte_start;
+    int at_line_start = true;
+
+    fseek(src, expr->byte_start, SEEK_SET);
+
+    for (; remaining != 0; remaining--) {
+        int ch = getc(src);
+        if (ch == EOF || ch == '\0')
+            break;
+
+        // emit the gutter before the first character of every line
+        if (at_line_start) {
+            fprintf(stderr, "\t%d | ", lineno);
+            lineno++;
+        }
+        at_line_start = (ch == '\n');
+
+        putc(ch, stderr);
+    }
+}
+
+/*
+ * Emit `count` fixed-width values of `len` bytes each (word / half / byte
+ * directives) into the current section, one push per value.
+ *
+ * Returns M_SUCCESS, or M_ERROR as soon as a push fails.
+ */
+static int gen_directive_whb(struct generator *gen, const void *data,
+        uint32_t count, uint32_t len)
+{
+    // TODO: endianess
+    uint32_t idx = 0;
+
+    while (idx < count) {
+        // values are packed back to back in the input array
+        void *elem = (char *) data + (len * idx);
+        if (section_push(gen->current, elem, len))
+            return M_ERROR;
+        idx++;
+    }
+
+    return M_SUCCESS;
+}
+
+/*
+ * Generate output for one assembler directive.
+ *
+ * Each case hands back the status of the helper doing the work; directive
+ * types without a case fall through to M_SUCCESS.
+ */
+static int gen_directive(struct generator *gen,
+        const struct expr *const e)
+{
+    const struct expr_directive *const dir = &e->directive;
+
+    switch (dir->type) {
+    case EXPR_DIRECTIVE_ALIGN:
+        if (dir->align < 1) {
+            ERROR("alignment cannot be zero");
+            print_curr_line(gen, e);
+            return M_ERROR;
+        }
+        gen->current->align = dir->align;
+        return M_SUCCESS;
+
+    case EXPR_DIRECTIVE_SPACE:
+        return section_zero(gen->current, dir->space);
+
+    case EXPR_DIRECTIVE_WORD:
+        return gen_directive_whb(gen, dir->words, dir->len,
+                sizeof(uint32_t));
+
+    case EXPR_DIRECTIVE_HALF:
+        return gen_directive_whb(gen, dir->halfs, dir->len,
+                sizeof(uint16_t));
+
+    case EXPR_DIRECTIVE_BYTE:
+        return gen_directive_whb(gen, dir->bytes, dir->len,
+                sizeof(uint8_t));
+
+    case EXPR_DIRECTIVE_SECTION:
+        return section_get(gen, &gen->current, &dir->section);
+
+    // extern and globl only differ in the type stamped on the symbol
+    case EXPR_DIRECTIVE_EXTERN:
+    case EXPR_DIRECTIVE_GLOBL: {
+        struct symbol *sym;
+        int status = symtab_find_or_stub(&gen->symtab, &sym, &dir->label);
+        if (status == M_SUCCESS)
+            sym->type = dir->type == EXPR_DIRECTIVE_EXTERN
+                ? SYM_EXTERN : SYM_GLOBAL;
+        return status;
+    }
+
+    case EXPR_DIRECTIVE_ASCII:
+        // one byte fewer than asciiz
+        return section_push(gen->current, dir->string.str,
+                dir->string.len - 1);
+
+    case EXPR_DIRECTIVE_ASCIIZ:
+        return section_push(gen->current, dir->string.str,
+                dir->string.len);
+    }
+
+    return M_SUCCESS;
+}
+
+/*
+ * Placeholder for constant expressions: both arguments are ignored, an
+ * error is reported and M_ERROR is always returned.
+ */
+static int gen_constant(struct generator *gen, struct expr_const *const expr)
+{
+ (void) gen;
+ (void) expr;
+
+ ERROR("constants not yet implemented");
+ return M_ERROR;
+}
+
+/*
+ * Identify the grammar entry that `name` starts with.
+ *
+ * name: pointer into a grammar string; only its prefix is examined
+ * len:  receives the length of the matched keyword
+ *
+ * The comparison is case-insensitive. If no keyword matches, this is
+ * treated as an internal bug and the process exits.
+ */
+static enum grammer_type get_gmr_type(const char *name, size_t *len)
+{
+    // order matters: "offset(base)" has to be tried before its prefix
+    // "offset"
+    static const struct {
+        const char *text;
+        enum grammer_type type;
+    } entries[] = {
+        {"rd", GMR_RD},
+        {"rs", GMR_RS},
+        {"rt", GMR_RT},
+        {"immd", GMR_IMMD},
+        {"offset(base)", GMR_OFFSET_BASE},
+        {"offset", GMR_OFFSET},
+        {"target", GMR_TARGET},
+        {"hi", GMR_HI},
+        {"lo", GMR_LO},
+    };
+
+    for (size_t i = 0; i < sizeof(entries) / sizeof(entries[0]); i++) {
+        size_t n = strlen(entries[i].text);
+        if (strncasecmp(entries[i].text, name, n) != 0)
+            continue;
+        *len = n;
+        return entries[i].type;
+    }
+
+    ERROR("!!! BUG: this should never hit !!!");
+    exit(1);
+}
+
+/*
+ * Translate a register name (without the leading '$') into a register.
+ *
+ * reg:  receives the register on success
+ * name: register name; at most its first four characters are inspected
+ *       and never more than name->len of them
+ *
+ * Matching is by prefix: characters following a recognised name are
+ * ignored, so the name may be followed by further text.
+ *
+ * Returns M_SUCCESS, or reports "unknown register" and returns M_ERROR.
+ */
+static int parse_register(enum mips32_register *reg, struct string *name)
+{
+ int len = name->len;
+ // characters past the end of the name read as '\0' so they never match
+ int c0 = len > 0 ? name->str[0] : '\0',
+ c1 = len > 1 ? name->str[1] : '\0',
+ c2 = len > 2 ? name->str[2] : '\0',
+ c3 = len > 3 ? name->str[3] : '\0';
+
+ // $zero
+ if (c0 == 'z') {
+ if (c1 == 'e' && c2 == 'r' && c3 == 'o') {
+ *reg = MIPS32_REG_ZERO;
+ return M_SUCCESS;
+ }
+ }
+
+ // $a0-a3 $at
+ else if (c0 == 'a') {
+ if (c1 == 't') {
+ *reg = MIPS32_REG_AT;
+ return M_SUCCESS;
+ }
+ if (c1 >= '0' && c1 <= '3') {
+ // NOTE(review): adding the digit assumes A0..A3 are consecutive
+ // enum values; the same assumption applies to the V, T, S and K
+ // groups below - confirm against mips32.h
+ *reg = MIPS32_REG_A0;
+ *reg += c1 - '0';
+ return M_SUCCESS;
+ }
+ }
+
+ // $v0-v1
+ else if (c0 == 'v') {
+ if (c1 >= '0' && c1 <= '1') {
+ *reg = MIPS32_REG_V0;
+ *reg += c1 - '0';
+ return M_SUCCESS;
+ }
+ }
+
+ // $t0-t9
+ else if (c0 == 't') {
+ if (c1 >= '0' && c1 <= '7') {
+ *reg = MIPS32_REG_T0;
+ *reg += c1 - '0';
+ return M_SUCCESS;
+ }
+ // reg T8-T9 are not in order with T0-T7
+ if (c1 >= '8' && c1 <= '9') {
+ *reg = MIPS32_REG_T8;
+ *reg += c1 - '8';
+ return M_SUCCESS;
+ }
+ }
+
+ // $s0-s7 $sp
+ else if (c0 == 's') {
+ if (c1 >= '0' && c1 <= '7') {
+ *reg = MIPS32_REG_S0;
+ *reg += c1 - '0';
+ return M_SUCCESS;
+ }
+ if (c1 == 'p') {
+ *reg = MIPS32_REG_SP;
+ return M_SUCCESS;
+ }
+ }
+
+ // $k0-k1
+ else if (c0 == 'k') {
+ if (c1 >= '0' && c1 <= '1') {
+ *reg = MIPS32_REG_K0;
+ *reg += c1 - '0';
+ return M_SUCCESS;
+ }
+ }
+
+ // $gp
+ else if (c0 == 'g') {
+ if (c1 == 'p') {
+ *reg = MIPS32_REG_GP;
+ return M_SUCCESS;
+ }
+ }
+
+ // $fp
+ else if (c0 == 'f') {
+ if (c1 == 'p') {
+ *reg = MIPS32_REG_FP;
+ return M_SUCCESS;
+ }
+ }
+
+ // $ra
+ else if (c0 == 'r') {
+ if (c1 == 'a') {
+ *reg = MIPS32_REG_RA;
+ return M_SUCCESS;
+ }
+ }
+
+ // $0-31 (non aliased register names)
+ else if (c0 >= '0' && c0 <= '9') {
+ // one or two decimal digits; a non-digit second character ends the
+ // number after the first digit
+ int i = c0 - '0';
+ if (c1 >= '0' && c1 <= '9') {
+ i *= 10;
+ i += c1 - '0';
+ }
+ if (i <= 31) {
+ *reg = i;
+ return M_SUCCESS;
+ }
+ }
+
+ // every branch that did not return above ends up here
+ ERROR("unknown register $%.*s", name->len, name->str);
+ return M_ERROR;
+}
+
+/*
+ * Match the parsed arguments of an instruction against its grammar and
+ * store the values in `state`.
+ *
+ * gen:     generator, used for printing the offending source line
+ * expr:    the instruction expression holding the parsed arguments
+ * state:   receives registers, immediate, offset, target and label
+ * grammer: grammar whose comma-separated entries are walked in order
+ *
+ * One argument is consumed per grammar entry. The walk ends when the
+ * grammar string ends; arguments beyond that are not examined.
+ *
+ * Returns M_SUCCESS, or M_ERROR after reporting a missing or mistyped
+ * argument. A malformed grammar string is treated as an internal bug and
+ * exits the process.
+ */
+static int gen_ins_read_state(struct generator *gen,
+ struct expr *const expr,
+ struct gen_ins_state *state,
+ struct mips32_grammer *grammer)
+{
+ char *ptr = grammer->grammer;
+ uint32_t argi = 0;
+
+ // read values into state
+ while (*ptr != '\0') {
+
+ if (argi >= expr->instruction.args_len) {
+ ERROR("not enough arguments passed");
+ print_curr_line(gen, expr);
+ return M_ERROR;
+ }
+ struct expr_ins_arg *arg = &expr->instruction.args[argi++];
+
+ // skip receives the length of the grammar keyword at ptr
+ size_t skip;
+ switch (get_gmr_type(ptr, &skip)) {
+ case GMR_RD:
+ // rd
+ if (arg->type != EXPR_INS_ARG_REGISTER) {
+ ERROR("expected a register");
+ print_curr_line(gen, expr);
+ return M_ERROR;
+ }
+ if (parse_register(&state->rd, &arg->reg)) {
+ print_curr_line(gen, expr);
+ return M_ERROR;
+ }
+ break;
+ case GMR_RS:
+ // rs
+ if (arg->type != EXPR_INS_ARG_REGISTER) {
+ ERROR("expected a register");
+ print_curr_line(gen, expr);
+ return M_ERROR;
+ }
+ if (parse_register(&state->rs, &arg->reg)) {
+ print_curr_line(gen, expr);
+ return M_ERROR;
+ }
+ break;
+ case GMR_RT:
+ // rt
+ if (arg->type != EXPR_INS_ARG_REGISTER) {
+ ERROR("expected a register");
+ print_curr_line(gen, expr);
+ return M_ERROR;
+ }
+ if (parse_register(&state->rt, &arg->reg)) {
+ print_curr_line(gen, expr);
+ return M_ERROR;
+ }
+ break;
+ case GMR_IMMD:
+ // immd
+ if (arg->type != EXPR_INS_ARG_IMMEDIATE) {
+ ERROR("expected an immediate");
+ print_curr_line(gen, expr);
+ return M_ERROR;
+ }
+ state->immd = arg->immd;
+ break;
+ case GMR_OFFSET:
+ // offset: either a literal value or a label; with a label the
+ // offset field stays zero and only the label is recorded
+ state->offset = 0;
+ if (arg->type == EXPR_INS_ARG_IMMEDIATE)
+ state->offset = arg->immd;
+ else if (arg->type == EXPR_INS_ARG_LABEL)
+ state->label = &arg->label;
+ else {
+ ERROR("invalid instruction");
+ print_curr_line(gen, expr);
+ return M_ERROR;
+ }
+ break;
+ case GMR_OFFSET_BASE:
+ // offset(base)
+ if (arg->type != EXPR_INS_ARG_OFFSET) {
+ ERROR("expected an offset($base)");
+ print_curr_line(gen, expr);
+ return M_ERROR;
+ }
+ state->offset = arg->offset.immd;
+ if (parse_register(&state->base, &arg->offset.reg)) {
+ print_curr_line(gen, expr);
+ return M_ERROR;
+ }
+ break;
+ case GMR_TARGET:
+ // target: either a literal value or a label; with a label the
+ // target field stays zero and only the label is recorded
+ state->target = 0;
+ if (arg->type == EXPR_INS_ARG_IMMEDIATE)
+ state->target = arg->immd;
+ else if (arg->type == EXPR_INS_ARG_LABEL)
+ state->label = &arg->label;
+ else {
+ ERROR("invalid instruction");
+ print_curr_line(gen, expr);
+ return M_ERROR;
+ }
+ break;
+ default:
+ // remaining entry types consume their argument without storing it
+ break;
+ }
+
+ // skip entry
+ ptr += skip;
+
+ // skip comma
+ if (*ptr == ',') {
+ ptr++;
+ continue;
+ } else if (*ptr == '\0') {
+ break;
+ } else {
+ ERROR("!! BUG3: invalid splitting char %c !!!", *ptr);
+ exit(1);
+ }
+
+ }
+
+ return M_SUCCESS;
+}
+
+/*
+ * Fill in the fields of one machine instruction from `state`, append it
+ * to the current section and, if the instruction refers to a label,
+ * record a reference for it.
+ *
+ * `ins` is passed by value, so the template it was copied from is not
+ * modified. Grammar entries may carry a hardcoded register ("entry=...")
+ * that takes precedence over the value in `state`.
+ *
+ * Returns M_SUCCESS, or M_ERROR when the section, symbol table or
+ * reference table cannot be updated. A hardcoded register that fails to
+ * parse is treated as an internal bug and exits the process.
+ */
+static int gen_ins_write_state(
+ struct generator *gen,
+ union mips32_instruction ins, // the instruction to modify
+ struct gen_ins_state *state, // the current read state
+ char *grammer) // the grammar to parse
+{
+ char *ptr = grammer;
+ enum reference_type reftype = REF_NONE;
+
+ // write values from state into the instruction
+ while (*ptr != '\0') {
+
+ // parse next dsl entry
+ size_t skip;
+ enum grammer_type gmr = get_gmr_type(ptr, &skip);
+
+ // check for dsl hardcoded register argument
+ bool hardcoded = false;
+ enum mips32_register hard_reg;
+ if (*(ptr + skip) == '=') {
+ // parse argument
+ // +2 steps over the '=' and one further character
+ // (presumably the '$' of the register name - confirm against the
+ // grammar tables)
+ char *rptr = ptr + skip + 2;
+ hardcoded = true;
+ struct string regname;
+ string_bss(&regname, rptr);
+ if (parse_register(&hard_reg, &regname)) {
+ ERROR("!!! BUG2: this should never hit !!!");
+ exit(1);
+ }
+ }
+
+ // skip till next comma
+ for (;*ptr != '\0' && *ptr != ','; ptr++);
+ if (*ptr == ',')
+ ptr++;
+
+ switch (gmr) {
+ case GMR_RD:
+ ins.rd = hardcoded ? hard_reg : state->rd;
+ break;
+ case GMR_RS:
+ ins.rs = hardcoded ? hard_reg : state->rs;
+ break;
+ case GMR_RT:
+ ins.rt = hardcoded ? hard_reg : state->rt;
+ break;
+ case GMR_IMMD:
+ ins.immd = state->immd;
+ break;
+ case GMR_OFFSET:
+ ins.offset = state->offset;
+ reftype = REF_MIPS_16;
+ break;
+ case GMR_OFFSET_BASE:
+ // the base register is written to the rs field
+ ins.offset = state->offset;
+ ins.rs = state->base;
+ reftype = REF_MIPS_16;
+ break;
+ case GMR_TARGET:
+ ins.target = state->target;
+ reftype = REF_MIPS_26;
+ break;
+ case GMR_HI:
+ // upper 16 bits of the target
+ ins.immd = state->target >> 16;
+ reftype = REF_MIPS_HI16;
+ break;
+ case GMR_LO:
+ // lower 16 bits of the target
+ ins.immd = state->target & 0x0000FFFF;
+ reftype = REF_MIPS_LO16;
+ break;
+ }
+ }
+
+ // get offset for reference (if needed)
+ // this is the current section length rounded up to the section
+ // alignment, taken before the instruction is appended
+ uint32_t offset = gen->current->len;
+ size_t zeros = offset % gen->current->align;
+ if (zeros)
+ zeros = gen->current->align - zeros;
+ offset += zeros;
+
+ // write instruction to section
+ uint32_t raw = B32(ins.raw);
+ if (section_push(gen->current, &raw, sizeof(uint32_t))) {
+ return M_ERROR;
+ }
+
+ // create reference (if needed)
+ // only when a grammar entry selected a reference type and the source
+ // argument was a label
+ if (reftype != REF_NONE && state->label != NULL) {
+ struct symbol *sym;
+
+ if (symtab_find_or_stub(&gen->symtab, &sym, state->label))
+ return M_ERROR;
+
+ struct reference ref = {
+ .type = reftype,
+ .symbol = sym,
+ .offset = offset
+ };
+
+ if (reftab_push(&gen->current->reftab, &ref)) {
+ return M_ERROR;
+ }
+ }
+
+ return M_SUCCESS;
+}
+
+/*
+ * Generate code for one instruction expression.
+ *
+ * The instruction name is looked up case-insensitively in gen->grammers.
+ * Its arguments are read into a state record, which is then written out
+ * either as a single real instruction or, for a pseudo instruction, as
+ * each instruction of its expansion in turn.
+ *
+ * Returns M_SUCCESS, or M_ERROR for an unknown instruction, bad arguments
+ * or a failed write.
+ */
+static int gen_ins(struct generator *gen, struct expr *const expr)
+{
+    struct mips32_grammer *grammer = NULL;
+    for (uint32_t i = 0; i < gen->grammers_len; i++) {
+        struct mips32_grammer *temp = &gen->grammers[i];
+        if (strcasecmp(temp->name, expr->instruction.name.str) != 0)
+            continue;
+        grammer = temp;
+        break;
+    }
+
+    if (grammer == NULL) {
+        ERROR("unknown instruction");
+        print_curr_line(gen, expr);
+        return M_ERROR;
+    }
+
+    // start from an all-zero state: reading only fills the fields the
+    // grammar mentions, while writing may consult others
+    struct gen_ins_state state;
+    memset(&state, 0, sizeof(state));
+    state.label = NULL;
+
+    // read in the values from the parser
+    if (gen_ins_read_state(gen, expr, &state, grammer))
+        return M_ERROR;
+
+    // write the values into the instructions
+    // ...and then the sections
+    if (grammer->pseudo_len > 0) {
+        // write pseudo
+        for (int i = 0; i < grammer->pseudo_len; i++) {
+            union mips32_instruction ins = gen->instructions[
+                grammer->pseudo_grammer[i].enum_index];
+            if (gen_ins_write_state(gen, ins, &state,
+                    grammer->pseudo_grammer[i].update))
+                return M_ERROR;
+        }
+    } else {
+        // write real
+        union mips32_instruction ins
+            = gen->instructions[grammer->enum_index];
+        if (gen_ins_write_state(gen, ins, &state, grammer->grammer))
+            return M_ERROR;
+    }
+
+    return M_SUCCESS;
+}
+
+/*
+ * Define a label at the current position of the current section.
+ *
+ * The label's offset is the section length rounded up to the section
+ * alignment. A symbol that already exists as a stub is completed in
+ * place; any other existing symbol is a redefinition error; an unknown
+ * name gets a new local symbol.
+ *
+ * Returns M_SUCCESS, or M_ERROR on redefinition or allocation failure.
+ */
+static int gen_label(struct generator *gen, struct string *const label)
+{
+    struct section *cur = gen->current;
+    ptrdiff_t secidx = cur - gen->sections;
+
+    uint32_t offset = cur->len;
+    size_t rem = offset % cur->align;
+    if (rem)
+        offset += cur->align - rem;
+
+    struct symbol *sym;
+
+    /* no such symbol yet: create a new one */
+    if (symtab_find(&gen->symtab, &sym, label->str) != M_SUCCESS) {
+        struct symbol new = {
+            .secidx = secidx,
+            .offset = offset,
+            .type = SYM_LOCAL,
+        };
+        if (string_clone(&new.name, label))
+            return M_ERROR;
+        if (symtab_push(&gen->symtab, &new)) {
+            string_free(&new.name);
+            return M_ERROR;
+        }
+        return M_SUCCESS;
+    }
+
+    // symbols that are not labeled stub are fully defined,
+    // it is a error to redefine them
+    if (sym->secidx != SYM_SEC_STUB) {
+        ERROR("redefined symbol '%s'", label->str);
+        return M_ERROR;
+    }
+
+    /* complete the existing stub */
+    sym->secidx = secidx;
+    sym->offset = offset;
+    return M_SUCCESS;
+}
+
+/*
+ * run codegen for a single expression
+ *
+ * Fetches the next expression from the parser and dispatches it by type.
+ * A non-zero result from the parser is returned unchanged; otherwise the
+ * status of the handler is returned.
+ */
+static int generate(struct generator *gen)
+{
+    struct expr expr;
+
+    // get the next expression
+    int status = parser_next(&gen->parser, &expr);
+    if (status)
+        return status;
+
+    // anything other than a section directive needs a section to work
+    // in; fall back to .data when none has been selected yet
+    bool is_section = expr.type == EXPR_DIRECTIVE &&
+        expr.directive.type == EXPR_DIRECTIVE_SECTION;
+    if (!is_section && gen->current == NULL) {
+        struct string fallback = {
+            .str = ".data",
+            .len = 5,
+            .size = 5,
+            .allocated = false
+        };
+        if (section_get(gen, &gen->current, &fallback)) {
+            expr_free(&expr);
+            return M_ERROR;
+        }
+    }
+
+    status = M_SUCCESS;
+    switch (expr.type) {
+    case EXPR_DIRECTIVE:
+        status = gen_directive(gen, &expr);
+        break;
+    case EXPR_CONSTANT:
+        status = gen_constant(gen, &expr.constant);
+        break;
+    case EXPR_INS:
+        status = gen_ins(gen, &expr);
+        break;
+    case EXPR_LABEL:
+        status = gen_label(gen, &expr.label);
+        break;
+    }
+
+    expr_free(&expr);
+    return status;
+}
+
+/*
+ * run codegen with the mips32r6 specification
+ *
+ * Installs the mips32r6 instruction and grammar tables, then generates
+ * expressions until the input is exhausted. Returns M_ERROR as soon as
+ * one expression fails and M_SUCCESS once M_EOF is reached.
+ */
+int generate_mips32r6(struct generator *gen)
+{
+    gen->instructions = mips32r6_instructions;
+    gen->instructions_len = __MIPS32R6_INS_LEN;
+    gen->grammers = mips32r6_grammers;
+    gen->grammers_len = __MIPS32R6_GRAMMER_LEN;
+
+    for (;;) {
+        int status = generate(gen);
+        if (status == M_ERROR)
+            return M_ERROR;
+        if (status == M_EOF)
+            break;
+    }
+
+    return M_SUCCESS;
+}
+
+/*
+ * Prepare a generator for the given input file.
+ *
+ * Initialises the parser and the symbol table and starts with an empty
+ * section table and no current section.
+ *
+ * Returns M_SUCCESS, or M_ERROR when the parser or symbol table cannot be
+ * set up; in that case nothing is left allocated.
+ */
+int generator_init(const char *file, struct generator *gen)
+{
+    if (parser_init(file, &gen->parser))
+        return M_ERROR;
+    if (symtab_init(&gen->symtab)) {
+        // do not leak the parser that was already set up
+        parser_free(&gen->parser);
+        return M_ERROR;
+    }
+    gen->sections = NULL;
+    gen->sections_len = 0;
+    gen->sections_size = 0;
+    // generate() creates the default section when this is NULL
+    gen->current = NULL;
+    return M_SUCCESS;
+}
+
+/*
+ * Release everything owned by the generator: the parser, the symbol
+ * table, every section and the section table itself. The struct
+ * generator itself is not freed.
+ */
+void generator_free(struct generator *gen)
+{
+ parser_free(&gen->parser);
+ symtab_free(&gen->symtab);
+ // free each section's contents before the array that holds them
+ for (size_t i = 0; i < gen->sections_len; i++)
+ section_free(&gen->sections[i]);
+ free(gen->sections);
+}