mips/masm/gen.c

813 lines
17 KiB
C
Raw Normal View History

2024-10-04 23:41:10 +00:00
#include <stdlib.h>
#include <merror.h>
#include <melf.h>
#include <mips32.h>
#include <mips32r6.h>
#include "tab.h"
#include "gen.h"
#include "parse.h"
///
/// section table
///
/*
 * Apply the built-in permission and alignment defaults for a well-known
 * section name.
 *
 * The name is compared case-insensitively against a fixed table. When no
 * entry matches, `sec` is left exactly as the caller set it up.
 */
static void section_get_default_perm(struct section *sec, const char *name)
{
	static const struct perms {
		const char *name;
		bool read;
		bool write;
		bool execute;
		int alignment;
	} defaults[] = {
		{".text", true, false, true, 4},
		{".code", true, false, true, 4},
		{".data", true, true, false, 1},
		{".stack", true, true, false, 1},
		{".rodata", true, false, false, 1},
		{".bss", true, true, false, 1},
		{".robss", true, false, false, 1},
	};
	// the entry count comes from the table itself, so it cannot drift and
	// no reserved-namespace macro has to leak into the rest of the file
	const size_t count = sizeof(defaults) / sizeof(defaults[0]);

	for (size_t i = 0; i < count; i++) {
		const struct perms *p = &defaults[i];
		if (strcasecmp(name, p->name) != 0)
			continue;
		sec->read = p->read;
		sec->write = p->write;
		sec->execute = p->execute;
		sec->align = p->alignment;
		break;
	}
}
/*
 * Look up a section by name, creating it when it does not exist yet.
 *
 * On success *res points at the section inside gen->sections and M_SUCCESS
 * is returned. On failure M_ERROR is returned, *res is left untouched and
 * the section count is unchanged.
 *
 * Creating a section may grow gen->sections with realloc, which can move
 * the array; section pointers obtained before such a call must be
 * refreshed afterwards.
 */
static int section_get(struct generator *gen, struct section **res,
		       const struct string *const name)
{
	/// find the section if it exists
	for (size_t i = 0; i < gen->sections_len; i++) {
		struct section *sec = &gen->sections[i];
		if (sec->name.len != name->len)
			continue;
		if (strcmp(sec->name.str, name->str) != 0)
			continue;
		*res = sec;
		return M_SUCCESS;
	}

	/// allocate a new one if it doesn't
	// grow only when the table is actually full; growing on every insert
	// would double the capacity per section
	if (gen->sections_len >= gen->sections_size) {
		size_t size = gen->sections_size ? gen->sections_size * 2 : 8;
		void *new = realloc(gen->sections, size * sizeof(struct section));
		if (new == NULL) {
			PERROR("cannot realloc");
			return M_ERROR;
		}
		gen->sections_size = size;
		gen->sections = new;
	}

	// the slot is only counted once it is fully initialised, so a failure
	// below never leaves a half-built section for generator_free to free
	struct section *sec = &gen->sections[gen->sections_len];

	// alloc reftab
	if (reftab_init(&sec->reftab))
		return M_ERROR;

	// copy name
	if (string_clone(&sec->name, name)) {
		reftab_free(&sec->reftab);
		return M_ERROR;
	}

	// set defaults
	sec->len = 0;
	sec->size = 0;
	sec->align = 1;
	sec->data = NULL;
	sec->read = true;
	sec->write = true;
	sec->execute = false;
	section_get_default_perm(sec, name->str);

	gen->sections_len++;
	*res = sec;
	return M_SUCCESS;
}
static int section_extend(struct section *section, size_t space)
{
size_t newlen = section->len + space;
if (newlen < section->size)
return M_SUCCESS;
size_t size = section->size ? section->size * 2 + newlen : newlen * 2;
void *new = realloc(section->data, size);
if (new == NULL) {
PERROR("cannot realloc");
return M_ERROR;
}
section->size = size;
section->data = new;
return M_SUCCESS;
}
/*
 * Append `len` bytes from `data` to the section.
 *
 * Zero padding is inserted first so that the appended data starts at a
 * multiple of section->align. The padding is derived from the current end
 * of the section (the start of the new data), which is the same rule
 * section_zero, gen_label and gen_ins_write_state use to predict where the
 * next item lands.
 *
 * Returns M_SUCCESS, or M_ERROR when the buffer cannot be grown.
 */
static int section_push(struct section *section, void *data, size_t len)
{
	size_t zeros = section->len % section->align;
	if (zeros)
		zeros = section->align - zeros;

	if (section_extend(section, len + zeros))
		return M_ERROR;

	// skip the libc calls for empty ranges; the buffer may still be NULL
	// when nothing has been written to the section yet
	if (zeros)
		memset(section->data + section->len, 0, zeros);
	if (len)
		memcpy(section->data + section->len + zeros, data, len);

	section->len += len + zeros;
	return M_SUCCESS;
}
/*
 * Append `len` zero bytes to the section, preceded by whatever zero padding
 * is needed to bring the current end up to a multiple of section->align.
 *
 * Returns M_SUCCESS, or M_ERROR when the buffer cannot be grown.
 */
static int section_zero(struct section *section, size_t len)
{
	// bytes needed to reach the next alignment boundary (0 if already there)
	size_t pad = section->len % section->align;
	if (pad != 0)
		pad = section->align - pad;

	// padding and payload are both zeros, so they are written in one go
	const size_t total = len + pad;

	if (section_extend(section, total))
		return M_ERROR;

	memset(section->data + section->len, 0, total);
	section->len += total;
	return M_SUCCESS;
}
/*
 * Release the resources held by a section: its data buffer, its name and
 * its reference table. The section struct itself is not freed.
 */
void section_free(struct section *section)
{
	free(section->data);
	string_free(&section->name);
	reftab_free(&section->reftab);
}
///
/// generation functions
///
/*
 * Echo the source text of `expr` to stderr, one "\t<line> | " prefix per
 * source line, by re-reading bytes [byte_start, byte_end) from the lexer's
 * file.
 *
 * Nothing is printed when the file cannot be positioned or the span is
 * empty. The file position is put back afterwards so that printing a
 * diagnostic does not disturb the lexer.
 */
static void print_curr_line(struct generator *gen,
			    const struct expr *const expr)
{
	int line = expr->line_no,
	    len = expr->byte_end - expr->byte_start,
	    nl = true,
	    c = EOF;
	FILE *file = gen->parser.lexer.file;

	// remember where the lexer was reading
	long resume = ftell(file);

	if (fseek(file, expr->byte_start, SEEK_SET) != 0)
		return;

	// `> 0` so that an inverted span prints nothing instead of running
	// until EOF
	while (len-- > 0) {
		c = getc(file);
		if (c == EOF || c == '\0')
			break;
		if (nl) {
			fprintf(stderr, "\t%d | ", line);
			line++;
			nl = false;
		}
		if (c == '\n')
			nl = true;
		putc(c, stderr);
	}

	if (resume >= 0)
		fseek(file, resume, SEEK_SET);
}
/*
 * Push `count` elements of `len` bytes each from `data` into the current
 * section, one section_push call per element (so every element is padded
 * to the section alignment on its own).
 *
 * Returns M_SUCCESS, or M_ERROR as soon as one push fails.
 */
static int gen_directive_whb(struct generator *gen, const void *data,
			     uint32_t count, uint32_t len)
{
	// TODO: endianness
	const char *base = data;
	uint32_t idx = 0;

	while (idx < count) {
		// section_push takes a non-const pointer
		void *elem = (void *) (base + (len * idx));
		if (section_push(gen->current, elem, len))
			return M_ERROR;
		idx++;
	}
	return M_SUCCESS;
}
/*
 * Apply one assembler directive to the generator state.
 *
 * Depending on the directive this changes the current section's alignment,
 * appends zeroed or literal data to the current section, switches (or
 * creates) the current section, or marks a symbol as extern/global.
 *
 * Returns the status of the underlying operation; directive types without
 * a case below generate nothing and yield M_SUCCESS.
 */
static int gen_directive(struct generator *gen,
			 const struct expr *const e)
{
	const struct expr_directive *const dir = &e->directive;

	switch (dir->type) {
	case EXPR_DIRECTIVE_ALIGN:
		if (dir->align < 1) {
			ERROR("alignment cannot be zero");
			print_curr_line(gen, e);
			return M_ERROR;
		}
		gen->current->align = dir->align;
		return M_SUCCESS;

	case EXPR_DIRECTIVE_SPACE:
		return section_zero(gen->current, dir->space);

	case EXPR_DIRECTIVE_WORD:
		return gen_directive_whb(gen, dir->words, dir->len,
					 sizeof(uint32_t));

	case EXPR_DIRECTIVE_HALF:
		return gen_directive_whb(gen, dir->halfs, dir->len,
					 sizeof(uint16_t));

	case EXPR_DIRECTIVE_BYTE:
		return gen_directive_whb(gen, dir->bytes, dir->len,
					 sizeof(uint8_t));

	case EXPR_DIRECTIVE_SECTION:
		return section_get(gen, &gen->current, &dir->section);

	// both only differ in the type stamped onto the symbol
	case EXPR_DIRECTIVE_EXTERN:
	case EXPR_DIRECTIVE_GLOBL: {
		struct symbol *sym;
		const int status = symtab_find_or_stub(&gen->symtab, &sym,
						       &dir->label);
		if (status == M_SUCCESS) {
			if (dir->type == EXPR_DIRECTIVE_EXTERN)
				sym->type = SYM_EXTERN;
			else
				sym->type = SYM_GLOBAL;
		}
		return status;
	}

	// NOTE(review): .ascii pushes string.len - 1 bytes and .asciiz pushes
	// string.len bytes; presumably the parsed length already counts a
	// trailing NUL -- confirm against the lexer
	case EXPR_DIRECTIVE_ASCII:
		return section_push(gen->current, dir->string.str,
				    dir->string.len - 1);

	case EXPR_DIRECTIVE_ASCIIZ:
		return section_push(gen->current, dir->string.str,
				    dir->string.len);
	}

	return M_SUCCESS;
}
/*
 * Placeholder for constant expressions: code generation for them does not
 * exist yet, so this reports an error and always returns M_ERROR.
 */
static int gen_constant(struct generator *gen, struct expr_const *const expr)
{
	// neither argument is used until constants are supported
	(void) expr;
	(void) gen;

	ERROR("constants not yet implemented");
	return M_ERROR;
}
static enum grammer_type get_gmr_type(const char *name, size_t *len)
{
#define CHK(part, str) { \
if (strncasecmp(str, name, strlen(str)) == 0) { \
*len = strlen(str); \
return GMR_ ##part; \
}} \
CHK(RD, "rd")
CHK(RS, "rs")
CHK(RT, "rt")
CHK(IMMD, "immd")
CHK(OFFSET_BASE, "offset(base)")
CHK(OFFSET, "offset")
CHK(TARGET, "target")
CHK(HI, "hi")
CHK(LO, "lo")
#undef CHK
ERROR("!!! BUG: this should never hit !!!");
exit(1);
}
static int parse_register(enum mips32_register *reg, struct string *name)
{
int len = name->len;
int c0 = len > 0 ? name->str[0] : '\0',
c1 = len > 1 ? name->str[1] : '\0',
c2 = len > 2 ? name->str[2] : '\0',
c3 = len > 3 ? name->str[3] : '\0';
// $zero
if (c0 == 'z') {
if (c1 == 'e' && c2 == 'r' && c3 == 'o') {
*reg = MIPS32_REG_ZERO;
return M_SUCCESS;
}
}
// $a0-a3 $at
else if (c0 == 'a') {
if (c1 == 't') {
*reg = MIPS32_REG_AT;
return M_SUCCESS;
}
if (c1 >= '0' && c1 <= '3') {
*reg = MIPS32_REG_A0;
*reg += c1 - '0';
return M_SUCCESS;
}
}
// $v0-v1
else if (c0 == 'v') {
if (c1 >= '0' && c1 <= '1') {
*reg = MIPS32_REG_V0;
*reg += c1 - '0';
return M_SUCCESS;
}
}
// $t0-t9
else if (c0 == 't') {
if (c1 >= '0' && c1 <= '7') {
*reg = MIPS32_REG_T0;
*reg += c1 - '0';
return M_SUCCESS;
}
// reg T8-T9 are not in order with T0-T7
if (c1 >= '8' && c1 <= '9') {
*reg = MIPS32_REG_T8;
*reg += c1 - '8';
return M_SUCCESS;
}
}
// $s0-s7 $sp
else if (c0 == 's') {
if (c1 >= '0' && c1 <= '7') {
*reg = MIPS32_REG_S0;
*reg += c1 - '0';
return M_SUCCESS;
}
if (c1 == 'p') {
*reg = MIPS32_REG_SP;
return M_SUCCESS;
}
}
// $k0-k1
else if (c0 == 'k') {
if (c1 >= '0' && c1 <= '1') {
*reg = MIPS32_REG_K0;
*reg += c1 - '0';
return M_SUCCESS;
}
}
// $gp
else if (c0 == 'g') {
if (c1 == 'p') {
*reg = MIPS32_REG_GP;
return M_SUCCESS;
}
}
// $fp
else if (c0 == 'f') {
if (c1 == 'p') {
*reg = MIPS32_REG_FP;
return M_SUCCESS;
}
}
// $rp
else if (c0 == 'r') {
if (c1 == 'a') {
*reg = MIPS32_REG_RA;
return M_SUCCESS;
}
}
// $0-31 (non aliased register names)
else if (c0 >= '0' && c0 <= '9') {
int i = c0 - '0';
if (c1 >= '0' && c1 <= '9') {
i *= 10;
i += c1 - '0';
}
if (i <= 31) {
*reg = i;
return M_SUCCESS;
}
}
ERROR("unknown register $%.*s", name->len, name->str);
return M_ERROR;
}
static int gen_ins_read_state(struct generator *gen,
struct expr *const expr,
struct gen_ins_state *state,
struct mips32_grammer *grammer)
{
char *ptr = grammer->grammer;
uint32_t argi = 0;
// read values into state
while (*ptr != '\0') {
if (argi >= expr->instruction.args_len) {
ERROR("not enough arguments passed");
print_curr_line(gen, expr);
return M_ERROR;
}
struct expr_ins_arg *arg = &expr->instruction.args[argi++];
size_t skip;
switch (get_gmr_type(ptr, &skip)) {
case GMR_RD:
// rd
if (arg->type != EXPR_INS_ARG_REGISTER) {
ERROR("expected a register");
print_curr_line(gen, expr);
return M_ERROR;
}
if (parse_register(&state->rd, &arg->reg)) {
print_curr_line(gen, expr);
return M_ERROR;
}
break;
case GMR_RS:
// rs
if (arg->type != EXPR_INS_ARG_REGISTER) {
ERROR("expected a register");
print_curr_line(gen, expr);
return M_ERROR;
}
if (parse_register(&state->rs, &arg->reg)) {
print_curr_line(gen, expr);
return M_ERROR;
}
break;
case GMR_RT:
// rt
if (arg->type != EXPR_INS_ARG_REGISTER) {
ERROR("expected a register");
print_curr_line(gen, expr);
return M_ERROR;
}
if (parse_register(&state->rt, &arg->reg)) {
print_curr_line(gen, expr);
return M_ERROR;
}
break;
case GMR_IMMD:
// immd
if (arg->type != EXPR_INS_ARG_IMMEDIATE) {
ERROR("expected an immediate");
print_curr_line(gen, expr);
return M_ERROR;
}
state->immd = arg->immd;
break;
case GMR_OFFSET:
// offset
state->offset = 0;
if (arg->type == EXPR_INS_ARG_IMMEDIATE)
state->offset = arg->immd;
else if (arg->type == EXPR_INS_ARG_LABEL)
state->label = &arg->label;
else {
ERROR("invalid instruction");
print_curr_line(gen, expr);
return M_ERROR;
}
break;
case GMR_OFFSET_BASE:
// offset(base)
if (arg->type != EXPR_INS_ARG_OFFSET) {
ERROR("expected an offset($base)");
print_curr_line(gen, expr);
return M_ERROR;
}
state->offset = arg->offset.immd;
if (parse_register(&state->base, &arg->offset.reg)) {
print_curr_line(gen, expr);
return M_ERROR;
}
break;
case GMR_TARGET:
// target
state->target = 0;
if (arg->type == EXPR_INS_ARG_IMMEDIATE)
state->target = arg->immd;
else if (arg->type == EXPR_INS_ARG_LABEL)
state->label = &arg->label;
else {
ERROR("invalid instruction");
print_curr_line(gen, expr);
return M_ERROR;
}
break;
default:
break;
}
// skip entry
ptr += skip;
// skip comma
if (*ptr == ',') {
ptr++;
continue;
} else if (*ptr == '\0') {
break;
} else {
ERROR("!! BUG3: invalid splitting char %c !!!", *ptr);
exit(1);
}
}
return M_SUCCESS;
}
static int gen_ins_write_state(
struct generator *gen,
union mips32_instruction ins, // the instruction to modify
struct gen_ins_state *state, // the current read state
char *grammer) // the gramemr to parse
{
char *ptr = grammer;
enum reference_type reftype = REF_NONE;
// read values into state
while (*ptr != '\0') {
// parse next dsl entry
size_t skip;
enum grammer_type gmr = get_gmr_type(ptr, &skip);
// check for dsl hardcoded register argument
bool hardcoded = false;
enum mips32_register hard_reg;
if (*(ptr + skip) == '=') {
// parse argument
char *rptr = ptr + skip + 2;
hardcoded = true;
struct string regname;
string_bss(&regname, rptr);
if (parse_register(&hard_reg, &regname)) {
ERROR("!!! BUG2: this should never hit !!!");
exit(1);
}
}
// skip till next comma
for (;*ptr != '\0' && *ptr != ','; ptr++);
if (*ptr == ',')
ptr++;
switch (gmr) {
case GMR_RD:
ins.rd = hardcoded ? hard_reg : state->rd;
break;
case GMR_RS:
ins.rs = hardcoded ? hard_reg : state->rs;
break;
case GMR_RT:
ins.rt = hardcoded ? hard_reg : state->rt;
break;
case GMR_IMMD:
ins.immd = state->immd;
break;
case GMR_OFFSET:
ins.offset = state->offset;
reftype = REF_MIPS_16;
break;
case GMR_OFFSET_BASE:
ins.offset = state->offset;
ins.rs = state->base;
reftype = REF_MIPS_16;
break;
case GMR_TARGET:
ins.target = state->target;
reftype = REF_MIPS_26;
break;
case GMR_HI:
ins.immd = state->target >> 16;
reftype = REF_MIPS_HI16;
break;
case GMR_LO:
ins.immd = state->target & 0x0000FFFF;
reftype = REF_MIPS_LO16;
break;
}
}
// get offset for reference (if needed)
uint32_t offset = gen->current->len;
size_t zeros = offset % gen->current->align;
if (zeros)
zeros = gen->current->align - zeros;
offset += zeros;
// write instructon to section
uint32_t raw = B32(ins.raw);
if (section_push(gen->current, &raw, sizeof(uint32_t))) {
return M_ERROR;
}
// create reference (if needed)
if (reftype != REF_NONE && state->label != NULL) {
struct symbol *sym;
if (symtab_find_or_stub(&gen->symtab, &sym, state->label))
return M_ERROR;
struct reference ref = {
.type = reftype,
.symbol = sym,
.offset = offset
};
if (reftab_push(&gen->current->reftab, &ref)) {
return M_ERROR;
}
}
return M_SUCCESS;
}
static int gen_ins(struct generator *gen, struct expr *const expr)
{
struct mips32_grammer *grammer = NULL;
for (uint32_t i = 0; i < gen->grammers_len; i++) {
struct mips32_grammer *temp = &gen->grammers[i];
if (strcasecmp(temp->name, expr->instruction.name.str) != 0)
continue;
grammer = temp;
break;
}
if (grammer == NULL) {
ERROR("unknown instruction");
print_curr_line(gen, expr);
return M_ERROR;
}
struct gen_ins_state state;
state.label = NULL;
// read in the values from the parser
if (gen_ins_read_state(gen, expr, &state, grammer))
return M_ERROR;
// write the values into the instructions
// ...and then the sections
if (grammer->pseudo_len > 0) {
// write pseudo
for (int i = 0; i < grammer->pseudo_len; i++) {
union mips32_instruction ins = gen->instructions[
grammer->pseudo_grammer[i].enum_index];
if (gen_ins_write_state(gen, ins, &state,
grammer->pseudo_grammer[i].update))
return M_ERROR;
}
} else {
// write real
union mips32_instruction ins
= gen->instructions[grammer->enum_index];
if (gen_ins_write_state(gen, ins, &state, grammer->grammer))
return M_ERROR;
}
return M_SUCCESS;
}
static int gen_label(struct generator *gen, struct string *const label)
{
uint32_t offset = gen->current->len;
ptrdiff_t secidx = gen->current - gen->sections;
size_t zeros = offset % gen->current->align;
if (zeros)
zeros = gen->current->align - zeros;
offset += zeros;
struct symbol *sym;
/* update existing symbol (if exists) */
if (symtab_find(&gen->symtab, &sym, label->str) == M_SUCCESS) {
if (sym->secidx != SYM_SEC_STUB) {
// symbols that are not labeled stub are fully defined,
// it is a error to redefine them
ERROR("redefined symbol '%s'", label->str);
return M_ERROR;
}
sym->secidx = secidx;
sym->offset = offset;
/* create a new symbol */
} else {
struct symbol new = {
.secidx = secidx,
.offset = offset,
.type = SYM_LOCAL,
};
if (string_clone(&new.name, label))
return M_ERROR;
if (symtab_push(&gen->symtab, &new)) {
string_free(&new.name);
return M_ERROR;
}
}
return M_SUCCESS;
}
/*
 * run codegen for a single expression
 *
 * Fetches the next expression from the parser and dispatches it to the
 * matching generator. Any non-zero parser result (including end of input)
 * is returned unchanged; otherwise the result of the generator is returned.
 * The expression is always released before returning.
 */
static int generate(struct generator *gen)
{
	struct expr expr;

	// get the next expression
	int res = parser_next(&gen->parser, &expr);
	if (res)
		return res;

	// anything but a section directive needs a section to work on;
	// fall back to .data when none has been selected yet
	const bool selects_section =
		expr.type == EXPR_DIRECTIVE &&
		expr.directive.type == EXPR_DIRECTIVE_SECTION;

	if (!selects_section && gen->current == NULL) {
		struct string temp = {
			.str = ".data",
			.len = 5,
			.size = 5,
			.allocated = false
		};
		if (section_get(gen, &gen->current, &temp)) {
			expr_free(&expr);
			return M_ERROR;
		}
	}

	res = M_SUCCESS;
	switch (expr.type) {
	case EXPR_DIRECTIVE:
		res = gen_directive(gen, &expr);
		break;
	case EXPR_CONSTANT:
		res = gen_constant(gen, &expr.constant);
		break;
	case EXPR_INS:
		res = gen_ins(gen, &expr);
		break;
	case EXPR_LABEL:
		res = gen_label(gen, &expr.label);
		break;
	}

	expr_free(&expr);
	return res;
}
/*
 * run codegen with the mips32r6 specification
 *
 * Installs the mips32r6 instruction and grammar tables on the generator
 * and processes expressions until the input ends. Returns M_SUCCESS at end
 * of input, or M_ERROR as soon as one expression fails.
 */
int generate_mips32r6(struct generator *gen)
{
	gen->instructions = mips32r6_instructions;
	gen->instructions_len = __MIPS32R6_INS_LEN;
	gen->grammers = mips32r6_grammers;
	gen->grammers_len = __MIPS32R6_GRAMMER_LEN;

	for (;;) {
		const int res = generate(gen);
		if (res == M_ERROR)
			return M_ERROR;
		if (res == M_EOF)
			return M_SUCCESS;
		// any other result: keep going with the next expression
	}
}
/*
 * Prepare a generator for `file`: set up the parser and the symbol table
 * and start with an empty section table and no current section.
 *
 * Returns M_SUCCESS, or M_ERROR when the parser or symbol table cannot be
 * initialised. On failure nothing stays allocated, so the generator must
 * not be passed to generator_free.
 */
int generator_init(const char *file, struct generator *gen)
{
	if (parser_init(file, &gen->parser))
		return M_ERROR;

	if (symtab_init(&gen->symtab)) {
		// do not leak the parser that was already set up
		parser_free(&gen->parser);
		return M_ERROR;
	}

	gen->sections = NULL;
	gen->sections_len = 0;
	gen->sections_size = 0;
	// generate() tests this against NULL to decide whether the default
	// section still has to be created
	gen->current = NULL;
	return M_SUCCESS;
}
/*
 * Release everything owned by the generator: the parser, the symbol table,
 * every section and the section table itself. The generator struct is not
 * freed.
 */
void generator_free(struct generator *gen)
{
	parser_free(&gen->parser);
	symtab_free(&gen->symtab);

	size_t idx = 0;
	while (idx < gen->sections_len) {
		section_free(&gen->sections[idx]);
		idx++;
	}
	free(gen->sections);
}