summaryrefslogtreecommitdiff
path: root/masm
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--masm/asm.c645
-rw-r--r--masm/asm.h197
-rw-r--r--masm/gen.c812
-rw-r--r--masm/gen.h118
-rw-r--r--masm/lex.c223
-rw-r--r--masm/lex.h86
-rw-r--r--masm/out.obin0 -> 644 bytes
-rw-r--r--masm/parse.c1459
-rw-r--r--masm/parse.h140
-rw-r--r--masm/reftab.c43
-rw-r--r--masm/reltab.c43
-rw-r--r--masm/sectab.c166
-rw-r--r--masm/string.c81
-rw-r--r--masm/strtab.c8
-rw-r--r--masm/symtab.c108
-rw-r--r--masm/tab.h98
16 files changed, 2111 insertions, 2116 deletions
diff --git a/masm/asm.c b/masm/asm.c
index 328ae16..619f9e5 100644
--- a/masm/asm.c
+++ b/masm/asm.c
@@ -1,5 +1,4 @@
#include <merror.h>
-#include <mips.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
@@ -9,8 +8,9 @@
#include <melf.h>
#include "asm.h"
+#include "gen.h"
#include "mlimits.h"
-#include "parse.h"
+#include "tab.h"
extern char *current_file;
@@ -19,325 +19,158 @@ extern char *current_file;
#define SEC_ALIGN 0x1000
-static int create_symbol(struct assembler *assembler,
- const char name[MAX_LEX_LENGTH],
- ssize_t section_idx,
- size_t section_offset,
- unsigned char bind)
-{
- size_t str_off;
- if (strtab_write_str(&assembler->strtab, name, &str_off))
- return M_ERROR;
+static int elf_rel_type(enum reference_type ty) {
+ switch (ty) {
+ case REF_NONE:
+ return R_MIPS_NONE;
+ case REF_MIPS_16:
+ return R_MIPS_16;
+ case REF_MIPS_26:
+ return R_MIPS_26;
+ case REF_MIPS_PC16:
+ return R_MIPS_PC16;
+ case REF_MIPS_LO16:
+ return R_MIPS_LO16;
+ case REF_MIPS_HI16:
+ return R_MIPS_HI16;
+ }
- Elf32_Sym symbol = {
- .st_name = B32(str_off),
- .st_value = B32(section_offset),
- .st_size = 0,
- .st_info = ELF32_ST_INFO(bind, STT_NOTYPE),
- .st_other = ELF32_ST_VISIBILITY(STV_DEFAULT),
- .st_shndx = B16(section_idx),
- };
+ return R_MIPS_NONE;
+}
- // dont put magic flag values inside symbol, only real indexes
- if (section_idx < 0)
- symbol.st_shndx = 0;
+static int elf_section_init_reltab(struct section *sec,
+ struct elf_section *elf_sec)
+{
+ Elf32_Rel *reltab = malloc(sizeof(Elf32_Rel) *
+ sec->reftab.len);
- if (symtab_push(&assembler->symtab, symbol, section_idx))
+ if (reltab == NULL) {
+ PERROR("cannot alloc");
return M_ERROR;
+ }
+ for (uint32_t i = 0; i < sec->reftab.len; i++) {
+ Elf32_Rel *rel = &reltab[i];
+ struct reference *ref = &sec->reftab.references[i];
+ rel->r_offset = B32(ref->offset);
+ int sym = ref->symbol->tabidx + 1;
+ int type = elf_rel_type(ref->type);
+ rel->r_info = B32(ELF32_R_INFO(sym, type));
+ }
+
+ elf_sec->reltab_len = sec->reftab.len;
+ elf_sec->reltab = reltab;
return M_SUCCESS;
}
-static int find_symbol_or_stub(struct assembler *assembler,
- const char name[MAX_LEX_LENGTH],
- Elf32_Sym **res,
- size_t *res2)
+static int elf_section_init(struct section *sec, struct elf_section *elf_sec)
{
- if (symtab_find(&assembler->symtab, res, res2, name) == M_SUCCESS)
- return M_SUCCESS;
+ elf_sec->data = sec;
+ elf_sec->shdr_idx = 0; // dont know yet
+ elf_sec->reltab_shidx = 0; // dont know yet
+ elf_sec->reltab_len = sec->reftab.len;
+ elf_sec->reltab = NULL;
- if (create_symbol(assembler, name, SYMSEC_STUB, 0, STB_LOCAL))
+ if (sec->reftab.len && elf_section_init_reltab(sec, elf_sec))
return M_ERROR;
- size_t idx = assembler->symtab.len - 1;
-
- if (res != NULL)
- *res = &assembler->symtab.symbols[idx];
- if (res2 != NULL)
- *res2 = idx;
-
return M_SUCCESS;
}
-static int handle_directive(struct assembler *assembler,
- struct mips_directive *directive)
+/* free an elf section */
+static void elf_section_free(struct elf_section *sec)
{
- switch (directive->type) {
- case MIPS_DIRECTIVE_SECTION: {
- struct section_table *sec_tbl = &assembler->sectab;
- struct section *sec;
- if (sectab_get(sec_tbl, &sec, directive->name)
- == M_SUCCESS) {
- sec_tbl->current = sec;
- break;
- }
-
- if (sectab_alloc(sec_tbl, &sec, directive->name))
- return M_ERROR;
-
- sec_tbl->current = sec;
- break;
- }
-
- case MIPS_DIRECTIVE_ALIGN: {
- assembler->sectab.current->alignment =
- 1 << directive->align;
- if (assembler->sectab.current->alignment == 0) {
- ERROR("cannot align to zero");
- return M_ERROR;
- }
- break;
- }
-
- case MIPS_DIRECTIVE_SPACE: {
- struct section_entry entry;
- entry.type = ENT_NO_DATA;
- entry.size = directive->space;
- if (sec_push(assembler->sectab.current, entry))
- return M_ERROR;
- break;
- }
-
- case MIPS_DIRECTIVE_WORD: {
- for (uint32_t i = 0; i < directive->len; i++) {
- struct section_entry entry;
- entry.type = ENT_WORD;
- entry.word = directive->words[i];
- entry.size = sizeof(uint32_t);
- if (sec_push(assembler->sectab.current,
- entry))
- return M_ERROR;
- }
- break;
- }
+ if (sec->reltab != NULL)
+ free(sec->reltab);
+}
- case MIPS_DIRECTIVE_HALF: {
- for (uint32_t i = 0; i < directive->len; i++) {
- struct section_entry entry;
- entry.type = ENT_HALF;
- entry.half = directive->halfs[i];
- entry.size = sizeof(uint16_t);
- if (sec_push(assembler->sectab.current,
- entry))
- return M_ERROR;
- }
- break;
- }
+static int asm_init_sections(struct assembler *assembler)
+{
+ struct section *sections = assembler->gen.sections;
+ uint32_t len = assembler->gen.sections_len;
- case MIPS_DIRECTIVE_BYTE: {
- for (uint32_t i = 0; i < directive->len; i++) {
- struct section_entry entry;
- entry.type = ENT_BYTE;
- entry.byte = directive->bytes[i];
- entry.size = sizeof(uint8_t);
- if (sec_push(assembler->sectab.current,
- entry))
- return M_ERROR;
- }
- break;
+ struct elf_section *elftab = malloc(sizeof(struct elf_section) * len);
+ if (elftab == NULL) {
+ PERROR("cannot alloc");
+ return M_ERROR;
}
- case MIPS_DIRECTIVE_EXTERN: {
- if (symtab_find(&assembler->symtab, NULL, NULL,
- directive->name) == M_SUCCESS) {
- ERROR("cannot extern local symbol '%s'",
- directive->name);
+ for (uint32_t i = 0; i < len; i++) {
+ struct elf_section *elfsec = &elftab[i];
+ elfsec->data = &sections[i];
+ if (elf_section_init(&sections[i], elfsec)) {
+ free(elftab);
return M_ERROR;
}
-
- if (create_symbol(assembler, directive->name, SYMSEC_EXTERN, 0,
- STB_GLOBAL))
- return M_ERROR;
-
- break;
- }
-
- case MIPS_DIRECTIVE_GLOBL: {
- Elf32_Sym *sym;
- if (symtab_find(&assembler->symtab, &sym, NULL,
- directive->name) == M_SUCCESS) {
- sym->st_info = ELF32_ST_INFO(STB_GLOBAL, STT_NOTYPE);
- break;
- }
-
- if (create_symbol(assembler, directive->name, SYMSEC_STUB, 0,
- STB_GLOBAL))
- return M_ERROR;
-
- break;
}
- case MIPS_DIRECTIVE_ASCII: {
- struct section_entry entry;
- entry.type = ENT_STR;
- entry.size = strlen(directive->name);
- memcpy(entry.str, directive->name, entry.size);
- if (sec_push(assembler->sectab.current, entry))
- return M_ERROR;
- break;
- }
-
- case MIPS_DIRECTIVE_ASCIIZ: {
- struct section_entry entry;
- entry.type = ENT_STR;
- entry.size = strlen(directive->name) + 1;
- memcpy(entry.str, directive->name, entry.size);
- if (sec_push(assembler->sectab.current, entry))
- return M_ERROR;
- break;
- }
- }
-
- return M_SUCCESS;
+ assembler->sections = elftab;
+ assembler->section_len = len;
+ return M_SUCCESS;
}
-static int handle_label(struct assembler *assembler,
- const char name[MAX_LEX_LENGTH])
-{
- struct section *cur = assembler->sectab.current;
-
- Elf32_Sym *ref;
- size_t symidx;
-
- if (symtab_find(&assembler->symtab, &ref, &symidx, name) == M_SUCCESS) {
- ssize_t *sec = &assembler->symtab.sections[symidx];
-
- // check if the symbol is acutally jus a stub, if so
- // we need to update it
- if (*sec == SYMSEC_STUB) {
- *sec = cur->index;
- ref->st_value = B32(sec_size(cur));
- return M_SUCCESS;
- }
-
- ERROR("redefined symbol '%s'", name);
- return M_ERROR;
+static int elf_sym_bind(enum symbol_type ty) {
+ switch (ty) {
+ case SYM_LOCAL:
+ return STB_LOCAL;
+ case SYM_GLOBAL:
+ return STB_GLOBAL;
+ case SYM_EXTERN:
+ return STB_GLOBAL;
}
- if (create_symbol(assembler, name, cur->index, sec_size(cur),
- STB_LOCAL))
- return M_ERROR;
-
- return M_SUCCESS;
+ return STB_GLOBAL;
}
-static int handle_ins(struct assembler *assembler,
- struct ins_expr *expr)
-{
- struct section *sec = assembler->sectab.current;
- size_t secidx = sec->len;
-
- for (size_t i = 0; i < expr->ins_len; i++) {
- union mips_instruction_data *ins =
- &expr->ins[i].data;
- struct reference *ref =
- &expr->ref[i];
- struct section_entry entry;
-
- entry.type = ENT_INS;
- entry.size = sizeof(union mips_instruction_data);
- entry.ins = B32(ins->raw);
+/*
+ * Build the ELF symbol table from the generator's symbol table.
+ *
+ * Allocates symtab->len + 1 entries: index 0 is the null entry and
+ * generator symbol i is stored at index i + 1. Each symbol name is written
+ * into assembler->strtab. st_shndx is set to 0 here.
+ *
+ * Returns M_SUCCESS after storing the table in assembler->symbols and its
+ * length in assembler->symtab_len. Returns M_ERROR if the allocation or a
+ * string table write fails; in that case nothing is stored.
+ */
+static int asm_init_symtab(struct assembler *assembler) {
+ struct symbol_table *symtab = &assembler->gen.symtab;
+ size_t len = symtab->len + 1;
+ Elf32_Sym *elftab = malloc(sizeof(Elf32_Sym) * len);
+ if (elftab == NULL) {
+ PERROR("cannot alloc");
+ // bail out: the writes below would dereference NULL
+ return M_ERROR;
+ }
- if (sec_push(sec, entry))
- return M_ERROR;
+ // add null entry
+ elftab[0] = (Elf32_Sym) {0};
- if (ref->type == R_MIPS_NONE)
- continue;
+ // add rest of the entries
+ for (uint32_t i = 0; i < symtab->len; i++) {
+ struct symbol *sym = &symtab->symbols[i];
+ int bind = elf_sym_bind(sym->type);
+ int type = STT_NOTYPE;
- size_t symidx;
- if (find_symbol_or_stub(assembler, ref->name, NULL, &symidx))
+ // get name
+ size_t str_off;
+ if (strtab_write_str(&assembler->strtab, sym->name.str,
+ &str_off)) {
+ free(elftab);
return M_ERROR;
+ }
- Elf32_Rela rel = {
- .r_info = B32(ELF32_R_INFO(symidx, ref->type)),
- .r_addend = B32(ref->addend),
- .r_offset = B32(sec_index(sec, secidx + i)),
+ elftab[i+1] = (Elf32_Sym) {
+ .st_name = B32(str_off),
+ .st_info = ELF32_ST_INFO(bind, type),
+ .st_size = 0,
+ .st_other = 0,
+ .st_value = B32(sym->offset),
+ .st_shndx = 0,
};
-
- if (reltab_push(&sec->reltab, rel))
- return M_ERROR;
}
- return M_SUCCESS;
-}
-
-static int parse_file(struct assembler *assembler)
-{
- struct parser *parser = &assembler->parser;
-
- while (1) {
- struct expr expr;
- int res = parser_next(parser, &expr);
-
- if (res == M_ERROR)
- return M_ERROR;
-
- if (res == M_EOF)
- return M_SUCCESS;
-
- switch (expr.type) {
- case EXPR_INS:
- if (handle_ins(assembler, &expr.ins))
- return M_ERROR;
- break;
- case EXPR_DIRECTIVE:
- if (handle_directive(assembler,
- &expr.directive))
- return M_ERROR;
- break;
-
- case EXPR_LABEL:
- if (handle_label(assembler, expr.label))
- return M_ERROR;
- break;
-
- case EXPR_CONSTANT:
- break;
- }
- }
+ assembler->symbols = elftab;
+ assembler->symtab_len = len;
return M_SUCCESS;
}
-static int assemble_phdr(struct assembler *assembler, Elf32_Phdr **res,
- uint32_t *res2)
+static int parse_file(struct assembler *assembler)
{
- Elf32_Phdr *phdr = malloc(sizeof(Elf32_Phdr) *
- assembler->sectab.len);
- if (phdr == NULL) {
- PERROR("cannot alloc");
- return M_ERROR;;
- }
-
- for (uint32_t i = 0; i < assembler->sectab.len; i++) {
- Elf32_Phdr *hdr = &phdr[i];
- struct section *sec = &assembler->sectab.sections[i];
- size_t size = sec_size(sec);
- hdr->p_type = B32(PT_LOAD);
- hdr->p_flags = B32(
- (sec->execute << 0) |
- (sec->write << 1) |
- (sec->read << 2));
- hdr->p_offset = 0;
- hdr->p_vaddr = 0;
- hdr->p_paddr = 0;
- hdr->p_filesz = B32(size);
- hdr->p_memsz = B32(size);
- hdr->p_align = B32(SEC_ALIGN);
- }
-
- *res = phdr;
- *res2 = assembler->sectab.len;
+ if (generate_mips32r6(&assembler->gen))
+ return M_ERROR;
+ if (asm_init_sections(assembler))
+ return M_ERROR;
+ if (asm_init_symtab(assembler))
+ return M_ERROR;
return M_SUCCESS;
}
@@ -349,8 +182,8 @@ static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res,
max_entries += 1; // symtab
max_entries += 1; // strtab
max_entries += 1; // shtrtab
- max_entries += assembler->sectab.len; // sections
- max_entries += assembler->sectab.len; // reltabs per section
+ max_entries += assembler->section_len; // sections
+ max_entries += assembler->section_len; // reltabs per section
Elf32_Shdr *shdr = malloc(sizeof(Elf32_Shdr) * max_entries);
@@ -366,16 +199,17 @@ static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res,
shdr[count++] = (Elf32_Shdr) {0};
// reltables
- for (uint32_t i = 0; i < assembler->sectab.len; i++) {
- struct section *sec = &assembler->sectab.sections[i];
- const char *prefix = ".reltab.";
- char reltab_name[MAX_LEX_LENGTH + 8];
+ for (uint32_t i = 0; i < assembler->section_len; i++) {
+ struct elf_section *sec = &assembler->sections[i];
+ const char *prefix = ".reltab";
+ char reltab_name[MAX_LEX_LENGTH + strlen(prefix)];
- if (sec->reltab.len == 0)
+ if (sec->reltab_len == 0)
continue;
strcpy(reltab_name, prefix);
- strcat(reltab_name, sec->name);
+ strncat(reltab_name, sec->data->name.str,
+ MAX_LEX_LENGTH - strlen(prefix));
if (strtab_write_str(&assembler->shstrtab,
reltab_name, &str_off)) {
@@ -386,7 +220,7 @@ static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res,
sec->reltab_shidx = count;
shdr[count++] = (Elf32_Shdr) {
.sh_name = B32(str_off),
- .sh_type = B32(SHT_RELA),
+ .sh_type = B32(SHT_REL),
.sh_flags = 0,
.sh_addr = 0,
.sh_offset = 0,
@@ -394,41 +228,41 @@ static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res,
.sh_link = 0,
.sh_info = 0,
.sh_addralign = B32(1),
- .sh_entsize = B32(sizeof(Elf32_Rela)),
+ .sh_entsize = B32(sizeof(Elf32_Rel)),
};
}
// for each section
- for (uint32_t i = 0; i < assembler->sectab.len; i++) {
- struct section *sec = &assembler->sectab.sections[i];
- char name[MAX_LEX_LENGTH+1] = ".";
+ for (uint32_t i = 0; i < assembler->section_len; i++) {
+ struct elf_section *sec = &assembler->sections[i];
+ const char *name = sec->data->name.str;
- strcat(name, sec->name);
if (strtab_write_str(&assembler->shstrtab, name, &str_off)) {
free(shdr);
return M_ERROR;
}
sec->shdr_idx = count;
- if (sec->reltab.len != 0)
+ if (sec->reltab_len != 0)
shdr[sec->reltab_shidx].sh_info = B32(count);
- shdr[count++] = (Elf32_Shdr){
- .sh_name = B32(str_off),
- .sh_type = B32(SHT_PROGBITS),
- .sh_flags = B32(
- (sec->write << 0) |
- (sec->execute << 2) |
+ shdr[count++] = (Elf32_Shdr){
+ .sh_name = B32(str_off),
+ .sh_type = B32(sec->data->execute ?
+ SHT_PROGBITS : SHT_NOBITS),
+ .sh_flags = B32(
+ (sec->data->write << 0) |
+ (sec->data->execute << 2) |
SHF_ALLOC),
- .sh_addr = 0,
- .sh_offset = 0,
- .sh_size = 0,
- .sh_link = 0,
- .sh_info = 0,
- .sh_addralign = B32(sec->alignment),
- .sh_entsize = 0,
- };
- }
+ .sh_addr = 0,
+ .sh_offset = 0,
+ .sh_size = 0,
+ .sh_link = 0,
+ .sh_info = 0,
+ .sh_addralign = B32(SEC_ALIGN),
+ .sh_entsize = 0,
+ };
+ }
// symbol table
if (strtab_write_str(&assembler->shstrtab, ".symtab", &str_off)) {
@@ -490,9 +324,9 @@ static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res,
.sh_entsize = 0,
};
- for (uint32_t i = 0; i < assembler->sectab.len; i++) {
- struct section *sec = &assembler->sectab.sections[i];
- if (sec->reltab.len == 0)
+ for (uint32_t i = 0; i < assembler->section_len; i++) {
+ struct elf_section *sec = &assembler->sections[i];
+ if (sec->reltab_len == 0)
continue;
shdr[sec->reltab_shidx].sh_link =
B32(assembler->symtab_shidx);
@@ -507,61 +341,53 @@ static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res,
static void update_offsets(struct assembler *assembler, Elf32_Ehdr *ehdr)
{
Elf32_Shdr *shdr = (Elf32_Shdr *) assembler->shdr;
- Elf32_Phdr *phdr = (Elf32_Phdr *) assembler->phdr;
uint32_t ptr = 0;
// we must now correct offsets and sizes inside the ehdr, phdr,
// and shdr
ptr += sizeof(Elf32_Ehdr);
- // phdr
- ehdr->e_phoff = B32(ptr);
- ptr += assembler->phdr_len * sizeof(Elf32_Phdr);
-
// reltbls
- for (uint32_t i = 0; i < assembler->sectab.len; i++) {
- struct section *sec = &assembler->sectab.sections[i];
- if (sec->reltab.len == 0)
+ for (uint32_t i = 0; i < assembler->section_len; i++) {
+ struct elf_section *sec = &assembler->sections[i];
+ if (sec->reltab_len == 0)
continue;
int idx = sec->reltab_shidx;
- int len = sec->reltab.len;
+ int len = sec->reltab_len;
shdr[idx].sh_offset = B32(ptr);
- shdr[idx].sh_size = B32(len * sizeof(Elf32_Rela));
- ptr += len * sizeof(Elf32_Rela);
- }
-
- // section padding
- {
- uint32_t mod = ptr % SEC_ALIGN;
- if (mod != 0)
- assembler->secalign = (SEC_ALIGN - mod);
- else
- assembler->secalign = 0;
- ptr += assembler->secalign;
+ shdr[idx].sh_size = B32(len * sizeof(Elf32_Rel));
+ ptr += len * sizeof(Elf32_Rel);
}
// sections
size_t v_addr = 0;
- for (uint32_t i = 0; i < assembler->sectab.len; i++) {
- struct section *sec = &assembler->sectab.sections[i];
+ for (uint32_t i = 0; i < assembler->section_len; i++) {
+
+ size_t pad = v_addr % SEC_ALIGN;
+ if (pad)
+ pad = SEC_ALIGN - pad;
+ v_addr += pad;
+
+ struct elf_section *sec = &assembler->sections[i];
uint32_t idx = sec->shdr_idx;
- uint32_t size = ntohl(phdr[i].p_filesz);
- phdr[i].p_offset = B32(ptr);
- phdr[i].p_vaddr = B32(v_addr);
- phdr[i].p_paddr = B32(v_addr);
+ uint32_t size = sec->data->len;
shdr[idx].sh_offset = B32(ptr);
- shdr[idx].sh_size = phdr[i].p_filesz;
- shdr[idx].sh_addr = phdr[i].p_vaddr;
+ shdr[idx].sh_size = B32(size);
+ shdr[idx].sh_addr = B32(v_addr);
v_addr += size;
ptr += size;
}
// symtab
- shdr[assembler->symtab_shidx].sh_offset = B32(ptr);
- shdr[assembler->symtab_shidx].sh_link = B32(assembler->strtab_shidx);
- shdr[assembler->symtab_shidx].sh_size =
- B32(assembler->symtab.len * sizeof(Elf32_Sym));
- ptr += assembler->symtab.len * sizeof(Elf32_Sym);
+ {
+ uint32_t len = assembler->symtab_len;
+ uint32_t size = len * sizeof(Elf32_Sym);
+ shdr[assembler->symtab_shidx].sh_offset = B32(ptr);
+ shdr[assembler->symtab_shidx].sh_link =
+ B32(assembler->strtab_shidx);
+ shdr[assembler->symtab_shidx].sh_size = B32(size);
+ ptr += size;
+ }
// strtab
shdr[assembler->strtab_shidx].sh_offset = B32(ptr);
@@ -573,24 +399,10 @@ static void update_offsets(struct assembler *assembler, Elf32_Ehdr *ehdr)
shdr[assembler->shstrtab_shidx].sh_size =
B32(assembler->shstrtab.size);
ptr += assembler->shstrtab.size;
-
// shdr
ehdr->e_shoff = B32(ptr);
}
-static void update_sym_shindx(struct assembler *assembler)
-{
- for (size_t i = 0; i < assembler->symtab.len; i++) {
- Elf32_Sym *sym = &assembler->symtab.symbols[i];
- ssize_t sec = assembler->symtab.sections[i];
-
- if (sec >= 0) {
- sym->st_shndx = B16(assembler->
- sectab.sections[sec].shdr_idx);
- }
- }
-}
-
static int write_file(struct assembler *assembler, Elf32_Ehdr *ehdr,
const char *path)
{
@@ -605,80 +417,70 @@ static int write_file(struct assembler *assembler, Elf32_Ehdr *ehdr,
// ehdr
fwrite(ehdr, sizeof(Elf32_Ehdr), 1, out);
- // phdr
- fwrite(assembler->phdr, sizeof(Elf32_Phdr), assembler->phdr_len, out);
-
// reltbls
- for (uint32_t i = 0; i < assembler->sectab.len; i++) {
- struct section *sec = &assembler->sectab.sections[i];
- if (sec->reltab.len == 0)
+ for (uint32_t i = 0; i < assembler->section_len; i++) {
+ struct elf_section *sec = &assembler->sections[i];
+ void *ptr = sec->reltab;
+ int len = sec->reltab_len;
+ if (len < 1)
continue;
- void *ptr = sec->reltab.data;
- int len = sec->reltab.len;
- fwrite(ptr, sizeof(Elf32_Rela), len, out);
- }
-
- // section padding
- for (uint32_t i = 0; i < assembler->secalign; i++) {
- uint8_t zero = 0;
- fwrite(&zero, 1, 1, out);
+ fwrite(ptr, sizeof(Elf32_Rel), len, out);
}
// sections
- for (uint32_t i = 0; i < assembler->sectab.len; i++) {
- struct section *sec = &assembler->sectab.sections[i];
- for (uint32_t j = 0; j < sec->len; j++) {
- struct section_entry *entry = &sec->entries[j];
- size_t size = entry->size;
- size_t zeros = size % sec->alignment;;
- if (entry->type != ENT_NO_DATA)
- fwrite(&entry->data, size, 1, out);
- else
- zeros += size;
- while(zeros) {
- fputc(0, out);
- zeros--;
- }
- }
+ for (uint32_t i = 0; i < assembler->section_len; i++) {
+ struct elf_section *sec = &assembler->sections[i];
+ void *ptr = sec->data->data;
+ size_t size = sec->data->len;
+ fwrite(ptr, 1, size, out);
}
// sym tbl
- fwrite(assembler->symtab.symbols, sizeof(Elf32_Sym),
- assembler->symtab.len, out);
+ fwrite(assembler->symbols, sizeof(Elf32_Sym), assembler->symtab_len,
+ out);
// str tbl
- fwrite(assembler->strtab.ptr, assembler->strtab.size, 1, out);
+ fwrite(assembler->strtab.ptr, 1, assembler->strtab.size, out);
// shstr tbl
- fwrite(assembler->shstrtab.ptr, assembler->shstrtab.size, 1, out);
+ fwrite(assembler->shstrtab.ptr, 1, assembler->shstrtab.size, out);
// shdr
fwrite(assembler->shdr, sizeof(Elf32_Shdr), assembler->shdr_len, out);
+ // close
fclose(out);
return M_SUCCESS;
}
-static int assemble_elf(struct assembler *assembler, const char *out)
+/*
+ * Fill in st_shndx for every ELF symbol once section header indexes are
+ * known. ELF entry i corresponds to generator symbol i - 1; entry 0 is the
+ * null entry and is skipped. A symbol whose secidx is SYM_SEC_STUB keeps
+ * section index 0; any other symbol gets the shdr_idx of its section.
+ */
+static void update_sym_shndx(struct assembler *assembler)
{
- if (assemble_phdr(assembler, (Elf32_Phdr **) &assembler->phdr,
- &assembler->phdr_len)) {
- return M_ERROR;
+ for (uint32_t i = 1; i < assembler->symtab_len; i++) {
+ Elf32_Sym *esym = &assembler->symbols[i];
+ struct symbol *sym = &assembler->gen.symtab.symbols[i - 1];
+
+ // symbols without a section (stubs) keep shindx 0
+ int shindx = 0;
+ if (sym->secidx != SYM_SEC_STUB)
+ shindx = assembler->sections[sym->secidx].shdr_idx;
+
+ esym->st_shndx = B16(shindx);
}
+}
- if (assemble_shdr(assembler, (Elf32_Shdr **) &assembler->shdr,
- &assembler->shdr_len)) {
+static int assemble_elf(struct assembler *assembler, const char *out)
+{
+ if (assemble_shdr(assembler, &assembler->shdr, &assembler->shdr_len))
return M_ERROR;
- };
Elf32_Ehdr ehdr = MIPS_ELF_EHDR;
- ehdr.e_phnum = B16(assembler->phdr_len);
ehdr.e_shnum = B16(assembler->shdr_len);
ehdr.e_shstrndx = B16(assembler->shstrtab_shidx);
-
update_offsets(assembler, &ehdr);
- update_sym_shindx(assembler);
+ update_sym_shndx(assembler);
if (write_file(assembler, &ehdr, out))
return M_ERROR;
@@ -709,10 +511,16 @@ int assemble_file(struct assembler_arguments args)
int assembler_init(struct assembler *assembler, const char *path)
{
- if (lexer_init(path, &assembler->lexer))
- return M_ERROR;
+ assembler->shdr = NULL;
+ assembler->symbols = NULL;
+ assembler->sections = NULL;
+ assembler->strtab.ptr = NULL;
+ assembler->shstrtab.ptr = NULL;
+ assembler->gen.sections = NULL;
+ assembler->gen.symtab.symbols = NULL;
+ assembler->section_len = 0;
- if (parser_init(&assembler->lexer, &assembler->parser))
+ if (generator_init(path, &assembler->gen))
return M_ERROR;
if (strtab_init(&assembler->shstrtab))
@@ -721,31 +529,22 @@ int assembler_init(struct assembler *assembler, const char *path)
if (strtab_init(&assembler->strtab))
return M_ERROR;
- if (symtab_init(&assembler->symtab))
- return M_ERROR;
-
- if (sectab_init(&assembler->sectab))
- return M_ERROR;
-
- assembler->symtab.strtab = &assembler->strtab;
- assembler->phdr = NULL;
- assembler->shdr = NULL;
-
return M_SUCCESS;
}
void assembler_free(struct assembler *assembler)
{
- if (assembler->phdr)
- free(assembler->phdr);
if (assembler->shdr)
free(assembler->shdr);
+ if (assembler->symbols)
+ free(assembler->symbols);
+ if (assembler->sections) {
+ for (uint32_t i = 0; i < assembler->section_len; i++)
+ elf_section_free(&assembler->sections[i]);
+ free(assembler->sections);
+ }
- sectab_free(&assembler->sectab);
- symtab_free(&assembler->symtab);
strtab_free(&assembler->strtab);
strtab_free(&assembler->shstrtab);
-
- parser_free(&assembler->parser);
- lexer_free(&assembler->lexer);
+ generator_free(&assembler->gen);
}
diff --git a/masm/asm.h b/masm/asm.h
index 1162164..fecd335 100644
--- a/masm/asm.h
+++ b/masm/asm.h
@@ -3,19 +3,15 @@
#ifndef __ASM_H__
#define __ASM_H__
-#include <stddef.h>
#include <elf.h>
-#include <mips.h>
-#include "mlimits.h"
-#include "parse.h"
-#include "lex.h"
+#include "gen.h"
///
/// ELF string table
///
-struct str_table {
+struct elf_str_table {
// size of the ptr in bytes
size_t size;
@@ -25,199 +21,60 @@ struct str_table {
};
/* initialize a string table */
-int strtab_init(struct str_table *strtab);
+int strtab_init(struct elf_str_table *strtab);
/* free a string table */
-void strtab_free(struct str_table *strtab);
+void strtab_free(struct elf_str_table *strtab);
/* get a string from the string table */
-int strtab_get_str(struct str_table *strtab, const char *str, size_t *res);
+int strtab_get_str(struct elf_str_table *strtab, const char *str, size_t *res);
/* get or append a string into the string table */
-int strtab_write_str(struct str_table *strtab, const char *str, size_t *res);
-
-
-///
-/// ELF symbol table
-///
-
-struct symbol_table {
- // length in size in sym ammt
- size_t len;
- size_t size;
-
- // the Elf symbols
- Elf32_Sym *symbols;
-
- // keeps track of what section each ELF symbol is in
- // *!!this is NOT the section header index in the ELF ehdr!!*
- ssize_t *sections;
-
- // symbols reference a string table that acutally
- // holds the strings
- //
- // *weak* ptr, we do not own this!!!
- struct str_table *strtab;
-
-};
-
-/* initalize a symbol table */
-int symtab_init(struct symbol_table *symtab);
-
-/* free the symbol table */
-void symtab_free(struct symbol_table *symtab);
-
-/* add a symbol to the symbol tbl */
-int symtab_push(struct symbol_table *symtab, const Elf32_Sym sym,
- ssize_t sec_idx);
-
-/* find a symbol by name in the symbol table */
-int symtab_find(struct symbol_table *symtab, Elf32_Sym **sym, size_t *idx,
- const char name[MAX_LEX_LENGTH]);
-
-///
-/// ELF relocation table
-///
-
-struct relocation_table {
- size_t len;
- size_t size;
- Elf32_Rela *data;
-};
-
-/* initalize a relocation table */
-int reltab_init(struct relocation_table *reltab);
-
-/* free the relocation table */
-void reltab_free(struct relocation_table *reltab);
-
-/* add a entry to the relocation table */
-int reltab_push(struct relocation_table *reltab, const Elf32_Rela rel);
-
-///
-/// section entry
-///
-
-enum section_entry_type {
- ENT_INS,
- ENT_WORD,
- ENT_HALF,
- ENT_BYTE,
- ENT_STR,
- ENT_NO_DATA,
-};
-
-/* holds a entry inside the section, i.e. a instruction, raw data,
- * special directives */
-struct section_entry {
- size_t size;
- enum section_entry_type type;
-
- union {
- // to get memory address
- char data;
-
- // data
- uint32_t ins;
- char str[MAX_LEX_LENGTH];
- int32_t word;
- int16_t half;
- int8_t byte;
- };
-};
+int strtab_write_str(struct elf_str_table *strtab, const char *str, size_t *res);
///
-/// section
+/// elf section
///
/* holds a section of the asm file (i.e. .text, .bss, .data) */
-struct section {
- // length and size of amount of entries
- size_t len;
- size_t size;
- struct section_entry *entries;
-
- // section name
- char name[MAX_LEX_LENGTH];
+struct elf_section {
+ // section data *weak* pointer
+ struct section *data;
// index of the section in
- // all the sections
- size_t index;
-
- // index of the sectio in
// the ELF shdr
size_t shdr_idx;
- // ELF section data
- bool read;
- bool write;
- bool execute;
- uint16_t alignment;
-
- // ELF tables
+ // relocation table
size_t reltab_shidx;
- struct relocation_table reltab;
+ uint32_t reltab_len;
+ Elf32_Rel *reltab;
};
-/* get the size of the section in bytes */
-size_t sec_size(struct section *section);
-
-/* get the index of a entry in bytes */
-size_t sec_index(struct section *section, size_t index);
-
-/* add a section entry to the section */
-int sec_push(struct section *section, struct section_entry entry);
-
-/* holds eachs section */
-struct section_table {
- // length and size of amount of sections
- size_t len;
- size_t size;
- struct section *sections;
-
- // the current section
- struct section *current;
-};
-
-/* initalize the section table */
-int sectab_init(struct section_table *sec_tbl);
-
-/* free the section table */
-void sectab_free(struct section_table *sec_tbl);
-
-/* create a new section in the section table */
-int sectab_alloc(struct section_table *sec_tbl, struct section **sec,
- const char name[MAX_LEX_LENGTH]);
-
-/* get a section by name from the section table */
-int sectab_get(struct section_table *sec_tbl, struct section **sec,
- const char name[MAX_LEX_LENGTH]);
-
///
/// assembler
///
struct assembler {
- // the token lexer
- struct lexer lexer;
- // the expression parser
- struct parser parser;
+ // the code generator
+ struct generator gen;
- /// ELF tables
+ /// symbol table
size_t symtab_shidx;
- struct symbol_table symtab;
+ size_t symtab_len;
+ Elf32_Sym *symbols;
+
+ // string table (symbol names)
size_t strtab_shidx;
- struct str_table strtab;
- size_t shstrtab_shidx;
- struct str_table shstrtab;
+ struct elf_str_table strtab;
- /// Segments
- struct section_table sectab;
- uint32_t secalign; // align sections to 0x1000 when writing
+ // section header string table (section names)
+ size_t shstrtab_shidx;
+ struct elf_str_table shstrtab;
- /// program header
- Elf32_Phdr *phdr;
- uint32_t phdr_len;
+ /// sections
+ uint32_t section_len;
+ struct elf_section *sections;
/// section header
Elf32_Shdr *shdr;
diff --git a/masm/gen.c b/masm/gen.c
new file mode 100644
index 0000000..13d2848
--- /dev/null
+++ b/masm/gen.c
@@ -0,0 +1,812 @@
+#include <stdlib.h>
+#include <merror.h>
+#include <melf.h>
+#include <mips32.h>
+#include <mips32r6.h>
+
+#include "tab.h"
+#include "gen.h"
+#include "parse.h"
+
+///
+/// section table
+///
+
+/* apply the default permissions and alignment for well known section
+ * names (compared case-insensitively)
+ *
+ * sections with any other name are left untouched, so the caller has to
+ * set its own fallback values before calling this */
+static void section_get_default_perm(struct section *sec, const char *name)
+{
+	static const struct perms {
+		const char *name;
+		bool read;
+		bool write;
+		bool execute;
+		int alignment;
+	} defaults[] = {
+		{".text", true, false, true, 4},
+		{".code", true, false, true, 4},
+		{".data", true, true, false, 1},
+		{".stack", true, true, false, 1},
+		{".rodata", true, false, false, 1},
+		{".bss", true, true, false, 1},
+		{".robss", true, false, false, 1},
+	};
+	// derive the length from the table itself instead of a macro that
+	// uses a reserved name and leaks into the rest of the file
+	const size_t count = sizeof(defaults) / sizeof(defaults[0]);
+
+	for (size_t i = 0; i < count; i++) {
+		const struct perms *p = &defaults[i];
+		if (strcasecmp(name, p->name) != 0)
+			continue;
+		sec->read = p->read;
+		sec->write = p->write;
+		sec->execute = p->execute;
+		sec->align = p->alignment;
+		break;
+	}
+}
+
+/* look up the section called <name>, creating it if it does not exist
+ *
+ * on success <*res> points at the section and M_SUCCESS is returned
+ * on failure M_ERROR is returned and no section is added to the table
+ *
+ * creating a section may move the section table, gen->current is kept
+ * pointing at the same section when that happens */
+static int section_get(struct generator *gen, struct section **res,
+		       const struct string *const name)
+{
+	/// find the section if it exists
+	for (size_t i = 0; i < gen->sections_len; i++) {
+		struct section *sec = &gen->sections[i];
+		if (sec->name.len != name->len)
+			continue;
+		if (strcmp(sec->name.str, name->str) != 0)
+			continue;
+		*res = sec;
+		return M_SUCCESS;
+	}
+
+	/// grow the table, but only once it is actually full
+	if (gen->sections_len >= gen->sections_size) {
+		size_t size = gen->sections_size ? gen->sections_size * 2 : 8;
+
+		// realloc may move the table, remember which section was
+		// current so the pointer can be rebuilt afterwards
+		ptrdiff_t current = -1;
+		if (gen->sections != NULL && gen->current != NULL)
+			current = gen->current - gen->sections;
+
+		void *new = realloc(gen->sections,
+				    size * sizeof(struct section));
+		if (new == NULL) {
+			PERROR("cannot realloc");
+			return M_ERROR;
+		}
+
+		gen->sections_size = size;
+		gen->sections = new;
+		if (current >= 0)
+			gen->current = &gen->sections[current];
+	}
+
+	/// allocate a new one in the next free slot
+	// the slot only becomes part of the table (sections_len++) once
+	// everything succeeded, so generator_free never sees a half
+	// initalized section
+	struct section *sec = &gen->sections[gen->sections_len];
+
+	// alloc reftab
+	if (reftab_init(&sec->reftab))
+		return M_ERROR;
+
+	// copy name
+	if (string_clone(&sec->name, name)) {
+		reftab_free(&sec->reftab);
+		return M_ERROR;
+	}
+
+	// set defaults
+	sec->len = 0;
+	sec->size = 0;
+	sec->align = 1;
+	sec->data = NULL;
+	sec->read = true;
+	sec->write = true;
+	sec->execute = false;
+	section_get_default_perm(sec, name->str);
+
+	gen->sections_len++;
+	*res = sec;
+	return M_SUCCESS;
+}
+
+/* make sure the section can hold <space> more bytes than it currently
+ * contains, growing the data buffer when it cannot
+ *
+ * returns M_ERROR (and leaves the section untouched) if the buffer
+ * cannot be grown */
+static int section_extend(struct section *section, size_t space)
+{
+	size_t newlen = section->len + space;
+	// an exact fit needs no reallocation
+	if (section->data != NULL && newlen <= section->size)
+		return M_SUCCESS;
+
+	size_t size = section->size ? section->size * 2 + newlen : newlen * 2;
+	// never ask realloc for zero bytes, it is allowed to return NULL
+	// for that without it being an allocation failure
+	if (size == 0)
+		size = 1;
+
+	void *new = realloc(section->data, size);
+	if (new == NULL) {
+		PERROR("cannot realloc");
+		return M_ERROR;
+	}
+	section->size = size;
+	section->data = new;
+
+	return M_SUCCESS;
+}
+
+/* append <len> bytes from <data> to the section
+ *
+ * the section is zero padded first so that the data starts on a
+ * multiple of the section alignment, which is the same offset that
+ * gen_label and gen_ins_write_state compute for the next entry */
+static int section_push(struct section *section, const void *data,
+			size_t len)
+{
+	// padding depends on where the data starts (the current length),
+	// not on where it would end
+	size_t zeros = section->len % section->align;
+	if (zeros)
+		zeros = section->align - zeros;
+
+	if (section_extend(section, len + zeros))
+		return M_ERROR;
+
+	memset(section->data + section->len, 0, zeros);
+	memcpy(section->data + section->len + zeros, data, len);
+	section->len += len + zeros;
+
+	return M_SUCCESS;
+}
+
+/* append <len> zero bytes to the section, after zero padding the
+ * current length up to a multiple of the section alignment */
+static int section_zero(struct section *section, size_t len)
+{
+	const size_t misaligned = section->len % section->align;
+	const size_t padding = misaligned ? section->align - misaligned : 0;
+	const size_t total = len + padding;
+
+	if (section_extend(section, total))
+		return M_ERROR;
+
+	// padding and payload are both zeros, so clear them in one go
+	memset(section->data + section->len, 0, total);
+	section->len += total;
+
+	return M_SUCCESS;
+}
+
+/* release everything owned by a section: its data buffer, its name
+ * and its reference table (the section struct itself is not freed) */
+void section_free(struct section *section)
+{
+	free(section->data);
+	string_free(&section->name);
+	reftab_free(&section->reftab);
+}
+
+///
+/// generation functions
+///
+
+/* echo the source text of <expr> (byte_start up to byte_end of the
+ * lexer's file) to stderr, prefixing each line with its line number
+ *
+ * note that this moves the file position of the lexer's file */
+static void print_curr_line(struct generator *gen,
+			    const struct expr *const expr)
+{
+	FILE *src = gen->parser.lexer.file;
+	int lineno = expr->line_no;
+	bool at_line_start = true;
+
+	fseek(src, expr->byte_start, SEEK_SET);
+
+	for (int left = expr->byte_end - expr->byte_start; left != 0; left--) {
+		const int ch = getc(src);
+		if (ch == EOF || ch == '\0')
+			return;
+
+		// print the gutter before the first char of every line
+		if (at_line_start)
+			fprintf(stderr, "\t%d | ", lineno++);
+
+		at_line_start = (ch == '\n');
+		putc(ch, stderr);
+	}
+}
+
+/* push <count> values of <len> bytes each, read back to back from
+ * <data>, into the current section (one section_push per value) */
+static int gen_directive_whb(struct generator *gen, const void *data,
+			     uint32_t count, uint32_t len)
+{
+	// TODO: endianess
+	uint32_t idx = 0;
+
+	while (idx < count) {
+		void *value = (char *) data + (len * idx);
+		if (section_push(gen->current, value, len))
+			return M_ERROR;
+		idx++;
+	}
+
+	return M_SUCCESS;
+}
+
+/* generate a directive expression
+ *
+ * data directives write into the current section, .align changes the
+ * alignment of the current section, the section directive switches
+ * (or creates) the current section, and extern/globl mark a symbol
+ *
+ * returns the result of the handler, directive types that are not
+ * listed here are ignored and return M_SUCCESS */
+static int gen_directive(struct generator *gen,
+			 const struct expr *const e)
+{
+	const struct expr_directive *const dir = &e->directive;
+
+	switch (dir->type) {
+	case EXPR_DIRECTIVE_ALIGN:
+		if (dir->align < 1) {
+			ERROR("alignment cannot be zero");
+			print_curr_line(gen, e);
+			return M_ERROR;
+		}
+		gen->current->align = dir->align;
+		return M_SUCCESS;
+
+	case EXPR_DIRECTIVE_SPACE:
+		return section_zero(gen->current, dir->space);
+
+	case EXPR_DIRECTIVE_WORD:
+		return gen_directive_whb(gen, dir->words, dir->len,
+					 sizeof(uint32_t));
+
+	case EXPR_DIRECTIVE_HALF:
+		return gen_directive_whb(gen, dir->halfs, dir->len,
+					 sizeof(uint16_t));
+
+	case EXPR_DIRECTIVE_BYTE:
+		return gen_directive_whb(gen, dir->bytes, dir->len,
+					 sizeof(uint8_t));
+
+	case EXPR_DIRECTIVE_SECTION:
+		return section_get(gen, &gen->current, &dir->section);
+
+	case EXPR_DIRECTIVE_EXTERN: {
+		struct symbol *sym;
+		int res = symtab_find_or_stub(&gen->symtab, &sym, &dir->label);
+		if (res == M_SUCCESS)
+			sym->type = SYM_EXTERN;
+		return res;
+	}
+
+	case EXPR_DIRECTIVE_GLOBL: {
+		struct symbol *sym;
+		int res = symtab_find_or_stub(&gen->symtab, &sym, &dir->label);
+		if (res == M_SUCCESS)
+			sym->type = SYM_GLOBAL;
+		return res;
+	}
+
+	// .ascii writes one byte less than .asciiz
+	case EXPR_DIRECTIVE_ASCII:
+		return section_push(gen->current, dir->string.str,
+				    dir->string.len - 1);
+
+	case EXPR_DIRECTIVE_ASCIIZ:
+		return section_push(gen->current, dir->string.str,
+				    dir->string.len);
+	}
+
+	return M_SUCCESS;
+}
+
+/* generate a constant expression
+ *
+ * not implemented yet: both arguments are ignored, an error is
+ * reported and M_ERROR is always returned */
+static int gen_constant(struct generator *gen, struct expr_const *const expr)
+{
+ // silence unused parameter warnings until this is implemented
+ (void) gen;
+ (void) expr;
+
+ ERROR("constants not yet implemented");
+ return M_ERROR;
+}
+
+/* identify the grammer entry that <name> starts with
+ *
+ * the comparison is case-insensitive and only looks at the start of
+ * <name>, <*len> is set to the length of the entry that matched
+ *
+ * does not return if nothing matches: that is a bug in the grammer
+ * tables, so it is reported and the process exits */
+static enum grammer_type get_gmr_type(const char *name, size_t *len)
+{
+	// checked in order, first match wins
+	static const struct {
+		const char *text;
+		enum grammer_type type;
+	} entries[] = {
+		{"rd", GMR_RD},
+		{"rs", GMR_RS},
+		{"rt", GMR_RT},
+		{"immd", GMR_IMMD},
+		// has to come before "offset", which is a prefix of it
+		{"offset(base)", GMR_OFFSET_BASE},
+		{"offset", GMR_OFFSET},
+		{"target", GMR_TARGET},
+		{"hi", GMR_HI},
+		{"lo", GMR_LO},
+	};
+
+	for (size_t i = 0; i < sizeof(entries) / sizeof(entries[0]); i++) {
+		const size_t textlen = strlen(entries[i].text);
+		if (strncasecmp(entries[i].text, name, textlen) != 0)
+			continue;
+		*len = textlen;
+		return entries[i].type;
+	}
+
+	ERROR("!!! BUG: this should never hit !!!");
+	exit(1);
+}
+
+/* parse a register name (without the leading '$') into <reg>
+ *
+ * accepts the aliased names ($zero, $at, $v0-v1, $a0-a3, $t0-t9,
+ * $s0-s7, $k0-k1, $gp, $sp, $fp, $ra) and the numbers $0-31
+ *
+ * only the first characters of <name> are inspected, anything after a
+ * valid name is ignored
+ *
+ * returns M_ERROR and reports the name if it is not a register */
+static int parse_register(enum mips32_register *reg, struct string *name)
+{
+	int len = name->len;
+	int c0 = len > 0 ? name->str[0] : '\0',
+	    c1 = len > 1 ? name->str[1] : '\0',
+	    c2 = len > 2 ? name->str[2] : '\0',
+	    c3 = len > 3 ? name->str[3] : '\0';
+
+	switch (c0) {
+	// $zero
+	case 'z':
+		if (c1 == 'e' && c2 == 'r' && c3 == 'o') {
+			*reg = MIPS32_REG_ZERO;
+			return M_SUCCESS;
+		}
+		break;
+
+	// $a0-a3 $at
+	case 'a':
+		if (c1 == 't') {
+			*reg = MIPS32_REG_AT;
+			return M_SUCCESS;
+		}
+		if (c1 >= '0' && c1 <= '3') {
+			*reg = MIPS32_REG_A0;
+			*reg += c1 - '0';
+			return M_SUCCESS;
+		}
+		break;
+
+	// $v0-v1
+	case 'v':
+		if (c1 >= '0' && c1 <= '1') {
+			*reg = MIPS32_REG_V0;
+			*reg += c1 - '0';
+			return M_SUCCESS;
+		}
+		break;
+
+	// $t0-t9
+	case 't':
+		if (c1 >= '0' && c1 <= '7') {
+			*reg = MIPS32_REG_T0;
+			*reg += c1 - '0';
+			return M_SUCCESS;
+		}
+		// reg T8-T9 are not in order with T0-T7
+		if (c1 >= '8' && c1 <= '9') {
+			*reg = MIPS32_REG_T8;
+			*reg += c1 - '8';
+			return M_SUCCESS;
+		}
+		break;
+
+	// $s0-s7 $sp
+	case 's':
+		if (c1 >= '0' && c1 <= '7') {
+			*reg = MIPS32_REG_S0;
+			*reg += c1 - '0';
+			return M_SUCCESS;
+		}
+		if (c1 == 'p') {
+			*reg = MIPS32_REG_SP;
+			return M_SUCCESS;
+		}
+		break;
+
+	// $k0-k1
+	case 'k':
+		if (c1 >= '0' && c1 <= '1') {
+			*reg = MIPS32_REG_K0;
+			*reg += c1 - '0';
+			return M_SUCCESS;
+		}
+		break;
+
+	// $gp
+	case 'g':
+		if (c1 == 'p') {
+			*reg = MIPS32_REG_GP;
+			return M_SUCCESS;
+		}
+		break;
+
+	// $fp
+	case 'f':
+		if (c1 == 'p') {
+			*reg = MIPS32_REG_FP;
+			return M_SUCCESS;
+		}
+		break;
+
+	// $ra
+	case 'r':
+		if (c1 == 'a') {
+			*reg = MIPS32_REG_RA;
+			return M_SUCCESS;
+		}
+		break;
+
+	// $0-31 (non aliased register names)
+	default:
+		if (c0 >= '0' && c0 <= '9') {
+			int i = c0 - '0';
+			if (c1 >= '0' && c1 <= '9') {
+				i *= 10;
+				i += c1 - '0';
+			}
+			if (i <= 31) {
+				*reg = i;
+				return M_SUCCESS;
+			}
+		}
+		break;
+	}
+
+	// the precision of %.*s has to be an int, not the uint32_t length
+	ERROR("unknown register $%.*s", len, name->str);
+	return M_ERROR;
+}
+
+/* read the register named by <arg> into <reg>
+ * reports an error against <expr> if <arg> is not a register argument
+ * or does not name a known register */
+static int gen_ins_read_reg(struct generator *gen,
+			    struct expr *const expr,
+			    struct expr_ins_arg *arg,
+			    enum mips32_register *reg)
+{
+	if (arg->type != EXPR_INS_ARG_REGISTER) {
+		ERROR("expected a register");
+		print_curr_line(gen, expr);
+		return M_ERROR;
+	}
+	if (parse_register(reg, &arg->reg)) {
+		print_curr_line(gen, expr);
+		return M_ERROR;
+	}
+	return M_SUCCESS;
+}
+
+/* read the arguments of an instruction expression into <state>
+ *
+ * walks the comma separated entries of the instruction's grammer and
+ * consumes one argument of <expr> per entry, checking that each
+ * argument has the type the entry asks for
+ *
+ * returns M_ERROR (after reporting the offending line) if an argument
+ * is missing, has the wrong type, or is left over once the grammer has
+ * been satisfied */
+static int gen_ins_read_state(struct generator *gen,
+			      struct expr *const expr,
+			      struct gen_ins_state *state,
+			      struct mips32_grammer *grammer)
+{
+	char *ptr = grammer->grammer;
+	uint32_t argi = 0;
+
+	// read values into state
+	while (*ptr != '\0') {
+
+		if (argi >= expr->instruction.args_len) {
+			ERROR("not enough arguments passed");
+			print_curr_line(gen, expr);
+			return M_ERROR;
+		}
+		struct expr_ins_arg *arg = &expr->instruction.args[argi++];
+
+		size_t skip;
+		switch (get_gmr_type(ptr, &skip)) {
+		case GMR_RD:
+			// rd
+			if (gen_ins_read_reg(gen, expr, arg, &state->rd))
+				return M_ERROR;
+			break;
+		case GMR_RS:
+			// rs
+			if (gen_ins_read_reg(gen, expr, arg, &state->rs))
+				return M_ERROR;
+			break;
+		case GMR_RT:
+			// rt
+			if (gen_ins_read_reg(gen, expr, arg, &state->rt))
+				return M_ERROR;
+			break;
+		case GMR_IMMD:
+			// immd
+			if (arg->type != EXPR_INS_ARG_IMMEDIATE) {
+				ERROR("expected an immediate");
+				print_curr_line(gen, expr);
+				return M_ERROR;
+			}
+			state->immd = arg->immd;
+			break;
+		case GMR_OFFSET:
+			// offset, either a number or a label to reference
+			state->offset = 0;
+			if (arg->type == EXPR_INS_ARG_IMMEDIATE)
+				state->offset = arg->immd;
+			else if (arg->type == EXPR_INS_ARG_LABEL)
+				state->label = &arg->label;
+			else {
+				ERROR("invalid instruction");
+				print_curr_line(gen, expr);
+				return M_ERROR;
+			}
+			break;
+		case GMR_OFFSET_BASE:
+			// offset(base)
+			if (arg->type != EXPR_INS_ARG_OFFSET) {
+				ERROR("expected an offset($base)");
+				print_curr_line(gen, expr);
+				return M_ERROR;
+			}
+			state->offset = arg->offset.immd;
+			if (parse_register(&state->base, &arg->offset.reg)) {
+				print_curr_line(gen, expr);
+				return M_ERROR;
+			}
+			break;
+		case GMR_TARGET:
+			// target, either a number or a label to reference
+			state->target = 0;
+			if (arg->type == EXPR_INS_ARG_IMMEDIATE)
+				state->target = arg->immd;
+			else if (arg->type == EXPR_INS_ARG_LABEL)
+				state->label = &arg->label;
+			else {
+				ERROR("invalid instruction");
+				print_curr_line(gen, expr);
+				return M_ERROR;
+			}
+			break;
+		default:
+			// other entries consume their argument unread
+			break;
+		}
+
+		// skip entry
+		ptr += skip;
+
+		// skip comma
+		if (*ptr == ',') {
+			ptr++;
+			continue;
+		} else if (*ptr == '\0') {
+			break;
+		} else {
+			ERROR("!! BUG3: invalid splitting char %c !!!", *ptr);
+			exit(1);
+		}
+
+	}
+
+	// every grammer entry has its argument, anything that is left
+	// over would otherwise be dropped without the user noticing
+	if (argi < expr->instruction.args_len) {
+		ERROR("too many arguments passed");
+		print_curr_line(gen, expr);
+		return M_ERROR;
+	}
+
+	return M_SUCCESS;
+}
+
+/* fill in one instruction from the read state and write it to the
+ * current section
+ *
+ * <ins> is passed by value: the fields named by <grammer> are set on
+ * this copy, the copy is pushed into the current section, and (when
+ * the state carries a label and the grammer had an offset/target/hi/lo
+ * entry) a reference to that label is recorded for the written word
+ *
+ * returns M_ERROR if the section or the reference table cannot be
+ * written to, or the symbol cannot be found/stubbed */
+static int gen_ins_write_state(
+ struct generator *gen,
+ union mips32_instruction ins, // the instruction to modify
+ struct gen_ins_state *state, // the current read state
+ char *grammer) // the grammer to parse
+{
+ char *ptr = grammer;
+ enum reference_type reftype = REF_NONE;
+
+ // write the value of each grammer entry into the instruction
+ while (*ptr != '\0') {
+
+ // parse next dsl entry
+ size_t skip;
+ enum grammer_type gmr = get_gmr_type(ptr, &skip);
+
+ // check for dsl hardcoded register argument
+ bool hardcoded = false;
+ enum mips32_register hard_reg;
+ if (*(ptr + skip) == '=') {
+ // parse argument
+ // the name starts two chars after the entry, presumably
+ // skipping "=$" - TODO confirm against the grammer tables
+ char *rptr = ptr + skip + 2;
+ hardcoded = true;
+ struct string regname;
+ string_bss(&regname, rptr);
+ if (parse_register(&hard_reg, &regname)) {
+ ERROR("!!! BUG2: this should never hit !!!");
+ exit(1);
+ }
+ }
+
+ // skip till next comma
+ for (;*ptr != '\0' && *ptr != ','; ptr++);
+ if (*ptr == ',')
+ ptr++;
+
+ // a hardcoded register takes priority over the one that was read,
+ // reftype remembers the kind of the last entry that can hold a
+ // label
+ switch (gmr) {
+ case GMR_RD:
+ ins.rd = hardcoded ? hard_reg : state->rd;
+ break;
+ case GMR_RS:
+ ins.rs = hardcoded ? hard_reg : state->rs;
+ break;
+ case GMR_RT:
+ ins.rt = hardcoded ? hard_reg : state->rt;
+ break;
+ case GMR_IMMD:
+ ins.immd = state->immd;
+ break;
+ case GMR_OFFSET:
+ ins.offset = state->offset;
+ reftype = REF_MIPS_16;
+ break;
+ case GMR_OFFSET_BASE:
+ ins.offset = state->offset;
+ ins.rs = state->base;
+ reftype = REF_MIPS_16;
+ break;
+ case GMR_TARGET:
+ ins.target = state->target;
+ reftype = REF_MIPS_26;
+ break;
+ case GMR_HI:
+ // upper 16 bits of the target
+ ins.immd = state->target >> 16;
+ reftype = REF_MIPS_HI16;
+ break;
+ case GMR_LO:
+ // lower 16 bits of the target
+ ins.immd = state->target & 0x0000FFFF;
+ reftype = REF_MIPS_LO16;
+ break;
+ }
+ }
+
+ // get offset for reference (if needed)
+ // this is the current length rounded up to the section alignment,
+ // which has to be computed before the push changes the length
+ uint32_t offset = gen->current->len;
+ size_t zeros = offset % gen->current->align;
+ if (zeros)
+ zeros = gen->current->align - zeros;
+ offset += zeros;
+
+ // write instruction to section
+ uint32_t raw = B32(ins.raw);
+ if (section_push(gen->current, &raw, sizeof(uint32_t))) {
+ return M_ERROR;
+ }
+
+ // create reference (if needed)
+ // only when an argument was a label, the symbol is stubbed if it
+ // has not been seen yet
+ if (reftype != REF_NONE && state->label != NULL) {
+ struct symbol *sym;
+
+ if (symtab_find_or_stub(&gen->symtab, &sym, state->label))
+ return M_ERROR;
+
+ struct reference ref = {
+ .type = reftype,
+ .symbol = sym,
+ .offset = offset
+ };
+
+ if (reftab_push(&gen->current->reftab, &ref)) {
+ return M_ERROR;
+ }
+ }
+
+ return M_SUCCESS;
+}
+
+/* generate an instruction expression
+ *
+ * looks the instruction name up (case-insensitively) in the active
+ * grammer table, reads the arguments into a state, then writes either
+ * the one real instruction or every instruction a pseudo instruction
+ * expands to
+ *
+ * returns M_ERROR if the instruction is unknown, or its arguments
+ * cannot be read or written */
+static int gen_ins(struct generator *gen, struct expr *const expr)
+{
+	struct mips32_grammer *grammer = NULL;
+	for (uint32_t i = 0; i < gen->grammers_len; i++) {
+		struct mips32_grammer *temp = &gen->grammers[i];
+		if (strcasecmp(temp->name, expr->instruction.name.str) != 0)
+			continue;
+		grammer = temp;
+		break;
+	}
+
+	if (grammer == NULL) {
+		ERROR("unknown instruction");
+		print_curr_line(gen, expr);
+		return M_ERROR;
+	}
+
+	// zero the whole state, so an entry that is written without
+	// having been read is zero instead of an indeterminate value
+	struct gen_ins_state state = {
+		.label = NULL,
+	};
+
+	// read in the values from the parser
+	if (gen_ins_read_state(gen, expr, &state, grammer))
+		return M_ERROR;
+
+	// write the values into the instructions
+	// ...and then the sections
+	if (grammer->pseudo_len > 0) {
+		// write pseudo
+		for (int i = 0; i < grammer->pseudo_len; i++) {
+			union mips32_instruction ins = gen->instructions[
+				grammer->pseudo_grammer[i].enum_index];
+			if (gen_ins_write_state(gen, ins, &state,
+					grammer->pseudo_grammer[i].update))
+				return M_ERROR;
+		}
+	} else {
+		// write real
+		union mips32_instruction ins
+			= gen->instructions[grammer->enum_index];
+		if (gen_ins_write_state(gen, ins, &state, grammer->grammer))
+			return M_ERROR;
+	}
+
+	return M_SUCCESS;
+}
+
+/* define <label> at the next aligned offset of the current section
+ *
+ * a symbol that already exists as a stub is filled in, a symbol that
+ * does not exist is created as a local one, and a symbol that is
+ * already defined is an error */
+static int gen_label(struct generator *gen, struct string *const label)
+{
+	struct section *const cur = gen->current;
+	const ptrdiff_t secidx = cur - gen->sections;
+
+	// the current length rounded up to the section alignment
+	uint32_t offset = cur->len;
+	const size_t rem = offset % cur->align;
+	if (rem != 0)
+		offset += cur->align - rem;
+
+	struct symbol *sym;
+	if (symtab_find(&gen->symtab, &sym, label->str) != M_SUCCESS) {
+		/* create a new symbol */
+		struct symbol new = {
+			.secidx = secidx,
+			.offset = offset,
+			.type = SYM_LOCAL,
+		};
+		if (string_clone(&new.name, label))
+			return M_ERROR;
+		if (symtab_push(&gen->symtab, &new)) {
+			string_free(&new.name);
+			return M_ERROR;
+		}
+		return M_SUCCESS;
+	}
+
+	/* update existing symbol */
+	if (sym->secidx != SYM_SEC_STUB) {
+		// symbols that are not labeled stub are fully defined,
+		// it is a error to redefine them
+		ERROR("redefined symbol '%s'", label->str);
+		return M_ERROR;
+	}
+
+	sym->secidx = secidx;
+	sym->offset = offset;
+	return M_SUCCESS;
+}
+
+/* run codegen for the next expression of the parser
+ *
+ * a non zero result of parser_next is returned as is (the caller
+ * checks for M_ERROR and M_EOF), otherwise the result of generating
+ * the expression is returned */
+static int generate(struct generator *gen)
+{
+	struct expr expr;
+	int res = M_SUCCESS;
+
+	// get the next expression
+	if ((res = parser_next(&gen->parser, &expr)))
+		return res;
+
+	// if its not a section directive
+	// (and we dont have a section)
+	// create the default
+	if ((
+		expr.type != EXPR_DIRECTIVE ||
+		expr.directive.type != EXPR_DIRECTIVE_SECTION) &&
+		gen->current == NULL) {
+		// create .data section
+		// names from the lexer are pushed with their null
+		// terminator (.ascii drops one byte of the length to strip
+		// it), so it is counted here as well; section_get compares
+		// lengths and a later explicit .data has to find this one
+		struct string temp = {
+			.str = ".data",
+			.len = sizeof(".data"),
+			.size = sizeof(".data"),
+			.allocated = false
+		};
+		if (section_get(gen, &gen->current, &temp)) {
+			expr_free(&expr);
+			return M_ERROR;
+		}
+	}
+
+	res = M_SUCCESS;
+	switch (expr.type) {
+	case EXPR_DIRECTIVE:
+		res = gen_directive(gen, &expr);
+		break;
+	case EXPR_CONSTANT:
+		res = gen_constant(gen, &expr.constant);
+		break;
+	case EXPR_INS:
+		res = gen_ins(gen, &expr);
+		break;
+	case EXPR_LABEL:
+		res = gen_label(gen, &expr.label);
+		break;
+	}
+
+	expr_free(&expr);
+	return res;
+}
+
+/* run codegen with the mips32r6 specification
+ *
+ * selects the mips32r6 instruction and grammer tables, then generates
+ * expressions until the end of the input (M_SUCCESS) or the first
+ * error (M_ERROR) */
+int generate_mips32r6(struct generator *gen)
+{
+	gen->instructions = mips32r6_instructions;
+	gen->instructions_len = __MIPS32R6_INS_LEN;
+	gen->grammers = mips32r6_grammers;
+	gen->grammers_len = __MIPS32R6_GRAMMER_LEN;
+
+	for (;;) {
+		const int res = generate(gen);
+		if (res == M_ERROR)
+			return M_ERROR;
+		if (res == M_EOF)
+			return M_SUCCESS;
+	}
+}
+
+/* initalize a generator that reads its input from <file>
+ *
+ * the section table starts out empty with no current section, and no
+ * instruction or grammer table is selected until one of the generate
+ * functions is called
+ *
+ * returns M_ERROR if the parser or the symbol table cannot be set up */
+int generator_init(const char *file, struct generator *gen)
+{
+	// plain fields first, so they are defined on every return path
+	gen->instructions = NULL;
+	gen->instructions_len = 0;
+	gen->grammers = NULL;
+	gen->grammers_len = 0;
+	gen->sections = NULL;
+	gen->sections_len = 0;
+	gen->sections_size = 0;
+	// generate() creates the default section while this is NULL
+	gen->current = NULL;
+
+	if (parser_init(file, &gen->parser))
+		return M_ERROR;
+	if (symtab_init(&gen->symtab))
+		return M_ERROR;
+
+	return M_SUCCESS;
+}
+
+/* free a generator: its parser, its symbol table, every section in
+ * the section table, and the section table itself */
+void generator_free(struct generator *gen)
+{
+	parser_free(&gen->parser);
+	symtab_free(&gen->symtab);
+
+	for (size_t idx = 0; idx != gen->sections_len; ++idx) {
+		struct section *sec = &gen->sections[idx];
+		section_free(sec);
+	}
+
+	free(gen->sections);
+}
diff --git a/masm/gen.h b/masm/gen.h
new file mode 100644
index 0000000..19f575c
--- /dev/null
+++ b/masm/gen.h
@@ -0,0 +1,118 @@
+/* Copyright (c) 2024 Freya Murphy */
+
+#ifndef __GEN_H__
+#define __GEN_H__
+
+#include <mlimits.h>
+#include <mips32.h>
+#include <stdint.h>
+
+#include "parse.h"
+#include "tab.h"
+
+// predefine
+struct generator;
+
+///
+/// a section
+///
+struct section {
+ // name, sections are looked up by it
+ struct string name;
+
+ // alignment in bytes, data pushed into the section is zero padded
+ // to a multiple of it
+ size_t align;
+
+ // data
+ // <len> is the amount of bytes written so far and <size> the
+ // amount of bytes allocated for <data>
+ char *data;
+ size_t len;
+ size_t size;
+
+ // permissions
+ bool read;
+ bool write;
+ bool execute;
+
+ /// reference table
+ /// holds the references to symbols made by instructions written
+ /// into this section
+ struct reference_table reftab;
+};
+
+void section_free(struct section *section);
+
+///
+/// instruction generation state
+///
+
+/// holds the argument values read from one instruction expression,
+/// they are then written into one or more machine instructions
+struct gen_ins_state {
+ // rd,rs,rt
+ enum mips32_register rd;
+ enum mips32_register rs;
+ enum mips32_register rt;
+
+ // immd
+ uint16_t immd;
+
+ // offset(base)
+ // <offset> is also used for a plain offset argument
+ uint16_t offset;
+ enum mips32_register base;
+
+ // target
+ uint32_t target;
+
+ // current referenced label
+ // NULL unless an offset or target argument was given as a label
+ struct string *label;
+};
+
+///
+/// grammer type
+///
+
+/// the kinds of entries that can appear in a grammer string, with the
+/// text that get_gmr_type recognizes for each of them
+enum grammer_type {
+ GMR_RD, // "rd"
+ GMR_RS, // "rs"
+ GMR_RT, // "rt"
+ GMR_IMMD, // "immd"
+ GMR_OFFSET, // "offset"
+ GMR_OFFSET_BASE, // "offset(base)"
+ GMR_TARGET, // "target"
+ GMR_HI, // "hi", upper 16 bits of the target
+ GMR_LO, // "lo", lower 16 bits of the target
+};
+
+///
+/// generates assembly
+/// from a parser stream
+///
+struct generator {
+ // the parser that expressions are read from
+ struct parser parser;
+
+ // current instruction table
+ // set by generate_mips32r6
+ size_t instructions_len;
+ union mips32_instruction *instructions;
+
+ // current grammer table
+ // set by generate_mips32r6
+ size_t grammers_len;
+ struct mips32_grammer *grammers;
+
+ // sections
+ // <sections_len> is the amount of sections in use and
+ // <sections_size> the amount of sections allocated
+ size_t sections_len;
+ size_t sections_size;
+ struct section *sections;
+
+ // current section
+ // points into <sections>, NULL until the first section is created
+ struct section *current;
+
+ // symbol table
+ struct symbol_table symtab;
+};
+
+/* generate the input as mips32r6 */
+int generate_mips32r6(struct generator *gen);
+
+/* initialize a generator */
+int generator_init(const char *file, struct generator *gen);
+
+/* free a generator */
+void generator_free(struct generator *gen);
+
+#endif /* __GEN_H__ */
diff --git a/masm/lex.c b/masm/lex.c
index a7707d6..b835a7f 100644
--- a/masm/lex.c
+++ b/masm/lex.c
@@ -2,6 +2,10 @@
#include <mlimits.h>
#include <merror.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
static struct {
int x;
@@ -46,64 +50,24 @@ static void skip_comment(struct lexer *lexer)
}
}
-/* lexes text until whitespace
- * returns error on zero length or too long */
-static int lex_ident(struct lexer *lexer, char text[MAX_LEX_LENGTH])
-{
- int len = 0;
- char *ptr = text;
- int c;
-
- while (1) {
- c = lex_peek(lexer);
- if (!(
- (c >= 'a' && c <= 'z') ||
- (c >= 'A' && c <= 'Z') ||
- (c >= '0' && c <= '9') ||
- (c == '_')
- )) {
- break;
- }
-
- // pop char out of lexer
- lex_next(lexer);
-
- if (len + 1 == MAX_LEX_LENGTH) {
- ERROR_POS(pos, "ident has max length of %d",
- MAX_LEX_LENGTH);
- return M_ERROR;
- }
-
- *ptr++ = c;
- len++;
- }
-
- if (len == 0) {
- ERROR_POS(pos, "attempted to lex empty ident %d",
- MAX_LEX_LENGTH);
- return M_ERROR;
- }
-
- *ptr = '\0';
- return M_SUCCESS;
-}
-
/* lexes a string until closing quote
* returns error if string is too long or hit newline */
-static int lex_string(struct lexer *lexer,char text[MAX_LEX_LENGTH])
+static int lex_string(struct lexer *lexer, struct string *string)
{
- int len = 0;
- char *ptr = text;
- int c;
+ char c;
+ string_init(string);
while (1) {
c = lex_next(lexer);
+
+ // stop on ending quote
if (c == '"')
break;
// strings cannot span multiple lines
if (c == '\n') {
ERROR_POS(pos, "reached newline before end of string");
+ string_free(string);
return M_ERROR;
}
@@ -129,20 +93,73 @@ static int lex_string(struct lexer *lexer,char text[MAX_LEX_LENGTH])
}
}
- if (len + 1 == MAX_LEX_LENGTH) {
- ERROR_POS(pos, "string has max length of %d",
- MAX_LEX_LENGTH);
+ // push char into string
+ if (string_push(string, c)) {
+ string_free(string);
+ return M_ERROR;
+ }
+ }
+
+ // null terminate string
+ if (string_push(string, '\0')) {
+ free(string->str);
+ return M_ERROR;
+ }
+
+ return M_SUCCESS;
+}
+
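+/* lexes an ident (letters, digits and underscores), starting the
+ * string with <prefix> unless it is '\0'
+ * returns error if the ident is empty or a char cannot be pushed */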
+static int lex_ident(struct lexer *lexer, struct string *string,
+ char prefix)
+{
+ char c;
+ string_init(string);
+
+ if (prefix != '\0' && string_push(string, prefix)) {
+ string_free(string);
+ return M_ERROR;
+ }
+
+ while (1) {
+ c = lex_peek(lexer);
+ if (!(
+ (c >= 'a' && c <= 'z') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= '0' && c <= '9') ||
+ (c == '_')
+ )) {
+ break;
+ }
+
+ // pop char out of lexer
+ lex_next(lexer);
+
+ // push char into string
+ if (string_push(string, c)) {
+ free(string->str);
return M_ERROR;
}
+ }
+
+ // empty idents are not allowed
+ if (string->len < 1) {
+ string_free(string);
+ ERROR("empty ident tokens are not allowed");
+ return M_ERROR;
+ }
- *ptr++ = c;
- len++;
+ // null terminate string
+ if (string_push(string, '\0')) {
+ string_free(string);
+ return M_ERROR;
}
- *ptr = '\0';
return M_SUCCESS;
}
+
/* lexes a integer number in base 2,8,10, or 16,
* uses base 10 by default but chan be changed by 0b, 0o, and 0x */
static int lex_number(struct lexer *lexer, int64_t *n)
@@ -221,6 +238,7 @@ int lexer_next(struct lexer *lexer, struct token *token)
again: // use label to avoid whitespace recursion
token->x = lexer->x;
token->y = lexer->y;
+ token->off = ftell(lexer->file);
pos.x = lexer->x;
pos.y = lexer->y;
token->type = TOK_EOF;
@@ -231,54 +249,80 @@ again: // use label to avoid whitespace recursion
switch (c) {
case EOF:
+
+ // return a EOF token
case '\0':
token->type = TOK_EOF;
break;
+
+ // skip the comment
+ // .. and return a NL token
case ';':
case '#':
skip_comment(lexer);
token->type = TOK_NL;
break;
+
+ // skip the whitespace and
+ // try to parse the next character
case ' ':
case '\t':
// skip white space
lex_next(lexer);
goto again;
+
+ // return a NL token
case '\n':
lex_next(lexer);
token->type = TOK_NL;
break;
+
+ // return a comma token
case ',':
lex_next(lexer);
token->type = TOK_COMMA;
break;
+
+ // return a equal token
case '=':
lex_next(lexer);
token->type = TOK_EQUAL;
break;
+
+ // return a left paren token
case '(':
lex_next(lexer);
token->type = TOK_LPAREN;
break;
+
+ // return a right paren token
case ')':
token->type = TOK_RPAREN;
lex_next(lexer);
break;
+
+ // return a register token
case '$':
token->type = TOK_REG;
lex_next(lexer);
- res = lex_ident(lexer, token->text);
+ res = lex_ident(lexer, &token->string, '\0');
break;
+
+ // return a directive token
case '.':
token->type = TOK_DIRECTIVE;
lex_next(lexer);
- res = lex_ident(lexer, token->text);
+ res = lex_ident(lexer, &token->string, '.');
break;
+
+ // return a string token
case '"':
token->type = TOK_STRING;
lex_next(lexer);
- res = lex_string(lexer, token->text);
+ res = lex_string(lexer, &token->string);
break;
+
+ // return a number token
case '-':
case '0':
case '1':
@@ -293,68 +337,78 @@ again: // use label to avoid whitespace recursion
token->type = TOK_NUMBER;
res = lex_number(lexer, &token->number);
break;
+
+ // return a ident or label token depending
+ // if it ends with a colon
default:
token->type = TOK_IDENT;
- res = lex_ident(lexer, token->text);
+ res = lex_ident(lexer, &token->string, '\0');
if (lex_peek(lexer) == ':') {
lex_next(lexer);
token->type = TOK_LABEL;
}
break;
}
+
return res;
}
int lexer_init(const char *path, struct lexer *lexer)
{
- FILE *file = fopen(path, "r");
- if (file == NULL) {
- PERROR("cannot read '%s'", path);
- return M_ERROR;
- }
- lexer->file = file;
+ /// defaults
+ lexer->file = NULL;
lexer->peek = EOF;
lexer->x = 1;
lexer->y = 1;
+
+ /// load file
+ lexer->file = fopen(path, "r");
+ if (lexer->file == NULL) {
+ PERROR("cannot read");
+ return M_ERROR;
+ }
+
return M_SUCCESS;
}
-int lexer_free(struct lexer *lexer)
+void lexer_free(struct lexer *lexer)
{
- return fclose(lexer->file);
+ if (lexer->file)
+ fclose(lexer->file);
}
char *token_str(enum token_type type)
{
switch (type) {
- case TOK_IDENT:
+ case TOK_IDENT:
return "ident";
- case TOK_REG:
+ case TOK_REG:
return "register";
- case TOK_LABEL:
+ case TOK_LABEL:
return "label";
- case TOK_STRING:
+ case TOK_STRING:
return "string";
- case TOK_COMMA:
+ case TOK_COMMA:
return "comma";
- case TOK_EQUAL:
+ case TOK_EQUAL:
return "equal";
- case TOK_LPAREN:
+ case TOK_LPAREN:
return "left parentheses";
- case TOK_RPAREN:
+ case TOK_RPAREN:
return "right parentheses";
- case TOK_NUMBER:
+ case TOK_NUMBER:
return "number";
- case TOK_EOF:
+ case TOK_EOF:
return "end of file";
- case TOK_NL:
+ case TOK_NL:
return "new line";
- case TOK_DIRECTIVE:
+ case TOK_DIRECTIVE:
return "directive";
- }
+ }
return "unknown";
}
+/* save the current state from the lexer */
void lexer_save(struct lexer *lexer, struct lexer_state *state)
{
state->x = lexer->x;
@@ -371,3 +425,18 @@ void lexer_load(struct lexer *lexer, const struct lexer_state *state)
lexer->peek = state->peek;
fseek(lexer->file, state->offset, SEEK_SET);
}
+
+/* free the string held by a token
+ * tokens of a type that carries no string are left untouched */
+void token_free(struct token *token)
+{
+	switch (token->type) {
+	case TOK_REG:
+	case TOK_IDENT:
+	case TOK_LABEL:
+	case TOK_STRING:
+	case TOK_DIRECTIVE:
+		// free(NULL) is a no-op, so no check is needed
+		free(token->string.str);
+		break;
+	default:
+		// a label has to be followed by a statement before C23
+		break;
+	}
+}
diff --git a/masm/lex.h b/masm/lex.h
index e08d0a3..8da6558 100644
--- a/masm/lex.h
+++ b/masm/lex.h
@@ -7,41 +7,89 @@
#include <stdio.h>
#include <stdint.h>
-struct lexer {
- FILE *file;
- int peek;
- int x;
- int y;
+/// represents a non null
+/// terminated string
+struct string {
+ char *str;
+ uint32_t len;
+ uint32_t size;
+ bool allocated;
};
-struct lexer_state {
- long offset;
- int peek;
- int x;
- int y;
-};
+/* initalize a string */
+void string_init(struct string *string);
+/* free a string */
+void string_free(struct string *string);
+/* clone a string, leave the old one */
+int string_clone(struct string *dst, const struct string *const src);
+/* move a string, delete the old one */
+void string_move(struct string *dst, struct string *src);
+/* pushes a char onto a string */
+int string_push(struct string *string, char c);
+/* load a string from the bss (not allocated) */
+void string_bss(struct string *string, char *src);
enum token_type {
- TOK_IDENT,
- TOK_REG,
- TOK_LABEL,
- TOK_STRING,
+ /// has no associated
+ /// data
TOK_COMMA,
TOK_EQUAL,
TOK_LPAREN,
TOK_RPAREN,
- TOK_NUMBER,
TOK_EOF,
TOK_NL,
+
+ /// uses number
+ TOK_NUMBER,
+
+ /// uses string
+ TOK_REG,
+ TOK_IDENT,
+ TOK_LABEL,
+ TOK_STRING,
TOK_DIRECTIVE,
};
+/// represents a token
+/// returned from the lexer
struct token {
+ /// type
enum token_type type;
+
+ /// position
+ int x, y;
+ /// pos in bytes
+ int off;
+
+ /// data
union {
int64_t number;
- char text[MAX_LEX_LENGTH];
+ struct string string;
};
+};
+
+/* frees a token*/
+void token_free(struct token *token);
+
+/// holds the data
+/// for the current lexer
+struct lexer {
+ // the currently
+ // open file
+ FILE *file;
+
+ // the last character peeked
+ int peek;
+
+ // the current position
+ int x, y;
+};
+
+/// holds a previous state of a
+/// lexer, which allows rebounding
+struct lexer_state {
+ long offset;
+ int peek;
int x;
int y;
};
@@ -49,8 +97,8 @@ struct token {
/* initalize a lexer */
int lexer_init(const char *file, struct lexer *lexer);
-/* free the lxer */
-int lexer_free(struct lexer *lexer);
+/* free the lexer */
+void lexer_free(struct lexer *lexer);
/* lexes the next token, returns M_ERROR on error,
* and TOK_EOF on EOF */
diff --git a/masm/out.o b/masm/out.o
new file mode 100644
index 0000000..ab24e9b
--- /dev/null
+++ b/masm/out.o
Binary files differ
diff --git a/masm/parse.c b/masm/parse.c
index dbe6ade..b36aa1e 100644
--- a/masm/parse.c
+++ b/masm/parse.c
@@ -1,1326 +1,533 @@
#include <mlimits.h>
-#include <merror.h>
#include <stdint.h>
-#include <stdio.h>
-#include <string.h>
-#include <elf.h>
+#include <merror.h>
+#include <stddef.h>
-#include "parse.h"
#include "lex.h"
-#include "mips.h"
+#include "parse.h"
-#define B16(x) (x)
-#define B32(x) (x)
+///
+/// Token Functions
+/// either get a token, peek a token,
+/// or assert a token was returned
+///
+/* get the next token from the lexer */
static int next_token(struct parser *parser, struct token *tok)
{
+ // return peeked first
if (parser->peek.type != TOK_EOF) {
if (tok != NULL)
*tok = parser->peek;
+ else
+ token_free(&parser->peek);
+
parser->peek.type = TOK_EOF;
return M_SUCCESS;
}
+
+ // get next token
struct token token;
- if (lexer_next(parser->lexer, &token))
+ if (lexer_next(&parser->lexer, &token))
return M_ERROR;
- if (tok != NULL)
+
+ // return value if given pointer
+ // else free
+ if (tok != NULL) {
*tok = token;
+ } else {
+ token_free(&token);
+ }
+
return M_SUCCESS;
}
-
+/* peek the next token from the lexer */
static int peek_token(struct parser *parser, struct token *tok)
{
+ // if we dont have a saved token
+ // get the next one
if (parser->peek.type == TOK_EOF) {
if (next_token(parser, &parser->peek))
return M_ERROR;
}
+
+ // return it if we were given
+ // a pointer
if (tok != NULL)
*tok = parser->peek;
+
return M_SUCCESS;
}
-
+/* get the next token from the lexer, and assert its of type <type> */
static int assert_token(struct parser *parser, enum token_type type,
struct token *tok)
{
+ // get next token
struct token token;
if (next_token(parser, &token))
return M_ERROR;
+
+ // assert its of type <type>
if (token.type != type) {
ERROR_POS(token, "expected a token of type '%s', got '%s'",
token_str(type), token_str(token.type));
+ token_free(&token);
return M_ERROR;
}
- if (tok != NULL)
+
+ // return value if given pointer
+ // else free
+ if (tok != NULL) {
*tok = token;
+ } else {
+ token_free(&token);
+ }
+
return M_SUCCESS;
}
+/* get the next token from the lexer, and assert its of type NL or EOF
+ * the token is always freed, on success and on error */
static int assert_eol(struct parser *parser)
{
struct token token;
if (next_token(parser, &token))
return M_ERROR;
if (token.type != TOK_NL && token.type != TOK_EOF) {
- ERROR_POS(token, "expected a new line or end of file");
+ ERROR_POS(token, "expected a new line or end of file, got '%s'",
+ token_str(token.type));
+ // the unexpected token may own a string, dont leak it
+ token_free(&token);
return M_ERROR;
}
+ token_free(&token);
return M_SUCCESS;
}
-/* each instruction has a given parse format
- * internal to the parser */
-enum mips_parse_format {
- // register type: rs, rt, td
- MIPS_PARSE_R,
- // register type: rs, rt
- MIPS_PARSE_R2,
- // register type: rd
- MIPS_PARSE_RD,
- // register type: rs
- MIPS_PARSE_RS,
- // imeediate type: rs, rt, immd
- MIPS_PARSE_I,
- // jump type: offset
- MIPS_PARSE_J,
- // offset 16b type: offset
- MIPS_PARSE_O16,
- // offset 26b type: offset
- MIPS_PARSE_O26,
- // breanch equal type: rs, rt, offset
- MIPS_PARSE_BE,
- // branch zero type: rs, offset
- MIPS_PARSE_BZ,
- // store and load: rt, offset(base)
- MIPS_PARSE_SL,
- // store and load immediate: rt, immediate
- MIPS_PARSE_SLI,
- // shift: rd, rt, sa
- MIPS_PARSE_S,
- // shift variable: rd, rt, rs
- MIPS_PARSE_SV,
- // none:
- MIPS_PARSE_NONE,
-};
-
-#define FORMAT(ins, format) \
- [MIPS_INS_##ins] = MIPS_PARSE_##format, \
-
-const enum mips_parse_format mips_parse_formats[] = {
- FORMAT(ADD, R)
- FORMAT(ADDI, I)
- FORMAT(ADDIU, I)
- FORMAT(ADDU, R)
- FORMAT(AND, R)
- FORMAT(ANDI, I)
- FORMAT(BAL, O16)
- FORMAT(BALC, O26)
- FORMAT(BC, O26)
- FORMAT(BEQ, BE)
- FORMAT(BEQL, BE)
- FORMAT(BGEZ, BZ)
- FORMAT(BGEZAL, BZ)
- FORMAT(BGEZALL, BZ)
- FORMAT(BGEZL, BZ)
- FORMAT(BGTZ, BZ)
- FORMAT(BGTZL, BZ)
- FORMAT(BLEZ, BZ)
- FORMAT(BLEZL, BZ)
- FORMAT(BLTZ, BZ)
- FORMAT(BLTZAL, BZ)
- FORMAT(BLTZALL, BZ)
- FORMAT(BLTZL, BZ)
- FORMAT(BNE, BE)
- FORMAT(BNEL, BE)
- FORMAT(DIV, R)
- FORMAT(MOD, R)
- FORMAT(DIVU, R)
- FORMAT(MODU, R)
- FORMAT(J, J)
- FORMAT(JAL, J)
- FORMAT(JALR, RS) // TODO: handle rd
- FORMAT(JALX, J)
- FORMAT(JR, RS)
- FORMAT(LB, SL)
- FORMAT(LBU, SL)
- FORMAT(LH, SL)
- FORMAT(LHU, SL)
- FORMAT(LUI, SLI)
- FORMAT(LW, SL)
- FORMAT(MFHI, RD)
- FORMAT(MFLO, RD)
- FORMAT(MTHI, RS)
- FORMAT(MTLO, RS)
- FORMAT(MUL, R)
- FORMAT(MUH, R)
- FORMAT(MULU, R)
- FORMAT(MUHU, R)
- FORMAT(SB, SL)
- FORMAT(SH, SL)
- FORMAT(SW, SL)
- FORMAT(SLL, S)
- FORMAT(SLLV, SV)
- FORMAT(SLT, R)
- FORMAT(SLTI, I)
- FORMAT(SLTIU, I)
- FORMAT(SLTU, R)
- FORMAT(SRA, S)
- FORMAT(SRAV, SV)
- FORMAT(SRL, S)
- FORMAT(SRLV, SV)
- FORMAT(SYSCALL, NONE)
- FORMAT(OR, R)
- FORMAT(ORI, I)
- FORMAT(NOR, R)
- FORMAT(SUB, R)
- FORMAT(SUBU, R)
- FORMAT(XOR, R)
- FORMAT(XORI, I)
-};
-
-#undef FORMAT
-
-#define MAX5 (1 << 5)
-#define MAX16 (1 << 16)
-#define MAX26 (1 << 25)
-#define MAX32 (1 << 31)
-
-static int get_reference(struct parser *parser, uint64_t *offset,
- struct reference *ref, unsigned char type)
+/* peek the next token and return SUCCESS on eol */
+static int peek_eol(struct parser *parser)
{
struct token token;
-
- if (next_token(parser, &token))
- return M_ERROR;
-
- if (token.type == TOK_NUMBER) {
-
- *offset = token.number;
- return M_SUCCESS;
- }
-
- if (token.type != TOK_IDENT) {
- ERROR_POS(token, "unexpected token of type '%s'",
- token_str(token.type));
- return M_ERROR;
- }
-
- strcpy(ref->name, token.text);
- ref->type = type;
- ref->addend = 0;
-
- // return zero for now
- *offset = 0;
- return M_SUCCESS;
-}
-
-static int get_offset(struct parser *parser, int32_t *offset,
- struct reference *ref)
-{
- uint64_t off;
- if (get_reference(parser, &off, ref, R_MIPS_PC16))
- return M_ERROR;
-
- if (off % 4) {
- ERROR_POS((*parser->lexer), "cannot use offset of '%ld', must "
- "be divisble by four", off);
- return M_ERROR;
- }
-
- if (off > MAX16) {
- ERROR("offset '%d' cannot be larger than 16 bits", off);
- return M_ERROR;
- }
-
- *offset = off;
- return M_SUCCESS;
-}
-
-static int get_offset_26(struct parser *parser, int32_t *offset,
- struct reference *ref)
-{
- uint64_t off;
- if (get_reference(parser, &off, ref, R_MIPS_PC26_S2))
- return M_ERROR;
-
- if (off % 4) {
- ERROR_POS((*parser->lexer), "cannot use offset of '%ld', must "
- "be divisble by four", off);
- return M_ERROR;
- }
-
- if (off > MAX26) {
- ERROR("offset '%d' cannot be larger than 26 bits", off);
- return M_ERROR;
- }
-
- *offset = off;
- return M_SUCCESS;
-}
-
-static int get_target(struct parser *parser, uint32_t *offset,
- struct reference *ref)
-{
- uint64_t off;
- if (get_reference(parser, &off, ref, R_MIPS_26))
- return M_ERROR;
-
- if (off > MAX26) {
- ERROR("target '%d' cannot be larger than 26 bits", off);
+ if (peek_token(parser, &token))
return M_ERROR;
- }
-
- *offset = off;
- return M_SUCCESS;
+ int res = (token.type == TOK_NL || token.type == TOK_EOF) ?
+ M_SUCCESS : M_ERROR;
+ return res;
}
-static int get_instruction(const char *ident, struct mips_instruction *res)
-{
- for (int i = 0; i < __MIPS_INS_LEN; i++) {
- struct mips_instruction ins =
- mips_instructions[i];
- if (strcasecmp(ident, ins.name) == 0) {
- if (res != NULL)
- *res = ins;
- return M_SUCCESS;
- }
- }
- return M_ERROR;
-}
+///
+/// PARSER FUNCTIONS
+/// parses each type of expression
+///
-static int parse_register(struct parser *parser, enum mips_register *reg)
+static int parse_directive_whb(struct parser *parser, void *data, uint32_t *res,
+ size_t length, size_t max_size)
{
struct token token;
- if (assert_token(parser, TOK_REG, &token))
- return M_ERROR;
-
- int len = strlen(token.text);
- int c0 = len > 0 ? token.text[0] : '\0',
- c1 = len > 1 ? token.text[1] : '\0',
- c2 = len > 2 ? token.text[2] : '\0',
- c3 = len > 3 ? token.text[3] : '\0';
-
- // $zero
- if (c0 == 'z') {
- if (c1 == 'e' && c2 == 'r' && c3 == 'o') {
- *reg = MIPS_REG_ZERO;
- return M_SUCCESS;
- }
- }
-
- // $a0-a3 $at
- else if (c0 == 'a') {
- if (c1 == 't') {
- *reg = MIPS_REG_AT;
- return M_SUCCESS;
- }
- if (c1 >= '0' && c1 <= '3') {
- *reg = MIPS_REG_A0;
- *reg += c1 - '0';
- return M_SUCCESS;
- }
- }
-
- // $v0-v1
- else if (c0 == 'v') {
- if (c1 >= '0' && c1 <= '1') {
- *reg = MIPS_REG_V0;
- *reg += c1 - '0';
- return M_SUCCESS;
- }
- }
-
- // $t0-t9
- else if (c0 == 't') {
- if (c1 >= '0' && c1 <= '7') {
- *reg = MIPS_REG_T0;
- *reg += c1 - '0';
- return M_SUCCESS;
- }
- // reg T8-T9 are not in order with T0-T7
- if (c1 >= '8' && c1 <= '9') {
- *reg = MIPS_REG_T8;
- *reg += c1 - '8';
- return M_SUCCESS;
- }
- }
-
- // $s0-s7 $sp
- else if (c0 == 's') {
- if (c1 >= '0' && c1 <= '7') {
- *reg = MIPS_REG_S0;
- *reg += c1 - '0';
- return M_SUCCESS;
- }
- if (c1 == 'p') {
- *reg = MIPS_REG_SP;
- return M_SUCCESS;
- }
- }
+ int len = 0;
- // $k0-k1
- else if (c0 == 'k') {
- if (c1 >= '0' && c1 <= '1') {
- *reg = MIPS_REG_K0;
- *reg += c1 - '0';
- return M_SUCCESS;
- }
- }
+ while (1) {
+ if (peek_eol(parser) == M_SUCCESS)
+ break;
- // $gp
- else if (c0 == 'g') {
- if (c1 == 'p') {
- *reg = MIPS_REG_GP;
- return M_SUCCESS;
- }
- }
+ if (assert_token(parser, TOK_NUMBER, &token))
+ return M_ERROR;
- // $fp
- else if (c0 == 'f') {
- if (c1 == 'p') {
- *reg = MIPS_REG_FP;
- return M_SUCCESS;
+ if ((uint64_t)token.number > max_size) {
+ ERROR_POS(token, "number cannot exceed max size of %zu",
+ max_size);
+ return M_ERROR;
}
- }
- // $rp
- else if (c0 == 'r') {
- if (c1 == 'a') {
- *reg = MIPS_REG_RA;
- return M_SUCCESS;
+ if (len >= MAX_ARG_LENGTH) {
+ ERROR_POS(token, "exceeded max argument length for "
+ "directives");
+ return M_ERROR;
}
- }
- // $0-31 (non aliased register names)
- else if (c0 >= '0' && c0 <= '9') {
- int i = c0 - '0';
- if (c1 >= '0' && c1 <= '9') {
- i *= 10;
- i += c1 - '0';
- }
- if (i <= 31) {
- *reg = i;
- return M_SUCCESS;
- }
+ // store exactly one element of <length> bytes. max_size is the
+ // largest allowed VALUE (e.g. UINT32_MAX), not a byte count, so it
+ // must never be used as the copy size. the number is narrowed to
+ // an integer of the element width first, which keeps the stored
+ // bytes independent of host endianness.
+ uint8_t *slot = (uint8_t *) data + ((size_t) len++ * length);
+ if (length == sizeof(uint32_t)) {
+ uint32_t value = (uint32_t) token.number;
+ memcpy(slot, &value, sizeof(value));
+ } else if (length == sizeof(uint16_t)) {
+ uint16_t value = (uint16_t) token.number;
+ memcpy(slot, &value, sizeof(value));
+ } else {
+ uint8_t value = (uint8_t) token.number;
+ memcpy(slot, &value, sizeof(value));
+ }
}
- ERROR_POS(token, "unknown register $%s", token.text);
- return M_ERROR;
-}
-
-static int get_reg_offset(struct parser *parser,
- struct ins_expr *expr)
-{
- struct token token;
- enum mips_register reg;
-
- struct mips_instruction *fi = &expr->ins[0];
- struct mips_instruction *si = &expr->ins[1]; // possibly pseudo
- struct reference *fr = &expr->ref[0];
- struct reference *sr = &expr->ref[1];
-
- expr->ins_len = 1;
- fr->type = R_MIPS_NONE;
-
-// =============================================
-
- // defaults
- fi->data.rs = MIPS_REG_ZERO;
- fi->data.immd = 0;
-
- if (peek_token(parser, &token))
- return M_ERROR;
-
- if (token.type == TOK_IDENT)
- goto label;
- else if (token.type == TOK_LPAREN)
- goto reg;
- else
- goto off;
-
-// =============================================
-
-label:
-
- next_token(parser, &token);
-
- expr->ins_len = 2;
-
- // move over first instruction to add in a LUI
- *si = *fi;
- si->data.rs = MIPS_REG_AT;
- si->data.offset = 0;
-
- // update LUI
- *fi = mips_instructions[MIPS_INS_LUI];
- fi->data.rt = MIPS_REG_AT;
- fi->data.immd = 0;
-
- // update references
- strcpy(fr->name, token.text);
- fr->type = R_MIPS_HI16;
- fr->addend = 0;
- strcpy(sr->name, token.text);
- sr->type = R_MIPS_LO16;
- sr->addend = 0;
-
- goto end;
-
-// =============================================
-
-off:
-
- if (assert_token(parser, TOK_NUMBER, &token))
- return M_ERROR;
- fi->data.immd = B16(token.number);
-
- if (peek_token(parser, &token))
- return M_ERROR;
-
- if (token.type == TOK_LPAREN)
- goto reg;
- else
- goto end;
-
-// =============================================
-
-reg:
- if (assert_token(parser, TOK_LPAREN, NULL))
- return M_ERROR;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- fi->data.rs = reg;
-
- if (assert_token(parser, TOK_RPAREN, NULL))
- return M_ERROR;
-
-// =============================================
-end:
- if (peek_token(parser, &token))
- return M_ERROR;
-
+ *res = len;
return M_SUCCESS;
}
-static int parse_number(struct parser *parser, uint32_t *n, uint32_t max)
+static int parse_immd(struct parser *parser, uint16_t *num)
{
struct token token;
if (assert_token(parser, TOK_NUMBER, &token))
return M_ERROR;
- if (max && token.number > max) {
- ERROR_POS(token, "number cannot be larger than '%d'", max);
- return M_ERROR;
- }
- *n = token.number;
- return M_SUCCESS;
-}
-
-static int parse_instruction_r(struct parser *parser,
- struct mips_instruction *ins)
-{
- // format: rs, rt, rd
- enum mips_register reg;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rd = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rs = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rt = reg;
-
- return M_SUCCESS;
-}
-
-static int parse_instruction_r2(struct parser *parser,
- struct mips_instruction *ins)
-{
- // format: rs, rt
- enum mips_register reg;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rs = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rt = reg;
-
- return M_SUCCESS;
-}
-
-static int parse_instruction_rs(struct parser *parser,
- struct mips_instruction *ins)
-{
- // format: rs
- enum mips_register reg;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rs = reg;
+ // reject values that do not fit the 16 bit result instead of
+ // silently truncating them (if token.number is signed, negative
+ // values become huge after the cast and are rejected as well)
+ if ((uint64_t)token.number > UINT16_MAX) {
+ ERROR_POS(token, "number cannot exceed max size of %d",
+ UINT16_MAX);
+ return M_ERROR;
+ }
+ // TOK_NUMBER does not need to be freed
+ *num = token.number;
return M_SUCCESS;
}
-static int parse_instruction_rd(struct parser *parser,
- struct mips_instruction *ins)
+static int parse_ident(struct parser *parser, struct string *ident)
{
- // format: rd
- enum mips_register reg;
-
- if (parse_register(parser, &reg))
+ struct token token;
+ if (assert_token(parser, TOK_IDENT, &token))
return M_ERROR;
- ins->data.rd = reg;
-
+ string_move(ident, &token.string);
return M_SUCCESS;
}
-static int parse_instruction_i(struct parser *parser,
- struct mips_instruction *ins)
+static int parse_string(struct parser *parser, struct string *string)
{
- // format: rs, rt, immd
- enum mips_register reg;
struct token token;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rt = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rs = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- if (assert_token(parser, TOK_NUMBER, &token))
- return M_ERROR;
-
- if (token.number >= MAX16)
+ if (assert_token(parser, TOK_STRING, &token))
return M_ERROR;
- ins->data.immd = B16(token.number);
-
+ string_move(string, &token.string);
return M_SUCCESS;
}
-static int parse_instruction_offset(struct parser *parser,
- uint32_t max,
- struct mips_instruction *ins,
- struct reference *ref)
+/* parses a directive */
+static int parse_directive(struct parser *parser, struct string *name,
+ struct expr_directive *expr)
{
- int32_t n;
+ #define CHK(n) if (strcmp(name->str, #n) == 0)
- switch (max) {
- case MAX26:
- if (get_offset_26(parser, &n, ref))
- return M_ERROR;
- ins->data.offs26 = B32(n);
- break;
- case MAX16:
- if (get_offset(parser, &n, ref))
- return M_ERROR;
- ins->data.offset = B16(n);
- break;
- default:
- return M_ERROR;
+ CHK(.align) {
+ expr->type = EXPR_DIRECTIVE_ALIGN;
+ return parse_immd(parser, &expr->align);
+ } else CHK(.space) {
+ expr->type = EXPR_DIRECTIVE_SPACE;
+ return parse_immd(parser, &expr->space);
+ } else CHK(.word) {
+ expr->type = EXPR_DIRECTIVE_WORD;
+ return parse_directive_whb(parser, expr->words, &expr->len,
+ sizeof(uint32_t), UINT32_MAX);
+ } else CHK(.half) {
+ expr->type = EXPR_DIRECTIVE_HALF;
+ return parse_directive_whb(parser, expr->halfs, &expr->len,
+ sizeof(uint16_t), UINT16_MAX);
+ } else CHK(.byte) {
+ expr->type = EXPR_DIRECTIVE_BYTE;
+ return parse_directive_whb(parser, expr->bytes, &expr->len,
+ sizeof(uint8_t), UINT8_MAX);
+ } else CHK(.extern) {
+ expr->type = EXPR_DIRECTIVE_EXTERN;
+ return parse_ident(parser, &expr->label);
+ } else CHK(.globl) {
+ expr->type = EXPR_DIRECTIVE_GLOBL;
+ return parse_ident(parser, &expr->label);
+ } else CHK(.ascii) {
+ expr->type = EXPR_DIRECTIVE_ASCII;
+ return parse_string(parser, &expr->string);
+ } else CHK(.asciiz) {
+ expr->type = EXPR_DIRECTIVE_ASCIIZ;
+ return parse_string(parser, &expr->string);
+ } else {
+ expr->type = EXPR_DIRECTIVE_SECTION;
+ string_move(&expr->section, name);
+ return M_SUCCESS;
}
- return M_SUCCESS;
-}
-
-static int parse_instruction_j(struct parser *parser,
- struct mips_instruction *ins,
- struct reference *ref)
-{
- uint32_t n;
- if (get_target(parser, &n, ref) || n > MAX26)
- return M_ERROR;
- ins->data.target = n;
-
- return M_SUCCESS;
-}
-
-static int parse_instruction_branch_equal(struct parser *parser,
- struct mips_instruction *ins,
- struct reference *ref)
-{
- enum mips_register reg;
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rs = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rt = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- int32_t off;
- if (get_offset(parser, &off, ref))
- return M_ERROR;
- ins->data.offset = B16(off);
-
- return M_SUCCESS;
-}
-
-static int parse_instruction_branch(struct parser *parser,
- struct mips_instruction *ins,
- struct reference *ref)
-{
- enum mips_register reg;
- int32_t n;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rs = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- if (get_offset(parser, &n, ref))
- return M_ERROR;
- ins->data.offset = B16(n);
-
- return M_SUCCESS;
-}
-
-static int parse_instruction_sl(struct parser *parser,
- struct ins_expr *expr)
-{
- enum mips_register reg;
- struct mips_instruction *ins = &expr->ins[0];
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rt = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- if (get_reg_offset(parser, expr))
- return M_ERROR;
-
- return M_SUCCESS;
+ #undef CHK
}
-static int parse_instruction_sli(struct parser *parser,
- struct mips_instruction *ins)
+static int parse_constant(struct parser *parser, struct string *name,
+ struct expr_const *constant)
{
- enum mips_register reg;
- struct token token;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rt = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX16)
+ if (assert_token(parser, TOK_EQUAL, NULL))
return M_ERROR;
- ins->data.immd = B16(token.number);
- return M_SUCCESS;
-}
-
-static int parse_instruction_s(struct parser *parser,
- struct mips_instruction *ins)
-{
- enum mips_register reg;
struct token token;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rd = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rt = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX5)
+ if (assert_token(parser, TOK_NUMBER, &token))
return M_ERROR;
- ins->data.shamt = token.number;
+ string_move(&constant->name, name);
+ constant->num = token.number;
return M_SUCCESS;
}
-static int parse_instruction_sv(struct parser *parser,
- struct mips_instruction *ins)
+static int parse_offset(struct parser *parser,
+ struct expr_ins_arg *arg,
+ uint64_t immd)
{
- enum mips_register reg;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rd = reg;
+ // the immediate has already been parsed
+ // now parse (REG)
- if (assert_token(parser, TOK_COMMA, NULL))
+ if (assert_token(parser, TOK_LPAREN, NULL))
return M_ERROR;
- if (parse_register(parser, &reg))
+ struct token token;
+ if (assert_token(parser, TOK_REG, &token))
return M_ERROR;
- ins->data.rt = reg;
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
+ // set values
+ string_move(&arg->offset.reg, &token.string);
+ arg->offset.immd = immd;
- if (parse_register(parser, &reg))
+ if (assert_token(parser, TOK_RPAREN, NULL)) {
+ string_free(&arg->offset.reg);
return M_ERROR;
- ins->data.rs = reg;
+ }
return M_SUCCESS;
}
-static int parse_pseudo_li(struct parser *parser, struct ins_expr *expr)
+static int parse_instruction_arg(struct parser *parser,
+ struct expr_ins_arg *arg)
{
- enum mips_register reg;
- uint32_t immd;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
+ // allowed token matches:
+ //
+ // register:
+ // REG
+ //
+ // label:
+ // IDENT
+ //
+ // immediate:
+ // IMMD
+ //
+ // offset:
+ // (REG)
+ // IMMD(REG)
- if (parse_number(parser, &immd, MAX16))
- return M_ERROR;
-
- expr->ins_len = 1;
- expr->ins[0] = mips_instructions[MIPS_INS_ORI];
- expr->ins[0].data.rt = reg;
- expr->ins[0].data.rs = MIPS_REG_ZERO;
- expr->ins[0].data.immd = B16(immd);
- expr->ref[0].type = R_MIPS_NONE;
-
- return M_SUCCESS;
-}
-
-static int parse_pseudo_la(struct parser *parser, struct ins_expr *expr)
-{
- enum mips_register reg;
struct token token;
-
- uint16_t hi = 0, lo = 0;
-
- if (parse_register(parser, &reg))
+ if (peek_token(parser, &token))
return M_ERROR;
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
+ // if it's a left paren, we're parsing
+ // an offset
+ if (token.type == TOK_LPAREN) {
+ arg->type = EXPR_INS_ARG_OFFSET;
+ return parse_offset(parser, arg, 0);
+ }
+ // token must now be either a number (immediate),
+ // a register, or a label...
+ // ... take ownership of the next token
if (next_token(parser, &token))
return M_ERROR;
- if (token.type == TOK_IDENT) {
- expr->ref[0].type = R_MIPS_HI16;
- expr->ref[0].addend = 0;
- strcpy(expr->ref[0].name, token.text);
- expr->ref[1].type = R_MIPS_LO16;
- expr->ref[1].addend = 0;
- strcpy(expr->ref[1].name, token.text);
- } else if (token.type == TOK_NUMBER && token.number > MAX32) {
- hi = token.number >> 16;
- lo = token.number & 0x0000ffff;
- expr->ref[0].type = R_MIPS_NONE;
- expr->ref[1].type = R_MIPS_NONE;
- } else {
- return M_ERROR;
+ // if its a register... return
+ // there are no other pathways
+ if (token.type == TOK_REG) {
+ arg->type = EXPR_INS_ARG_REGISTER;
+ string_move(&arg->reg, &token.string);
+ return M_SUCCESS;
}
- expr->ins_len = 2;
- expr->ins[0] = mips_instructions[MIPS_INS_LUI];
- expr->ins[0].data.rt = reg;
- expr->ins[0].data.immd = B16(hi);
- expr->ins[1] = mips_instructions[MIPS_INS_ADDI];
- expr->ins[1].data.rt = reg;
- expr->ins[1].data.rs = reg;
- expr->ins[1].data.immd = B16(lo);
-
- return M_SUCCESS;
-}
-
-static int parse_pseudo_move(struct parser *parser, struct ins_expr *expr)
-{
- enum mips_register rd, rs;
-
- if (parse_register(parser, &rd))
- return M_ERROR;
+ // if it is a label... return
+ // there are no other pathways
+ if (token.type == TOK_IDENT) {
+ arg->type = EXPR_INS_ARG_LABEL;
+ string_move(&arg->label, &token.string);
+ return M_SUCCESS;
+ }
- if (assert_token(parser, TOK_COMMA, NULL))
+ // now it must be a number...
+ // throw an error if it's not
+ if (token.type != TOK_NUMBER) {
+ ERROR_POS(token, "expected number, got %s",
+ token_str(token.type));
+ token_free(&token);
return M_ERROR;
+ }
- if (parse_register(parser, &rs))
+ uint64_t immd = (uint64_t)token.number;
+ // now if the next token is a lparen
+ // parse offset, else return immd
+ if (peek_token(parser, &token))
return M_ERROR;
- expr->ins_len = 1;
- expr->ins[0] = mips_instructions[MIPS_INS_OR];
- expr->ins[0].data.rs = rs;
- expr->ins[0].data.rt = MIPS_REG_ZERO;
- expr->ins[0].data.rd = rd;
- expr->ref[0].type = R_MIPS_NONE;
-
- return M_SUCCESS;
-}
-
-static int parse_pseudo_nop(struct parser *parser, struct ins_expr *expr)
-{
- (void) parser;
-
- expr->ins_len = 1;
- expr->ins[0] = mips_instructions[MIPS_INS_SLL];
- expr->ref[0].type = R_MIPS_NONE;
-
- return M_SUCCESS;
-}
-
-static int parse_pseudo_instruction(struct parser *parser,
- struct ins_expr *expr,
- struct token ident)
-{
- // disablle logging in the logging
- // module
- extern int log_disabled;
- log_disabled = 1;
-
- int res = M_ERROR;
-
- struct lexer_state state;
- lexer_save(parser->lexer, &state);
-
- #define CHK(name) if (strcmp(ident.text, #name) == 0)
-
- CHK(li)
- res = parse_pseudo_li(parser, expr);
- else CHK(la)
- res = parse_pseudo_la(parser, expr);
- else CHK(move)
- res = parse_pseudo_move(parser, expr);
- else CHK(nop)
- res = parse_pseudo_nop(parser, expr);
-
- #undef CHK
-
- if (res) {
- // reset on fail
- lexer_load(parser->lexer, &state);
- expr->ins[0].data.raw = 0;
- expr->ins[1].data.raw = 0;
- expr->ref[0] = (struct reference) {0};
- expr->ref[1] = (struct reference) {0};
+ if (token.type == TOK_LPAREN) {
+ arg->type = EXPR_INS_ARG_OFFSET;
+ return parse_offset(parser, arg, immd);
+ } else {
+ arg->type = EXPR_INS_ARG_IMMEDIATE;
+ arg->immd = immd;
+ return M_SUCCESS;
}
- log_disabled = 0;
- return res;
}
-static int parse_instruction(struct parser *parser,
- struct ins_expr *expr,
- struct token ident)
+static int parse_instruction(struct parser *parser, struct string *name,
+ struct expr_ins *ins)
{
- struct mips_instruction instruction;
- enum mips_parse_format format;
- int res = M_SUCCESS;
-
- if (parse_pseudo_instruction(parser, expr, ident) == M_SUCCESS)
- return M_SUCCESS;
-
- if (get_instruction(ident.text, &instruction)) {
- ERROR_POS(ident, "unknown instruction '%s'", ident.text);
- return M_ERROR;
- }
-
- struct mips_instruction *ins = &expr->ins[0];
- struct reference *ref = &expr->ref[0];
-
- // this will only ever generate one instruction
- expr->ins_len = 1;
- *ins = instruction;
- ref->type = R_MIPS_NONE;
-
- format = mips_parse_formats[instruction.type];
- switch (format) {
- case MIPS_PARSE_R:
- res = parse_instruction_r(parser, ins);
- break;
- case MIPS_PARSE_R2:
- res = parse_instruction_r2(parser, ins);
- break;
- case MIPS_PARSE_RS:
- res = parse_instruction_rs(parser, ins);
- break;
- case MIPS_PARSE_RD:
- res = parse_instruction_rd(parser, ins);
- break;
- case MIPS_PARSE_I:
- res = parse_instruction_i(parser, ins);
- break;
- case MIPS_PARSE_J:
- res = parse_instruction_j(parser, ins, ref);
- break;
- case MIPS_PARSE_O16:
- res = parse_instruction_offset(parser, MAX16, ins, ref);
- break;
- case MIPS_PARSE_O26:
- res = parse_instruction_offset(parser, MAX26, ins, ref);
- break;
- case MIPS_PARSE_BE:
- res = parse_instruction_branch_equal(parser, ins, ref);
- break;
- case MIPS_PARSE_BZ:
- res = parse_instruction_branch(parser, ins, ref);
- break;
- case MIPS_PARSE_SL:
- res = parse_instruction_sl(parser, expr);
- break;
- case MIPS_PARSE_SLI:
- res = parse_instruction_sli(parser, ins);
- break;
- case MIPS_PARSE_S:
- res = parse_instruction_s(parser, ins);
- break;
- case MIPS_PARSE_SV:
- res = parse_instruction_sv(parser, ins);
- break;
- case MIPS_PARSE_NONE:
- res = M_SUCCESS;
- break;
- }
+ int len = 0;
- if (res == M_SUCCESS && assert_eol(parser))
- return M_ERROR;
+ if (peek_eol(parser) == M_SUCCESS)
+ goto skip_args;
- return res;
-}
+ while (1) {
+ if (len >= MAX_ARG_LENGTH) {
+ ERROR_POS(parser->lexer,
+ "reached max argument length");
+ return M_ERROR;
+ }
-static int parse_directive_align(struct parser *parser,
- struct mips_directive *directive)
-{
- struct token token;
- if (assert_token(parser, TOK_NUMBER, &token))
- return M_ERROR;
+ if (parse_instruction_arg(parser, &ins->args[len++]))
+ return M_ERROR;
- if (token.number < 0) {
- ERROR_POS(token, "cannot align negative");
- return M_ERROR;
- }
+ if (peek_eol(parser) == M_SUCCESS)
+ break;
- if (token.number > MAX16) {
- ERROR_POS(token, "cannot align more than 65kb");
- return M_ERROR;
+ if (assert_token(parser, TOK_COMMA, NULL))
+ return M_ERROR;
}
- directive->type = MIPS_DIRECTIVE_ALIGN;
- directive->align = token.number;
+skip_args:
+ string_move(&ins->name, name);
+ ins->args_len = len;
return M_SUCCESS;
}
-static int parse_directive_space(struct parser *parser,
- struct mips_directive *directive)
+/* gets the next value from the parser */
+int parser_next(struct parser *parser, struct expr *expr)
{
- struct token token;
- if (assert_token(parser, TOK_NUMBER, &token))
- return M_ERROR;
+ // the next token being looked at
+ struct token token = {
+ .type = TOK_NL
+ };
- if (token.number < 0) {
- ERROR_POS(token, "cannot reserve negative");
- return M_ERROR;
- }
+ // the result to return
+ int res = M_SUCCESS;
- if (token.number > MAX16) {
- ERROR_POS(token, "cannot reserve more than 65kb");
- return M_ERROR;
+ // skip all new lines
+ while (1) {
+ if (next_token(parser, &token))
+ return M_ERROR;
+ if (token.type != TOK_NL)
+ break;
+ token_free(&token);
}
- directive->type = MIPS_DIRECTIVE_SPACE;
- directive->space = token.number;
-
- return M_SUCCESS;
-}
+ expr->line_no = parser->lexer.y;
+ expr->byte_start = token.off;
+ expr->byte_end = token.off;
-static int parse_directive_whb(struct parser *parser,
- struct mips_directive *directive,
- enum mips_directive_type type)
-{
- struct token token;
- uint32_t size = 0;
- uint32_t len = 0;
+ // if EOF, return M_EOF
+ if (token.type == TOK_EOF)
+ return M_EOF;
- switch (type) {
- case MIPS_DIRECTIVE_WORD:
- size = UINT32_MAX;
- break;
- case MIPS_DIRECTIVE_HALF:
- size = UINT16_MAX;
- break;
- case MIPS_DIRECTIVE_BYTE:
- size = UINT8_MAX;
- break;
- default:
+ // when an ident ends with a colon
+ // parse a label
+ else if (token.type == TOK_LABEL) {
+ expr->type = EXPR_LABEL;
+ // label now owns string
+ string_move(&expr->label, &token.string);
}
- directive->type = type;
-
- while (1) {
- if (assert_token(parser, TOK_NUMBER, &token))
- return M_ERROR;
+ // when an ident starts with a dot
+ // parse a directive
+ else if (token.type == TOK_DIRECTIVE) {
+ expr->type = EXPR_DIRECTIVE;
+ res = parse_directive(parser, &token.string, &expr->directive);
+ }
- if (len >= MAX_ARG_LENGTH) {
- ERROR_POS(token, "directives cannot be longer than "
- "%d arguments", MAX_ARG_LENGTH);
+ // peek the next token:
+ // 1. = means parse constant
+ // 2. else parse instruction
+ else {
+ if (token.type != TOK_IDENT) {
+ ERROR_POS(token, "expected ident, got %s",
+ token_str(token.type));
+ token_free(&token);
return M_ERROR;
}
- if (token.number > size) {
- ERROR_POS(token, "number cannot execede max size of: "
- "%d", size);
+ struct token peek;
+ if (peek_token(parser, &peek)) {
+ token_free(&token);
return M_ERROR;
}
- switch (type) {
- case MIPS_DIRECTIVE_WORD:
- directive->words[len++] = token.number;
-
- break;
- case MIPS_DIRECTIVE_HALF:
- directive->halfs[len++] = token.number;
- break;
- case MIPS_DIRECTIVE_BYTE:
- directive->bytes[len++] = token.number;
- break;
- default:
- }
-
- if (peek_token(parser, &token))
- return M_ERROR;
-
- if (token.type == TOK_COMMA) {
- next_token(parser, NULL);
- continue;
+ if (peek.type == TOK_EQUAL) {
+ expr->type = EXPR_CONSTANT;
+ res = parse_constant(parser, &token.string,
+ &expr->constant);
+ } else {
+ expr->type = EXPR_INS;
+ res = parse_instruction(parser, &token.string,
+ &expr->instruction);
}
-
- break;
}
- directive->len = len;
+ // update byte end for expr
+ expr->byte_end = ftell(parser->lexer.file);
- return M_SUCCESS;
-}
-
-static int parse_directive_extern(struct parser *parser,
- struct mips_directive *directive)
-{
- struct token token;
- if (assert_token(parser, TOK_IDENT, &token))
- return M_ERROR;
-
- directive->type = MIPS_DIRECTIVE_EXTERN;
- strcpy(directive->name, token.text);
+ // free tokens
+ token_free(&token);
- return M_SUCCESS;
-}
-
-static int parse_directive_globl(struct parser *parser,
- struct mips_directive *directive)
-{
- struct token token;
- if (assert_token(parser, TOK_IDENT, &token))
+ // everything must end in a new line
+ if (res == M_SUCCESS && assert_eol(parser))
return M_ERROR;
- directive->type = MIPS_DIRECTIVE_GLOBL;
- strcpy(directive->name, token.text);
-
- return M_SUCCESS;
+ return res;
}
-static int parse_directive_ascii(struct parser *parser,
- struct mips_directive *directive,
- enum mips_directive_type type)
+int parser_init(const char *file, struct parser *parser)
{
- struct token token;
- if (assert_token(parser, TOK_STRING, &token))
+ parser->peek.type = TOK_EOF;
+ if (lexer_init(file, &parser->lexer))
return M_ERROR;
-
- directive->type = type;
- strcpy(directive->name, token.text);
-
- return M_SUCCESS;
-}
-
-static int parse_section(struct mips_directive *directive,
- char name[MAX_LEX_LENGTH])
-{
- directive->type = MIPS_DIRECTIVE_SECTION;
- strcpy(directive->name, name);
-
return M_SUCCESS;
}
-static int parse_directive(struct parser *parser,
- struct mips_directive *directive)
-{
- struct token token;
- if (assert_token(parser, TOK_DIRECTIVE, &token))
- return M_ERROR;
-
- // .align n
- if (strcmp(token.text, "align") == 0)
- return parse_directive_align(parser, directive);
- else if (strcmp(token.text, "space") == 0)
- return parse_directive_space(parser, directive);
- else if (strcmp(token.text, "word") == 0)
- return parse_directive_whb(parser, directive,
- MIPS_DIRECTIVE_WORD);
- else if (strcmp(token.text, "half") == 0)
- return parse_directive_whb(parser, directive,
- MIPS_DIRECTIVE_HALF);
- else if (strcmp(token.text, "byte") == 0)
- return parse_directive_whb(parser, directive,
- MIPS_DIRECTIVE_BYTE);
- else if (strcmp(token.text, "extern") == 0)
- return parse_directive_extern(parser, directive);
- else if (strcmp(token.text, "globl") == 0)
- return parse_directive_globl(parser, directive);
- else if (strcmp(token.text, "ascii") == 0)
- return parse_directive_ascii(parser, directive,
- MIPS_DIRECTIVE_ASCII);
- else if (strcmp(token.text, "asciiz") == 0)
- return parse_directive_ascii(parser, directive,
- MIPS_DIRECTIVE_ASCIIZ);
- else
- return parse_section(directive, token.text);
-}
-static int parse_constant(struct parser *parser, struct const_expr *expr,
- struct token ident)
+void parser_free(struct parser *parser)
{
- struct token number;
-
- if (assert_token(parser, TOK_EQUAL, NULL))
- return M_ERROR;
-
- if (assert_token(parser, TOK_NUMBER, &number))
- return M_ERROR;
-
- strcpy(expr->name,ident.text);
- expr->value = number.number;
-
- return M_SUCCESS;
+ token_free(&parser->peek);
+ lexer_free(&parser->lexer);
}
-static int parser_handle_ident(struct parser *parser, struct expr *expr)
+/* free the string owned by a directive, if it owns one
+ *
+ * only the section, extern/globl and ascii/asciiz directives hold a
+ * struct string; every other directive type is left untouched */
+static inline void expr_directive_free(struct expr_directive *dir)
{
- struct token ident;
- struct token peek;
-
- if (assert_token(parser, TOK_IDENT, &ident))
- return M_ERROR;
-
- if (peek_token(parser, &peek))
- return M_ERROR;
-
- if (peek.type == TOK_EQUAL) {
- expr->type = EXPR_CONSTANT;
- return parse_constant(parser, &expr->constant, ident);
- } else {
- expr->type = EXPR_INS;
- return parse_instruction(parser, &expr->ins, ident);
+ switch (dir->type) {
+ case EXPR_DIRECTIVE_SECTION:
+ string_free(&dir->section);
+ break;
+ case EXPR_DIRECTIVE_EXTERN:
+ case EXPR_DIRECTIVE_GLOBL:
+ string_free(&dir->label);
+ break;
+ case EXPR_DIRECTIVE_ASCII:
+ case EXPR_DIRECTIVE_ASCIIZ:
+ string_free(&dir->string);
+ break;
+ default:
+ // nothing to free for the remaining directive types.
+ // a label must be followed by a statement before C23,
+ // so the break is required here
+ break;
}
}
-
-static int parse_label(struct parser *parser,
- struct expr *expr)
-{
- struct token token;
-
- if (assert_token(parser, TOK_LABEL, &token))
- return M_ERROR;
- strcpy(expr->label, token.text);
-
- return M_SUCCESS;
-}
-
-
-int parser_next(struct parser *parser, struct expr *expr)
+static inline void expr_ins_arg_free(struct expr_ins_arg *arg)
{
- struct token token;
- int res = M_SUCCESS;
-
-again:
- if (peek_token(parser, &token))
- return M_ERROR;
-
- switch (token.type) {
- case TOK_NL:
- next_token(parser, NULL);
- goto again;
-
- case TOK_EOF:
- res = M_EOF;
- break;
-
- case TOK_LABEL:
- expr->type = EXPR_LABEL;
- res = parse_label(parser, expr);
- break;
-
- case TOK_DIRECTIVE:
- expr->type = EXPR_DIRECTIVE;
- res = parse_directive(parser, &expr->directive);
- break;
-
- case TOK_IDENT:
- res = parser_handle_ident(parser, expr);
- break;
-
- default:
- ERROR_POS(token, "unexpected token '%s'",
- token_str(token.type));
- return M_ERROR;
-
+ switch (arg->type) {
+ case EXPR_INS_ARG_REGISTER:
+ string_free(&arg->reg);
+ break;
+ case EXPR_INS_ARG_IMMEDIATE:
+ break;
+ case EXPR_INS_ARG_LABEL:
+ string_free(&arg->label);
+ break;
+ case EXPR_INS_ARG_OFFSET:
+ string_free(&arg->offset.reg);
+ break;
}
-
- return res;
-}
-
-int parser_init(struct lexer *lexer, struct parser *parser)
-{
- parser->lexer = lexer;
- parser->peek.type = TOK_EOF;
- return M_SUCCESS;
}
-
-void parser_free(struct parser *parser)
+void expr_free(struct expr *expr)
{
- (void) parser;
+ switch (expr->type) {
+ case EXPR_DIRECTIVE:
+ expr_directive_free(&expr->directive);
+ break;
+ case EXPR_CONSTANT:
+ string_free(&expr->constant.name);
+ break;
+ case EXPR_INS:
+ string_free(&expr->instruction.name);
+ for (uint32_t i = 0; i < expr->instruction.args_len; i++)
+ expr_ins_arg_free(&expr->instruction.args[i]);
+ break;
+ case EXPR_LABEL:
+ string_free(&expr->label);
+ break;
+ }
}
diff --git a/masm/parse.h b/masm/parse.h
index 9e0e928..61036cd 100644
--- a/masm/parse.h
+++ b/masm/parse.h
@@ -6,89 +6,147 @@
#include "lex.h"
#include <mlimits.h>
-#include <mips.h>
#include <stdint.h>
-/* mips directive types */
-enum mips_directive_type {
- MIPS_DIRECTIVE_ALIGN,
- MIPS_DIRECTIVE_SPACE,
- MIPS_DIRECTIVE_WORD,
- MIPS_DIRECTIVE_HALF,
- MIPS_DIRECTIVE_BYTE,
- MIPS_DIRECTIVE_SECTION,
- MIPS_DIRECTIVE_EXTERN,
- MIPS_DIRECTIVE_GLOBL,
- MIPS_DIRECTIVE_ASCII,
- MIPS_DIRECTIVE_ASCIIZ,
+/// the type of a directive
+enum expr_directive_type {
+ EXPR_DIRECTIVE_ALIGN,
+ EXPR_DIRECTIVE_SPACE,
+ EXPR_DIRECTIVE_WORD,
+ EXPR_DIRECTIVE_HALF,
+ EXPR_DIRECTIVE_BYTE,
+ EXPR_DIRECTIVE_SECTION,
+ EXPR_DIRECTIVE_EXTERN,
+ EXPR_DIRECTIVE_GLOBL,
+ EXPR_DIRECTIVE_ASCII,
+ EXPR_DIRECTIVE_ASCIIZ,
};
-/* mip32 directive */
-struct mips_directive {
- enum mips_directive_type type;
- uint32_t len; // used for words, halfs, bytes
+/// holds a directive
+struct expr_directive {
+ // the type of the directive
+ enum expr_directive_type type;
+
+ // length of a .word, .half, or .byte directive
+ uint32_t len;
+
+ // directive data
union {
+ // e.g. .align 2
uint16_t align;
+ // e.g. .space 4096
uint16_t space;
+ // e.g. .word 0x1 0x2
uint32_t words[MAX_ARG_LENGTH];
uint16_t halfs[MAX_ARG_LENGTH];
- uint8_t bytes[MAX_ARG_LENGTH];
- char name[MAX_ARG_LENGTH];
+ uint8_t bytes[MAX_ARG_LENGTH];
+ // e.g. .ascii "hello world!"
+ struct string string;
+ // e.g. .globl main
+ struct string label;
+ // e.g. .text
+ struct string section;
};
};
-struct reference {
- // ELF relocate type
- unsigned char type;
+/// holds a constant expression, e.g. SIZE = 8
+struct expr_const {
+ // the name of the constant
+ struct string name;
+
+ // the value of the constant
+ uint32_t num;
+};
+
+/// the type of a right
+/// hand side argument to an
+/// instruction
+enum expr_ins_arg_type {
+ // a register, e.g. $ra
+ EXPR_INS_ARG_REGISTER,
+
+ // an immediate, e.g. 0x80
+ EXPR_INS_ARG_IMMEDIATE,
- /// symbol name
- char name[MAX_LEX_LENGTH];
+ // a label, e.g. main
+ EXPR_INS_ARG_LABEL,
- /// integer addend
- int64_t addend;
+ // an immediate offset from a register, e.g. 4($sp)
+ EXPR_INS_ARG_OFFSET,
};
-struct const_expr {
- char name[MAX_LEX_LENGTH];
- uint32_t value;
+/// a right hand argument
+/// to an instruction; `type`
+/// selects the union member
+struct expr_ins_arg {
+ enum expr_ins_arg_type type;
+
+ union {
+ // register name (EXPR_INS_ARG_REGISTER)
+ struct string reg;
+
+ // immediate value (EXPR_INS_ARG_IMMEDIATE)
+ uint64_t immd;
+
+ // label name (EXPR_INS_ARG_LABEL)
+ struct string label;
+
+ // offset (EXPR_INS_ARG_OFFSET), e.g. 4($sp)
+ struct expr_ins_offset {
+ // immediate
+ uint64_t immd;
+ // register
+ struct string reg;
+ } offset;
+ };
};
-struct ins_expr {
+/// holds an instruction
+struct expr_ins {
/// the name of the instruction,
/// e.g. `li` in `li $t0, 17`
- size_t ins_len;
- struct mips_instruction ins[2];
+ struct string name;
- /// instructions can reference symbols.
- /// instruction `n` will be paried with reference `n`
- struct reference ref[2];
+ // the arguments of the instruction; only the first args_len entries are used
+ uint32_t args_len;
+ struct expr_ins_arg args[MAX_ARG_LENGTH];
};
enum expr_type {
+ // a directive, e.g. .align 2
EXPR_DIRECTIVE,
+ // a constant definition, e.g. SIZE = 8
EXPR_CONSTANT,
+ // an instruction, e.g. li $t0, 17
EXPR_INS,
+ // a label, e.g. _start:
EXPR_LABEL,
};
struct expr {
enum expr_type type;
+
+ uint32_t line_no; // presumably the source line of the expression - TODO confirm against the parser
+ uint32_t byte_start; // presumably the start of the expression in the source - TODO confirm
+ uint32_t byte_end; // presumably the end of the expression in the source - TODO confirm
+
union {
// directive (type == EXPR_DIRECTIVE)
- struct mips_directive directive;
+ struct expr_directive directive;
// constant (type == EXPR_CONSTANT)
- struct const_expr constant;
+ struct expr_const constant;
// instruction (type == EXPR_INS)
- struct ins_expr ins;
+ struct expr_ins instruction;
// label (type == EXPR_LABEL)
- char label[MAX_LEX_LENGTH];
+ struct string label;
};
};
+void expr_free(struct expr *expr); /* free the resources held by an expression (not `expr` itself) */
+
struct parser {
// the lexer
// stored by value inside the parser (no longer a borrowed pointer)
- struct lexer *lexer;
+ struct lexer lexer;
// the last token peeked
struct token peek;
};
@@ -97,7 +155,7 @@ struct parser {
int parser_next(struct parser *parser, struct expr *expr);
/* initialize the base parser for the given file */
-int parser_init(struct lexer *lexer, struct parser *parser);
+int parser_init(const char *file, struct parser *parser);
/* free the base parser */
void parser_free(struct parser *parser);
diff --git a/masm/reftab.c b/masm/reftab.c
new file mode 100644
index 0000000..f8793e1
--- /dev/null
+++ b/masm/reftab.c
@@ -0,0 +1,43 @@
+#include <stdlib.h>
+#include <merror.h>
+
+#include "tab.h"
+
+#define REFTAB_INIT_LEN 8
+
+/*
+ * Set up an empty reference table with room for REFTAB_INIT_LEN
+ * references. Returns M_SUCCESS, or M_ERROR when the allocation fails.
+ */
+int reftab_init(struct reference_table *reftab)
+{
+	struct reference *slots = malloc(REFTAB_INIT_LEN * sizeof(struct reference));
+
+	reftab->references = slots;
+	reftab->len = 0;
+	reftab->size = REFTAB_INIT_LEN;
+
+	if (slots != NULL)
+		return M_SUCCESS;
+
+	PERROR("cannot alloc");
+	return M_ERROR;
+}
+
+/*
+ * Release the array backing a reference table. Only the array is freed;
+ * the symbols the references point at are left alone. The pointer and
+ * length are cleared so a second reftab_free() is a harmless no-op
+ * instead of a double free.
+ */
+void reftab_free(struct reference_table *reftab)
+{
+	free(reftab->references);
+	reftab->references = NULL;
+	reftab->len = 0;
+}
+
+/*
+ * Append a copy of `*ref` to the table, doubling the capacity when full.
+ *
+ * Returns M_SUCCESS, or M_ERROR when the table cannot grow. On failure
+ * the table is left exactly as it was: the old array is still owned by
+ * `reftab` and `size` still describes it.
+ */
+int reftab_push(struct reference_table *reftab, struct reference *ref)
+{
+	if (reftab->len >= reftab->size) {
+		// a zero capacity would never grow by doubling
+		size_t new_size = reftab->size ? reftab->size * 2
+					       : REFTAB_INIT_LEN;
+		// keep the old pointer until realloc succeeds, otherwise a
+		// failure would leak the array and leave `size` wrong
+		struct reference *grown = realloc(reftab->references,
+				sizeof(struct reference) * new_size);
+
+		if (grown == NULL) {
+			PERROR("cannot realloc");
+			return M_ERROR;
+		}
+
+		reftab->references = grown;
+		reftab->size = new_size;
+	}
+
+	reftab->references[reftab->len++] = *ref;
+	return M_SUCCESS;
+}
diff --git a/masm/reltab.c b/masm/reltab.c
deleted file mode 100644
index afbd5e7..0000000
--- a/masm/reltab.c
+++ /dev/null
@@ -1,43 +0,0 @@
-#include <elf.h>
-#include <stdlib.h>
-#include <merror.h>
-
-#include "asm.h"
-
-#define RELTAB_INIT_LEN 8
-
-int reltab_init(struct relocation_table *reltab)
-{
- reltab->size = RELTAB_INIT_LEN;
- reltab->len = 0;
- reltab->data = malloc(sizeof(Elf32_Rela) * RELTAB_INIT_LEN);
-
- if (reltab->data == NULL) {
- PERROR("cannot alloc");
- return M_ERROR;
- }
-
- return M_SUCCESS;
-}
-
-void reltab_free(struct relocation_table *reltab)
-{
- free(reltab->data);
-}
-
-int reltab_push(struct relocation_table *reltab, const Elf32_Rela rel)
-{
- if (reltab->len >= reltab->size) {
- reltab->size *= 2;
- reltab->data = realloc(reltab->data, sizeof(Elf32_Rela)
- * reltab->size);
-
- if (reltab->data == NULL) {
- PERROR("cannot realloc");
- return M_ERROR;
- }
- }
-
- reltab->data[reltab->len++] = rel;
- return M_SUCCESS;
-}
diff --git a/masm/sectab.c b/masm/sectab.c
deleted file mode 100644
index caf34dd..0000000
--- a/masm/sectab.c
+++ /dev/null
@@ -1,166 +0,0 @@
-#include <string.h>
-#include <stdlib.h>
-#include <mips.h>
-#include <merror.h>
-#include <mlimits.h>
-
-#include "asm.h"
-
-#define SECTBL_INIT_LEN 8
-static const char inital_section[MAX_LEX_LENGTH] = "data";
-
-int sectab_init(struct section_table *sectab)
-{
- sectab->size = SECTBL_INIT_LEN;
- sectab->len = 0;
- sectab->sections = malloc(sizeof(struct section) * SECTBL_INIT_LEN);
-
- if (sectab->sections == NULL) {
- PERROR("cannot alloc");
- return M_ERROR;
- }
-
- if (sectab_alloc(sectab, &sectab->current, inital_section))
- return M_ERROR;
-
- return M_SUCCESS;
-}
-
-void sectab_free(struct section_table *sectab)
-{
- for (size_t i = 0; i < sectab->len; i++) {
- reltab_free(&sectab->sections[i].reltab);
- free(sectab->sections[i].entries);
- }
- free(sectab->sections);
-}
-
-struct section_settings {
- const char *name;
- bool read;
- bool write;
- bool execute;
- size_t align;
-};
-
-static struct section_settings default_section_settings[] = {
- {"data", true, true, false, 1},
- {"bss", true, true, false, 1},
- {"rodata", true, false, false, 1},
- {"text", true, false, true, 4},
-};
-
-int sectab_alloc(struct section_table *sectab, struct section **res,
- const char name[MAX_LEX_LENGTH])
-{
- if (sectab->len >= sectab->size) {
- sectab->size *= 2;
- sectab->sections = realloc(sectab->sections,
- sizeof(struct section) * sectab->size);
-
- if (sectab->sections == NULL) {
- PERROR("cannot realloc");
- return M_ERROR;
- }
- }
-
- /* set the sectio defaults */
- struct section *sec;
- sec = &sectab->sections[sectab->len];
- strcpy(sec->name,name);
- sec->len = 0;
- sec->size = SECTBL_INIT_LEN;
- sec->alignment = 1;
- sec->read = true;
- sec->write = true;
- sec->execute = false;
- sec->index = sectab->len;
- sec->entries = malloc(sizeof(struct section_entry) * SECTBL_INIT_LEN);
-
- if (reltab_init(&sec->reltab))
- return M_ERROR;
-
- /* overwrite the default if the given name has their own
- * defaults */
- for (int i = 0; i < 4; i++) {
- struct section_settings *set = &default_section_settings[i];
- if (strcmp(set->name, name) == 0) {
- sec->read = set->read;
- sec->write = set->write;
- sec->execute = set->execute;
- sec->alignment = set->align;
- break;
- }
- }
-
- if (sec->entries == NULL) {
- PERROR("cannot alloc");
- return M_ERROR;
- }
-
- sectab->len++;
-
- *res = sec;
- return M_SUCCESS;
-}
-
-int sectab_get(struct section_table *sectab, struct section **sec,
- const char name[MAX_LEX_LENGTH])
-{
- for (size_t i = 0; i < sectab->len; i++) {
- struct section *temp = &sectab->sections[i];
- if (strcmp(name, temp->name) == 0) {
- if (sec != NULL)
- *sec = temp;
- return M_SUCCESS;
- }
- }
-
- return M_ERROR;
-}
-
-int sec_push(struct section *section, struct section_entry entry)
-{
- if (section->len >= section->size) {
- section->size *= 2;
- void *new = realloc(section->entries,
- sizeof(struct section_entry) * section->size);
-
- if (new == NULL) {
- PERROR("cannot realloc");
- return M_ERROR;
- }
-
- section->entries = new;
- }
-
- section->entries[section->len++] = entry;
-
- return M_SUCCESS;
-}
-
-size_t sec_size(struct section *sec)
-{
- size_t n = 0;
- for (size_t i = 0; i < sec->len; i++) {
- size_t t = sec->entries[i].size;
- size_t m = t % sec->alignment;
- if (m)
- t += sec->alignment - m;
- n += t;
- }
- return n;
-}
-
-size_t sec_index(struct section *sec, size_t idx)
-{
- size_t n = 0;
- for (size_t i = 0; i < idx; i++) {
- size_t t = sec->entries[i].size;
- size_t m = t % sec->alignment;
- if (m)
- t += sec->alignment - m;
- n += t;
- }
- return n;
-}
diff --git a/masm/string.c b/masm/string.c
new file mode 100644
index 0000000..c05e182
--- /dev/null
+++ b/masm/string.c
@@ -0,0 +1,81 @@
+#include <merror.h>
+#include <stdlib.h>
+
+#include "lex.h"
+
+/* reset `string` to an empty heap-backed buffer that has no storage yet */
+inline void string_init(struct string *string)
+{
+	string->str = NULL;
+	string->allocated = true;
+	string->size = 0;
+	string->len = 0;
+}
+
+/*
+ * free a string buffer
+ *
+ * Strings that are not heap allocated (allocated == false, see
+ * string_bss) are left untouched. A heap string is released and reset
+ * to empty, so calling string_free() again on it is a no-op rather
+ * than a double free.
+ */
+inline void string_free(struct string *string)
+{
+	if (!string->allocated)
+		return;
+
+	free(string->str); // free(NULL) is a no-op, no guard needed
+	string->str = NULL;
+	string->len = 0;
+	string->size = 0;
+}
+
+/*
+ * clone a string buffer
+ *
+ * Non-allocated (bss) strings share the source pointer; heap strings get
+ * a fresh copy of exactly `src->len` bytes that `dst` then owns.
+ * Returns M_SUCCESS, or M_ERROR if the copy cannot be allocated (in
+ * which case `dst` is left empty).
+ *
+ * NOTE(review): no terminator is appended, so callers that strcmp() a
+ * clone rely on `len` already covering a trailing '\0' - confirm
+ * against the lexer.
+ */
+inline int string_clone(struct string *dst, const struct string *const src)
+{
+	dst->len = src->len;
+	dst->size = src->len;
+	dst->allocated = src->allocated;
+
+	/// bss strings do not need to be
+	/// malloced or copied
+	if (src->allocated == false) {
+		dst->str = src->str;
+		return M_SUCCESS;
+	}
+
+	/// nothing to copy: malloc(0) may return NULL without
+	/// that being an error, and memcpy from NULL is undefined
+	if (src->len == 0 || src->str == NULL) {
+		dst->str = NULL;
+		dst->len = 0;
+		dst->size = 0;
+		return M_SUCCESS;
+	}
+
+	dst->str = malloc(sizeof(char) * src->len);
+	if (dst->str == NULL) {
+		PERROR("cannot alloc");
+		dst->len = 0;
+		dst->size = 0;
+		return M_ERROR;
+	}
+	memcpy(dst->str, src->str, sizeof(char) * src->len);
+	return M_SUCCESS;
+}
+
+/*
+ * moves a string
+ *
+ * `dst` takes over the buffer of `src` (any previous contents of `dst`
+ * are overwritten, not freed). `src` is left empty, so a later
+ * string_free(src) has nothing to release and `src` no longer claims
+ * a length or capacity for a buffer it does not have.
+ */
+inline void string_move(struct string *dst, struct string *src)
+{
+	dst->len = src->len;
+	dst->size = src->size; // keep the real capacity, not just the length
+	dst->allocated = src->allocated;
+	dst->str = src->str;
+
+	// delete ptr in src, together with the sizes that described it
+	src->str = NULL;
+	src->len = 0;
+	src->size = 0;
+}
+
+/*
+ * pushes a char onto a string
+ *
+ * The buffer doubles when full (8 bytes for the first allocation).
+ * Returns M_SUCCESS, or M_ERROR if the buffer cannot grow; the string
+ * is unchanged on failure.
+ *
+ * NOTE(review): a full non-allocated (bss) string would hand a non-heap
+ * pointer to realloc here - callers presumably only push onto heap
+ * strings; confirm.
+ */
+int string_push(struct string *string, char c)
+{
+	if (string->len >= string->size) {
+		size_t new_size = string->size ? (size_t) string->size * 2 : 8;
+		// capacity in bytes is count * element size (was `+`)
+		char *grown = realloc(string->str, sizeof(char) * new_size);
+		if (grown == NULL) {
+			PERROR("cannot realloc");
+			return M_ERROR;
+		}
+		string->size = new_size;
+		string->str = grown;
+	}
+	string->str[string->len++] = c;
+	return M_SUCCESS;
+}
+
+/*
+ * Make `string` a non-owning view of the NUL-terminated `src`.
+ * Nothing is copied and the result is marked as not allocated.
+ */
+void string_bss(struct string *string, char *src)
+{
+	int src_len = strlen(src);
+
+	string->allocated = false;
+	string->size = src_len;
+	string->len = src_len;
+	string->str = src;
+}
diff --git a/masm/strtab.c b/masm/strtab.c
index 404ea73..bd914b0 100644
--- a/masm/strtab.c
+++ b/masm/strtab.c
@@ -4,7 +4,7 @@
#include "asm.h"
-int strtab_get_str(struct str_table *strtab, const char *str, size_t *res)
+int strtab_get_str(struct elf_str_table *strtab, const char *str, size_t *res)
{
for (size_t i = 0; i < strtab->size; i ++) {
if (strcmp(strtab->ptr + i, str) == 0) {
@@ -17,7 +17,7 @@ int strtab_get_str(struct str_table *strtab, const char *str, size_t *res)
return M_ERROR;
}
-int strtab_write_str(struct str_table *strtab, const char *str, size_t *res)
+int strtab_write_str(struct elf_str_table *strtab, const char *str, size_t *res)
{
if (strtab_get_str(strtab, str, res) == M_SUCCESS)
return M_SUCCESS;
@@ -36,7 +36,7 @@ int strtab_write_str(struct str_table *strtab, const char *str, size_t *res)
return M_SUCCESS;
}
-int strtab_init(struct str_table *strtab)
+int strtab_init(struct elf_str_table *strtab)
{
strtab->size = 1;
strtab->ptr = malloc(1);
@@ -48,7 +48,7 @@ int strtab_init(struct str_table *strtab)
return M_SUCCESS;
}
-void strtab_free(struct str_table *strtab)
+void strtab_free(struct elf_str_table *strtab) /* release the table's string buffer; `strtab` itself is not freed */
{
free(strtab->ptr);
}
diff --git a/masm/symtab.c b/masm/symtab.c
index 652bd42..990be46 100644
--- a/masm/symtab.c
+++ b/masm/symtab.c
@@ -1,4 +1,3 @@
-#include <elf.h>
#include <merror.h>
#include <netinet/in.h>
#include <stddef.h>
@@ -6,70 +5,85 @@
#include <stdlib.h>
#include <string.h>
-#include "asm.h"
+#include "lex.h"
+#include "tab.h"
#define SYMTBL_INIT_LEN 24
int symtab_init(struct symbol_table *symtab)
{
- symtab->size = SYMTBL_INIT_LEN;
- symtab->len = 0;
- symtab->symbols = malloc(sizeof(Elf32_Sym) * SYMTBL_INIT_LEN);
- symtab->sections = malloc(sizeof(ssize_t) * SYMTBL_INIT_LEN);
+ symtab->size = SYMTBL_INIT_LEN;
+ symtab->len = 0;
+ symtab->symbols = malloc(sizeof(struct symbol) * SYMTBL_INIT_LEN);
- if (symtab->symbols == NULL || symtab->sections == NULL) {
- PERROR("cannot alloc");
- return M_ERROR;
- }
+ if (symtab->symbols == NULL) {
+ PERROR("cannot alloc");
+ return M_ERROR;
+ }
- Elf32_Sym null = {0};
- if (symtab_push(symtab, null, -1))
- return M_ERROR;
-
- return M_SUCCESS;
+ return M_SUCCESS;
}
void symtab_free(struct symbol_table *symtab)
{
- free(symtab->symbols);
- free(symtab->sections);
+ for (uint32_t i = 0; i < symtab->len; i++)
+ string_free(&symtab->symbols[i].name);
+ free(symtab->symbols);
}
-int symtab_push(struct symbol_table *symtab, Elf32_Sym sym, ssize_t sec_idx)
+/*
+ * Append a copy of `*sym` to the table, doubling the capacity when full.
+ * `sym->tabidx` is set to the slot the symbol lands in before it is
+ * copied. The copy is shallow: the stored symbol shares `sym->name`,
+ * which symtab_free() later releases.
+ *
+ * Returns M_SUCCESS, or M_ERROR when the table cannot grow; on failure
+ * the table is left exactly as it was.
+ *
+ * NOTE(review): growing may move the array, so `struct symbol *` values
+ * obtained earlier (e.g. those kept in `struct reference`) can dangle
+ * after a push - confirm how callers cope with this.
+ */
+int symtab_push(struct symbol_table *symtab, struct symbol *sym)
{
- if (symtab->len >= symtab->size) {
- symtab->size *= 2;
- symtab->symbols = realloc(symtab->symbols,
- sizeof(Elf32_Sym) * symtab->size);
- symtab->sections = realloc(symtab->sections,
- sizeof(ssize_t) * symtab->size);
- if (symtab->symbols == NULL || symtab->sections == NULL) {
- PERROR("cannot realloc");
- return M_ERROR;
- }
- }
- symtab->symbols[symtab->len] = sym;
- symtab->sections[symtab->len++] = sec_idx;
- return M_SUCCESS;
+	if (symtab->len >= symtab->size) {
+		// a zero capacity would never grow by doubling
+		size_t new_size = symtab->size ? symtab->size * 2
+					       : SYMTBL_INIT_LEN;
+		// keep the old pointer until realloc succeeds, otherwise a
+		// failure would leak the array and leave `size` wrong
+		struct symbol *grown = realloc(symtab->symbols,
+				sizeof(struct symbol) * new_size);
+		if (grown == NULL) {
+			PERROR("cannot realloc");
+			return M_ERROR;
+		}
+
+		symtab->symbols = grown;
+		symtab->size = new_size;
+	}
+
+	sym->tabidx = symtab->len;
+	symtab->symbols[symtab->len++] = *sym;
+	return M_SUCCESS;
}
-int symtab_find(struct symbol_table *symtab, Elf32_Sym **ptr,
- size_t *idx, const char name[MAX_LEX_LENGTH])
+/*
+ * Look up the first symbol whose name equals `name`.
+ * Returns M_SUCCESS and, when `res` is not NULL, stores a pointer to the
+ * table entry in `*res`; returns M_ERROR if no symbol matches.
+ */
+int symtab_find(struct symbol_table *symtab, struct symbol **res,
+		const char *name)
{
- for (uint32_t i = 0; i < symtab->len; i++) {
- Elf32_Sym *sym = &symtab->symbols[i];
- const char *str = &symtab->strtab->ptr[ntohl(sym->st_name)];
- if (strcmp(str, name) == 0) {
- if (ptr != NULL)
- *ptr = sym;
+	struct symbol *hit = NULL;
+
+	for (uint32_t n = 0; n < symtab->len && hit == NULL; n++) {
+		if (strcmp(symtab->symbols[n].name.str, name) == 0)
+			hit = &symtab->symbols[n];
+	}
+
+	if (hit == NULL)
+		return M_ERROR;
+
+	if (res != NULL)
+		*res = hit;
+	return M_SUCCESS;
+}
+
+/*
+ * Return the symbol called `name`, creating a placeholder when the table
+ * has none yet. The placeholder is a local symbol at offset 0 whose
+ * section index is SYM_SEC_STUB and whose name is a clone of `name`.
+ * Returns M_ERROR if the name cannot be cloned or the table cannot grow.
+ */
+int symtab_find_or_stub(struct symbol_table *symtab, struct symbol **res,
+		const struct string *const name)
+{
+	struct symbol stub = {
+		.offset = 0,
+		.secidx = SYM_SEC_STUB,
+		.type = SYM_LOCAL,
+	};
+
+	if (symtab_find(symtab, res, name->str) == M_SUCCESS)
+		return M_SUCCESS;
+
+	if (string_clone(&stub.name, name) != 0)
+		return M_ERROR;
+
+	if (symtab_push(symtab, &stub) != 0) {
+		/* the table did not take the stub, so its name is still ours */
+		string_free(&stub.name);
+		return M_ERROR;
+	}
- ptrdiff_t diff = sym - symtab->symbols;
- if (idx != NULL)
- *idx = diff;
+
+	/* the stub was appended, so it is the last entry */
+	if (res != NULL)
+		*res = &symtab->symbols[symtab->len - 1];
- return M_SUCCESS;
- }
- }
- return M_ERROR;
+	return M_SUCCESS;
}
diff --git a/masm/tab.h b/masm/tab.h
new file mode 100644
index 0000000..c9e66c5
--- /dev/null
+++ b/masm/tab.h
@@ -0,0 +1,98 @@
+/* Copyright (c) 2024 Freya Murphy */
+
+#ifndef __TAB_H__
+#define __TAB_H__
+
+#include <stdint.h>
+#include <stddef.h>
+
+#include "lex.h"
+
+///
+/// Symbol table
+///
+
+#define SYM_SEC_STUB (UINT32_MAX)
+
+enum symbol_type { // kind of a symbol table entry
+ SYM_LOCAL, // also the type given to stubs made by symtab_find_or_stub
+ SYM_GLOBAL, // presumably set by a .globl directive - TODO confirm
+ SYM_EXTERN, // presumably set by a .extern directive - TODO confirm
+};
+
+struct symbol {
+ // the offset of the symbol in a section
+ uint32_t offset;
+ // the index of the section the symbol is in,
+ // SYM_SEC_STUB for a stubbed symbol
+ uint32_t secidx;
+ // index into this table, assigned by symtab_push
+ uint32_t tabidx;
+ // the name of the symbol, released by symtab_free
+ struct string name;
+ // type of the symbol (local, global or extern)
+ enum symbol_type type;
+};
+
+struct symbol_table {
+ // number of symbols stored (len) and allocated
+ // capacity (size), both counted in symbols
+ size_t len;
+ size_t size;
+
+ // symbols
+ struct symbol *symbols;
+};
+
+/* initialize a symbol table */
+int symtab_init(struct symbol_table *symtab);
+
+/* free the symbol table */
+void symtab_free(struct symbol_table *symtab);
+
+/* add a symbol to the symbol tbl */
+int symtab_push(struct symbol_table *symtab, struct symbol *sym);
+
+/* find a symbol by name in the symbol table */
+int symtab_find(struct symbol_table *symtab, struct symbol **sym,
+ const char *name);
+/* find an existing symbol with a name or stub a temp one */
+int symtab_find_or_stub(struct symbol_table *symtab, struct symbol **sym,
+ const struct string *const name);
+
+///
+/// Reference table
+///
+
+enum reference_type { // each value is mapped to the matching ELF R_MIPS_* relocation by elf_rel_type() in asm.c
+ REF_NONE,
+ REF_MIPS_16,
+ REF_MIPS_26,
+ REF_MIPS_PC16,
+ REF_MIPS_LO16,
+ REF_MIPS_HI16,
+};
+
+struct reference {
+ enum reference_type type; // kind of reference / relocation
+ struct symbol *symbol; // the symbol referred to; not freed by reftab_free
+ uint32_t offset; // presumably where the reference sits in its section - TODO confirm
+};
+
+struct reference_table {
+ // number of references stored (len) and allocated
+ // capacity (size), both counted in references
+ size_t len;
+ size_t size;
+
+ // references
+ struct reference *references;
+};
+
+/* initialize a reference table */
+int reftab_init(struct reference_table *reftab);
+
+/* free the reference table */
+void reftab_free(struct reference_table *reftab);
+
+/* add a reference to the reference tbl */
+int reftab_push(struct reference_table *reftab, struct reference *ref);
+
+#endif /* __TAB_H__ */