summaryrefslogtreecommitdiff
path: root/masm
diff options
context:
space:
mode:
authorFreya Murphy <freya@freyacat.org>2024-09-09 12:41:49 -0400
committerFreya Murphy <freya@freyacat.org>2024-09-09 12:41:49 -0400
commit2ed275821676a0d5baea6c7fd843d71c72c2342c (patch)
tree480297f28e5c42d02a47b3b94027a7abe507d010 /masm
downloadmips-2ed275821676a0d5baea6c7fd843d71c72c2342c.tar.gz
mips-2ed275821676a0d5baea6c7fd843d71c72c2342c.tar.bz2
mips-2ed275821676a0d5baea6c7fd843d71c72c2342c.zip
initial mips32 (r2000ish mips32r6) assembler
Diffstat (limited to 'masm')
-rw-r--r--masm/:363
-rw-r--r--masm/Makefile7
-rw-r--r--masm/asm.h33
-rw-r--r--masm/asm_mips32.c365
-rw-r--r--masm/lex.c343
-rw-r--r--masm/lex.h55
-rw-r--r--masm/main.c9
-rw-r--r--masm/parse.c198
-rw-r--r--masm/parse.h156
-rw-r--r--masm/parse_mips32.c847
-rw-r--r--masm/parse_mips32.h14
-rw-r--r--masm/reftbl.c47
-rw-r--r--masm/sectbl.c103
-rw-r--r--masm/strtbl.c49
-rw-r--r--masm/symtbl.c57
-rw-r--r--masm/test.asm22
16 files changed, 2668 insertions, 0 deletions
diff --git a/masm/: b/masm/:
new file mode 100644
index 0000000..96d212f
--- /dev/null
+++ b/masm/:
@@ -0,0 +1,363 @@
+#include <merror.h>
+#include <mips.h>
+#include <mips32.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <elf.h>
+#include <string.h>
+#include <stddef.h>
+
+#include "asm.h"
+#include "mlimits.h"
+#include "parse.h"
+#include "parse_mips32.h"
+
+extern char *current_file;
+
+#define SHDR_STRTBL 0
+#define SHDR_SYMTBL 1
+#define SHDR_SECTIONS 2
+
+/* Run the parser to the end of input, pushing every parsed instruction into
+ * the current section, then patch each recorded symbol reference.
+ *
+ * Returns M_SUCCESS, or M_ERROR when parsing fails, an instruction cannot be
+ * stored, or a reference names a symbol that was never defined. */
+static int parse_file(struct parser *parser)
+{
+	while (1) {
+		struct expr expr;
+		int res = parser_next(parser, &expr);
+
+		if (res == M_EOF)
+			break;
+
+		// any other failure is a parse error: stop instead of
+		// resolving references on a half parsed file
+		if (res != M_SUCCESS)
+			return M_ERROR;
+
+		if (expr.type != EXPR_INS)
+			continue;
+
+		if (sectbl_push(&parser->sec_tbl, parser->sec_tbl.current,
+				expr.ins))
+			return M_ERROR;
+	}
+
+	for (uint32_t i = 0; i < parser->ref_tbl.count; i++) {
+		struct reference *ref = &parser->ref_tbl.references[i];
+		struct symbol *sym;
+		struct mips32_instruction *ins;
+
+		if (symtbl_find(&parser->sym_tbl, &sym, ref->name)) {
+			ERROR("undefined symbol '%s'", ref->name);
+			return M_ERROR;
+		}
+
+		ins = &ref->section->ins[ref->index].mips32;
+
+		switch (ref->type) {
+		case REF_OFFESET:
+			// distance from the referencing instruction
+			ins->B_data.offset += sym->position -
+				(ref->section->start + ref->index);
+			break;
+		case REF_TARGET:
+			ins->J_data.target += sym->position;
+			break;
+		}
+	}
+
+	return M_SUCCESS;
+}
+
+/* Build one PT_LOAD program header per section in the section table.
+ * Offsets and sizes are the section's start/count scaled by the size of one
+ * mips32 instruction.
+ *
+ * On success *res receives the malloc'd array (the caller frees it) and
+ * *res2 the number of entries. Returns M_ERROR if the allocation fails. */
+static int assemble_phdr(struct assembler *asm, Elf32_Phdr **res,
+		uint32_t *res2)
+{
+	struct section_table *tbl = &asm->parser->sec_tbl;
+	const size_t word = sizeof(struct mips32_instruction);
+	Elf32_Phdr *table = malloc(sizeof(Elf32_Phdr) * tbl->count);
+
+	if (table == NULL) {
+		ERROR("cannot alloc");
+		return M_ERROR;
+	}
+
+	for (uint32_t n = 0; n < tbl->count; n++) {
+		struct section *sec = &tbl->sections[n];
+
+		table[n] = (Elf32_Phdr) {
+			.p_type = PT_LOAD,
+			.p_flags = PF_X | PF_W | PF_R, // FIXME: this is bad
+			.p_offset = sec->start * word,
+			.p_vaddr = sec->start * word,
+			.p_paddr = 0x00,
+			.p_filesz = sec->count * word,
+			.p_memsz = sec->count * word,
+			.p_align = sec->alignment,
+		};
+	}
+
+	*res = table;
+	*res2 = tbl->count;
+	return M_SUCCESS;
+}
+
+/* Build the ELF symbol table from the parser's symbol table. Each symbol
+ * name is interned into the assembler's string table and its position is
+ * stored as the symbol value.
+ *
+ * On success *res receives the malloc'd array (the caller frees it) and
+ * *res2 the number of entries. Returns M_ERROR if the allocation or a string
+ * table write fails. */
+static int assemble_symtbl(struct assembler *asm, Elf32_Sym **res,
+		uint32_t *res2)
+{
+	struct symbol_table *tbl = &asm->parser->sym_tbl;
+	Elf32_Sym *stbl = malloc(sizeof(Elf32_Sym) * tbl->count);
+
+	if (stbl == NULL) {
+		ERROR("cannot alloc");
+		return M_ERROR;
+	}
+
+	for (uint32_t i = 0; i < tbl->count; i++) {
+		struct symbol *sym = &tbl->symbols[i];
+		size_t str_off;
+
+		if (strtbl_write_str(&asm->str_tbl, sym->name, &str_off)) {
+			free(stbl);
+			return M_ERROR;
+		}
+
+		int bind = STB_LOCAL;
+		switch (sym->flag) {
+		case SYM_LOCAL:
+			bind = STB_LOCAL;
+			break;
+		case SYM_GLOBAL:
+		case SYM_EXTERNAL:
+			bind = STB_GLOBAL;
+			break;
+		}
+
+		stbl[i] = (Elf32_Sym) {
+			.st_name = str_off,
+			.st_value = sym->position,
+			.st_size = 0,
+			// st_info packs (binding, type); the binding does not
+			// belong in st_other, which only holds the visibility
+			.st_info = (unsigned char)
+				ELF32_ST_INFO(bind, STT_NOTYPE),
+			.st_other = (unsigned char)
+				ELF32_ST_VISIBILITY(STV_DEFAULT),
+			.st_shndx = 0, // FIXME: specify section
+		};
+	}
+
+	*res = stbl;
+	*res2 = tbl->count;
+
+	return M_SUCCESS;
+}
+
+/* Build the section header table: the string table, the symbol table, then
+ * one SHT_PROGBITS entry per assembled section. Section names are interned
+ * into the assembler's string table. File offsets and sizes are left at zero
+ * here; assemble_file fills them in once the layout is known.
+ *
+ * On success *res receives the malloc'd array (the caller frees it) and
+ * *res2 the number of entries. Returns M_ERROR if the allocation or a string
+ * table write fails. */
+static int assemble_shdr(struct assembler *asm, Elf32_Shdr **res,
+		uint32_t *res2)
+{
+	uint32_t entries = 2; // str table and sym table
+	entries += asm->parser->sec_tbl.count; // sections
+
+	Elf32_Shdr *shdr = malloc(sizeof(Elf32_Shdr) * entries);
+	if (shdr == NULL) {
+		ERROR("cannot alloc");
+		return M_ERROR;
+	}
+
+	size_t str_off;
+	if (strtbl_write_str(&asm->str_tbl, ".shstrtab", &str_off)) {
+		free(shdr);
+		return M_ERROR;
+	}
+
+	// string table
+	shdr[SHDR_STRTBL] = (Elf32_Shdr) {
+		.sh_name = str_off,
+		.sh_type = SHT_STRTAB,
+		.sh_flags = SHF_STRINGS,
+		.sh_addr = 0,
+		.sh_offset = 0,
+		.sh_size = 0,
+		.sh_link = 0,
+		.sh_info = 0,
+		.sh_addralign = 1,
+		.sh_entsize = 0,
+	};
+
+	if (strtbl_write_str(&asm->str_tbl, ".shsymtab", &str_off)) {
+		free(shdr);
+		return M_ERROR;
+	}
+
+	// symbol table
+	shdr[SHDR_SYMTBL] = (Elf32_Shdr) {
+		.sh_name = str_off,
+		.sh_type = SHT_SYMTAB,
+		.sh_flags = 0,
+		.sh_addr = 0,
+		.sh_offset = 0,
+		.sh_size = 0,
+		.sh_link = SHDR_STRTBL, // names live in the string table
+		.sh_info = 0,
+		.sh_addralign = 1,
+		.sh_entsize = sizeof(Elf32_Sym),
+	};
+
+	// for each section
+	for (uint32_t i = 0; i < asm->parser->sec_tbl.count; i++) {
+		struct section *sec = &asm->parser->sec_tbl.sections[i];
+		// one extra byte for the leading dot
+		char name[MAX_LEX_LENGTH + 1];
+
+		snprintf(name, sizeof(name), ".%s", sec->name);
+		if (strtbl_write_str(&asm->str_tbl, name, &str_off)) {
+			free(shdr);
+			return M_ERROR;
+		}
+		shdr[i + SHDR_SECTIONS] = (Elf32_Shdr) {
+			.sh_name = str_off,
+			.sh_type = SHT_PROGBITS,
+			.sh_flags = SHF_WRITE | SHF_ALLOC | SHF_EXECINSTR,
+			.sh_addr = 0,
+			.sh_offset = 0,
+			.sh_size = 0,
+			.sh_link = 0,
+			.sh_info = 0,
+			.sh_addralign = sec->alignment,
+			.sh_entsize = sizeof(struct mips32_instruction),
+		};
+	}
+
+	*res = shdr;
+	*res2 = entries;
+
+	return M_SUCCESS;
+}
+
+/* Lay out and write the object file "out.o" in the working directory.
+ *
+ * File layout, in order: ELF header, program headers, section contents,
+ * string table, symbol table, section headers.
+ *
+ * Returns M_SUCCESS, or M_ERROR when a table cannot be built or the output
+ * file cannot be opened or written. All temporary tables are freed on every
+ * path. */
+static int assemble_file(struct assembler *asm)
+{
+	Elf32_Phdr *phdr = NULL;
+	Elf32_Shdr *shdr = NULL;
+	Elf32_Sym *symtbl = NULL;
+	uint32_t phdr_len;
+	uint32_t shdr_len;
+	uint32_t symtbl_len;
+	int res = M_ERROR;
+
+	// the builders only store their result on success, so the pointers
+	// stay NULL (and safe to free) when one of them fails
+	if (assemble_symtbl(asm, &symtbl, &symtbl_len))
+		goto cleanup;
+
+	if (assemble_phdr(asm, &phdr, &phdr_len))
+		goto cleanup;
+
+	if (assemble_shdr(asm, &shdr, &shdr_len))
+		goto cleanup;
+
+	Elf32_Ehdr ehdr = {
+		.e_ident = {
+			[EI_MAG0] = ELFMAG0,
+			[EI_MAG1] = ELFMAG1,
+			[EI_MAG2] = ELFMAG2,
+			[EI_MAG3] = ELFMAG3,
+			[EI_CLASS] = ELFCLASS32,
+			[EI_DATA] = ELFDATA2LSB,
+			[EI_VERSION] = EV_CURRENT,
+			[EI_OSABI] = ELFOSABI_STANDALONE,
+			[EI_ABIVERSION] = 0x00,
+			[EI_PAD] = 0x00,
+		},
+		.e_type = ET_REL,
+		.e_machine = EM_MIPS,
+		.e_version = EV_CURRENT,
+		.e_entry = 0x00,
+		.e_phoff = 0x00,
+		.e_shoff = 0x00,
+		.e_flags = EF_MIPS_ARCH_32R6,
+		.e_ehsize = sizeof(Elf32_Ehdr),
+		.e_phentsize = sizeof(Elf32_Phdr),
+		.e_phnum = phdr_len,
+		.e_shentsize = sizeof(Elf32_Shdr),
+		.e_shnum = shdr_len,
+		.e_shstrndx = SHDR_STRTBL,
+	};
+
+	uint32_t ptr = 0;
+
+	// we must now correct offsets and sizes inside the ehdr, phdr,
+	// and shdr
+
+	ptr += sizeof(Elf32_Ehdr);
+
+	// phdr
+	ehdr.e_phoff = ptr;
+	ptr += phdr_len * sizeof(Elf32_Phdr);
+
+	// sections
+	for (uint32_t i = 0; i < asm->parser->sec_tbl.count; i++) {
+		phdr[i].p_offset = ptr;
+		phdr[i].p_vaddr = ptr;
+		shdr[i + SHDR_SECTIONS].sh_offset = ptr;
+		shdr[i + SHDR_SECTIONS].sh_size = phdr[i].p_filesz;
+		ptr += phdr[i].p_filesz;
+	}
+
+	// strtbl
+	shdr[SHDR_STRTBL].sh_offset = ptr;
+	shdr[SHDR_STRTBL].sh_size = asm->str_tbl.size;
+	ptr += asm->str_tbl.size;
+
+	// symtbl
+	shdr[SHDR_SYMTBL].sh_offset = ptr;
+	shdr[SHDR_SYMTBL].sh_size = symtbl_len * sizeof(Elf32_Sym);
+	ptr += symtbl_len * sizeof(Elf32_Sym);
+
+	// shdr: written last, so it starts after the symbol table
+	ehdr.e_shoff = ptr;
+
+	// binary mode: the object file must not get newline translation
+	FILE *out = fopen("out.o", "wb");
+	if (out == NULL) {
+		ERROR("cannot open 'out.o' for writing");
+		goto cleanup;
+	}
+
+	// ehdr
+	fwrite(&ehdr, sizeof(Elf32_Ehdr), 1, out);
+
+	// phdr
+	fwrite(phdr, sizeof(Elf32_Phdr), phdr_len, out);
+
+	// sections: written one instruction at a time because the stored
+	// elements are unions, only the mips32 member goes to the file
+	for (uint32_t i = 0; i < asm->parser->sec_tbl.count; i++) {
+		struct section *sec = &asm->parser->sec_tbl.sections[i];
+		for (uint32_t j = 0; j < sec->count; j++) {
+			struct mips32_instruction *ins = &sec->ins[j].mips32;
+			fwrite(ins, sizeof(struct mips32_instruction),
+				1, out);
+		}
+	}
+
+	// str tbl
+	fwrite(asm->str_tbl.ptr, 1, asm->str_tbl.size, out);
+
+	// sym tbl
+	fwrite(symtbl, sizeof(Elf32_Sym), symtbl_len, out);
+
+	// shdr
+	fwrite(shdr, sizeof(Elf32_Shdr), shdr_len, out);
+
+	// a failed write sets the stream error flag; check once at the end
+	if (ferror(out)) {
+		ERROR("cannot write 'out.o'");
+		fclose(out);
+		goto cleanup;
+	}
+
+	// fclose flushes, so it can still report a write failure
+	if (fclose(out) != 0) {
+		ERROR("cannot write 'out.o'");
+		goto cleanup;
+	}
+
+	res = M_SUCCESS;
+
+cleanup:
+	free(shdr);
+	free(phdr);
+	free(symtbl);
+
+	return res;
+}
+
+/* Assemble the mips32 source file at `path`: lex and parse it, resolve
+ * references, then write the object file.
+ *
+ * Sets the global current_file to `path`. Returns M_SUCCESS, or M_ERROR as
+ * soon as one stage fails. */
+int assemble_file_mips32(char *path)
+{
+	struct lexer lexer;
+	struct parser parser;
+	struct assembler assembler;
+	int res;
+
+	current_file = path;
+
+	if (lexer_init(current_file, &lexer))
+		return M_ERROR;
+
+	if (mips32_parser_init(&lexer, &parser)) {
+		// the lexer was initialized successfully; release it
+		lexer_free(&lexer);
+		return M_ERROR;
+	}
+
+	assembler.parser = &parser;
+	strtbl_init(&assembler.str_tbl);
+
+	res = parse_file(&parser);
+	if (res == M_SUCCESS)
+		res = assemble_file(&assembler);
+
+	strtbl_free(&assembler.str_tbl);
+	lexer_free(&lexer);
+	parser_free(&parser);
+
+	return res;
+}
diff --git a/masm/Makefile b/masm/Makefile
new file mode 100644
index 0000000..cd1dae3
--- /dev/null
+++ b/masm/Makefile
@@ -0,0 +1,7 @@
+# project-wide settings, kept one directory up
+include ../config.mk
+
+# NOTE(review): SRC/BIN/OUT look like the source directory, the build output
+# directory and the name of the produced binary -- confirm in ../makefile.mk
+SRC=.
+BIN=../bin/masm
+OUT=masm
+
+# presumably the shared build rules that consume the variables above
+# -- TODO confirm
+include ../makefile.mk
diff --git a/masm/asm.h b/masm/asm.h
new file mode 100644
index 0000000..1bc7cf5
--- /dev/null
+++ b/masm/asm.h
@@ -0,0 +1,33 @@
+/* Copyright (c) 2024 Freya Murphy */
+
+#ifndef __ASM_H__
+#define __ASM_H__
+
+#include <stddef.h>
+
+/* string table: `ptr` points at the raw bytes and `size` is their length;
+ * the assembler writes exactly these bytes into the object file */
+struct str_table {
+ char *ptr;
+ size_t size;
+};
+
+/* initialize a string table */
+void strtbl_init(struct str_table *str_tbl);
+
+/* free a string table */
+void strtbl_free(struct str_table *str_tbl);
+
+/* get a string from the string table
+ * NOTE(review): `res` presumably receives the offset of `str` inside the
+ * table, as for strtbl_write_str -- confirm in strtbl.c */
+int strtbl_get_str(struct str_table *str_tbl, const char *str, size_t *res);
+
+/* get or append a string into the string table; `res` receives the value the
+ * assembler stores as the ELF name offset (st_name / sh_name). callers treat
+ * a non-zero return as failure */
+int strtbl_write_str(struct str_table *str_tbl, const char *str, size_t *res);
+
+/* state of one assembly run: the parser holding the parsed sections, symbols
+ * and references, plus the string table the object file names go into */
+struct assembler {
+ struct parser *parser;
+ struct str_table str_tbl;
+};
+
+/* assemble a mips32 file; `path` names the source file to read */
+int assemble_file_mips32(char *path);
+
+#endif /* __ASM_H__ */
diff --git a/masm/asm_mips32.c b/masm/asm_mips32.c
new file mode 100644
index 0000000..dcb81e5
--- /dev/null
+++ b/masm/asm_mips32.c
@@ -0,0 +1,365 @@
+#include <merror.h>
+#include <mips.h>
+#include <mips32.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <elf.h>
+#include <string.h>
+#include <stddef.h>
+
+#include "asm.h"
+#include "mlimits.h"
+#include "parse.h"
+#include "parse_mips32.h"
+
+extern char *current_file;
+
+#define SHDR_SYMTBL 0
+#define SHDR_STRTBL 1
+#define SHDR_SECTIONS 2
+
+/* Run the parser to the end of input, pushing every parsed instruction into
+ * the current section, then patch each recorded symbol reference.
+ *
+ * Returns M_SUCCESS, or M_ERROR when parsing fails, an instruction cannot be
+ * stored, or a reference names a symbol that was never defined. */
+static int parse_file(struct parser *parser)
+{
+	while (1) {
+		struct expr expr;
+		int res = parser_next(parser, &expr);
+
+		if (res == M_EOF)
+			break;
+
+		// any other failure is a parse error: stop instead of
+		// resolving references on a half parsed file
+		if (res != M_SUCCESS)
+			return M_ERROR;
+
+		if (expr.type != EXPR_INS)
+			continue;
+
+		if (sectbl_push(&parser->sec_tbl, parser->sec_tbl.current,
+				expr.ins))
+			return M_ERROR;
+	}
+
+	for (uint32_t i = 0; i < parser->ref_tbl.count; i++) {
+		struct reference *ref = &parser->ref_tbl.references[i];
+		struct symbol *sym;
+		struct mips32_instruction *ins;
+
+		if (symtbl_find(&parser->sym_tbl, &sym, ref->name)) {
+			ERROR("undefined symbol '%s'", ref->name);
+			return M_ERROR;
+		}
+
+		ins = &ref->section->ins[ref->index].mips32;
+
+		switch (ref->type) {
+		case REF_OFFESET:
+			// distance from the referencing instruction
+			ins->B_data.offset += sym->position -
+				(ref->section->start + ref->index);
+			break;
+		case REF_TARGET:
+			ins->J_data.target += sym->position;
+			break;
+		}
+	}
+
+	return M_SUCCESS;
+}
+
+/* Build one PT_LOAD program header per section in the section table.
+ * Offsets and sizes are the section's start/count scaled by the size of one
+ * mips32 instruction.
+ *
+ * On success *res receives the malloc'd array (the caller frees it) and
+ * *res2 the number of entries. Returns M_ERROR if the allocation fails. */
+static int assemble_phdr(struct assembler *asm, Elf32_Phdr **res,
+		uint32_t *res2)
+{
+	struct section_table *tbl = &asm->parser->sec_tbl;
+	const size_t word = sizeof(struct mips32_instruction);
+	Elf32_Phdr *table = malloc(sizeof(Elf32_Phdr) * tbl->count);
+
+	if (table == NULL) {
+		ERROR("cannot alloc");
+		return M_ERROR;
+	}
+
+	for (uint32_t n = 0; n < tbl->count; n++) {
+		struct section *sec = &tbl->sections[n];
+
+		table[n] = (Elf32_Phdr) {
+			.p_type = PT_LOAD,
+			.p_flags = PF_X | PF_W | PF_R, // FIXME: this is bad
+			.p_offset = sec->start * word,
+			.p_vaddr = sec->start * word,
+			.p_paddr = 0x00,
+			.p_filesz = sec->count * word,
+			.p_memsz = sec->count * word,
+			.p_align = sec->alignment,
+		};
+	}
+
+	*res = table;
+	*res2 = tbl->count;
+	return M_SUCCESS;
+}
+
+/* Build the ELF symbol table from the parser's symbol table. Each symbol
+ * name is interned into the assembler's string table and its position is
+ * stored as the symbol value.
+ *
+ * On success *res receives the malloc'd array (the caller frees it) and
+ * *res2 the number of entries. Returns M_ERROR if the allocation or a string
+ * table write fails. */
+static int assemble_symtbl(struct assembler *asm, Elf32_Sym **res,
+		uint32_t *res2)
+{
+	struct symbol_table *tbl = &asm->parser->sym_tbl;
+	Elf32_Sym *stbl = malloc(sizeof(Elf32_Sym) * tbl->count);
+
+	if (stbl == NULL) {
+		ERROR("cannot alloc");
+		return M_ERROR;
+	}
+
+	for (uint32_t i = 0; i < tbl->count; i++) {
+		struct symbol *sym = &tbl->symbols[i];
+		size_t str_off;
+
+		if (strtbl_write_str(&asm->str_tbl, sym->name, &str_off)) {
+			free(stbl);
+			return M_ERROR;
+		}
+
+		int bind = STB_LOCAL;
+		switch (sym->flag) {
+		case SYM_LOCAL:
+			bind = STB_LOCAL;
+			break;
+		case SYM_GLOBAL:
+		case SYM_EXTERNAL:
+			bind = STB_GLOBAL;
+			break;
+		}
+
+		stbl[i] = (Elf32_Sym) {
+			.st_name = str_off,
+			.st_value = sym->position,
+			.st_size = 0,
+			// st_info packs (binding, type); the binding does not
+			// belong in st_other, which only holds the visibility
+			.st_info = (unsigned char)
+				ELF32_ST_INFO(bind, STT_NOTYPE),
+			.st_other = (unsigned char)
+				ELF32_ST_VISIBILITY(STV_DEFAULT),
+			.st_shndx = 0, // FIXME: specify section
+		};
+	}
+
+	*res = stbl;
+	*res2 = tbl->count;
+
+	return M_SUCCESS;
+}
+
+/* Build the section header table: the symbol table, the string table, then
+ * one SHT_PROGBITS entry per assembled section. Section names are interned
+ * into the assembler's string table. File offsets and sizes are left at zero
+ * here; assemble_file fills them in once the layout is known.
+ *
+ * On success *res receives the malloc'd array (the caller frees it) and
+ * *res2 the number of entries. Returns M_ERROR if the allocation or a string
+ * table write fails. */
+static int assemble_shdr(struct assembler *asm, Elf32_Shdr **res,
+		uint32_t *res2)
+{
+	uint32_t entries = 2; // str table and sym table
+	entries += asm->parser->sec_tbl.count; // sections
+
+	Elf32_Shdr *shdr = malloc(sizeof(Elf32_Shdr) * entries);
+	if (shdr == NULL) {
+		ERROR("cannot alloc");
+		return M_ERROR;
+	}
+
+	size_t str_off;
+	if (strtbl_write_str(&asm->str_tbl, ".shsymtab", &str_off)) {
+		free(shdr);
+		return M_ERROR;
+	}
+
+	// symbol table
+	shdr[SHDR_SYMTBL] = (Elf32_Shdr) {
+		.sh_name = str_off,
+		.sh_type = SHT_SYMTAB,
+		.sh_flags = 0,
+		.sh_addr = 0,
+		.sh_offset = 0,
+		.sh_size = 0,
+		.sh_link = SHDR_STRTBL, // names live in the string table
+		.sh_info = 0,
+		.sh_addralign = 1,
+		.sh_entsize = sizeof(Elf32_Sym),
+	};
+
+	if (strtbl_write_str(&asm->str_tbl, ".shstrtab", &str_off)) {
+		free(shdr);
+		return M_ERROR;
+	}
+
+	// string table
+	shdr[SHDR_STRTBL] = (Elf32_Shdr) {
+		.sh_name = str_off,
+		.sh_type = SHT_STRTAB,
+		.sh_flags = SHF_STRINGS,
+		.sh_addr = 0,
+		.sh_offset = 0,
+		.sh_size = 0,
+		.sh_link = 0,
+		.sh_info = 0,
+		.sh_addralign = 1,
+		.sh_entsize = 0,
+	};
+
+	// for each section
+	for (uint32_t i = 0; i < asm->parser->sec_tbl.count; i++) {
+		struct section *sec = &asm->parser->sec_tbl.sections[i];
+		// one extra byte for the leading dot
+		char name[MAX_LEX_LENGTH + 1];
+
+		snprintf(name, sizeof(name), ".%s", sec->name);
+		if (strtbl_write_str(&asm->str_tbl, name, &str_off)) {
+			free(shdr);
+			return M_ERROR;
+		}
+		shdr[i + SHDR_SECTIONS] = (Elf32_Shdr) {
+			.sh_name = str_off,
+			.sh_type = SHT_PROGBITS,
+			.sh_flags = SHF_WRITE | SHF_ALLOC | SHF_EXECINSTR,
+			.sh_addr = 0,
+			.sh_offset = 0,
+			.sh_size = 0,
+			.sh_link = 0,
+			.sh_info = 0,
+			.sh_addralign = sec->alignment,
+			.sh_entsize = sizeof(struct mips32_instruction),
+		};
+	}
+
+	*res = shdr;
+	*res2 = entries;
+
+	return M_SUCCESS;
+}
+
+/* Lay out and write the object file "out.o" in the working directory.
+ *
+ * File layout, in order: ELF header, program headers, section contents,
+ * symbol table, string table, section headers.
+ *
+ * Returns M_SUCCESS, or M_ERROR when a table cannot be built or the output
+ * file cannot be opened or written. All temporary tables are freed on every
+ * path. */
+static int assemble_file(struct assembler *asm)
+{
+	Elf32_Phdr *phdr = NULL;
+	Elf32_Shdr *shdr = NULL;
+	Elf32_Sym *symtbl = NULL;
+	uint32_t phdr_len;
+	uint32_t shdr_len;
+	uint32_t symtbl_len;
+	int res = M_ERROR;
+
+	// the builders only store their result on success, so the pointers
+	// stay NULL (and safe to free) when one of them fails
+	if (assemble_symtbl(asm, &symtbl, &symtbl_len))
+		goto cleanup;
+
+	if (assemble_phdr(asm, &phdr, &phdr_len))
+		goto cleanup;
+
+	if (assemble_shdr(asm, &shdr, &shdr_len))
+		goto cleanup;
+
+	Elf32_Ehdr ehdr = {
+		.e_ident = {
+			[EI_MAG0] = ELFMAG0,
+			[EI_MAG1] = ELFMAG1,
+			[EI_MAG2] = ELFMAG2,
+			[EI_MAG3] = ELFMAG3,
+			[EI_CLASS] = ELFCLASS32,
+			[EI_DATA] = ELFDATA2LSB,
+			[EI_VERSION] = EV_CURRENT,
+			[EI_OSABI] = ELFOSABI_NONE,
+			[EI_ABIVERSION] = 0x00,
+			[EI_PAD] = 0x00,
+		},
+		.e_type = ET_REL,
+		.e_machine = EM_MIPS,
+		.e_version = EV_CURRENT,
+		.e_entry = 0x00,
+		.e_phoff = 0x00,
+		.e_shoff = 0x00,
+		.e_flags = EF_MIPS_ARCH_32R6,
+		.e_ehsize = sizeof(Elf32_Ehdr),
+		.e_phentsize = sizeof(Elf32_Phdr),
+		.e_phnum = phdr_len,
+		.e_shentsize = sizeof(Elf32_Shdr),
+		.e_shnum = shdr_len,
+		.e_shstrndx = SHDR_STRTBL,
+	};
+
+	uint32_t ptr = 0;
+
+	// we must now correct offsets and sizes inside the ehdr, phdr,
+	// and shdr
+
+	ptr += sizeof(Elf32_Ehdr);
+
+	// phdr
+	ehdr.e_phoff = ptr;
+	ptr += phdr_len * sizeof(Elf32_Phdr);
+
+	// sections
+	for (uint32_t i = 0; i < asm->parser->sec_tbl.count; i++) {
+		phdr[i].p_offset = ptr;
+		phdr[i].p_vaddr = ptr;
+		shdr[i + SHDR_SECTIONS].sh_offset = ptr;
+		shdr[i + SHDR_SECTIONS].sh_size = phdr[i].p_filesz;
+		ptr += phdr[i].p_filesz;
+	}
+
+	// symtbl
+	shdr[SHDR_SYMTBL].sh_offset = ptr;
+	shdr[SHDR_SYMTBL].sh_size = symtbl_len * sizeof(Elf32_Sym);
+	ptr += symtbl_len * sizeof(Elf32_Sym);
+
+	// strtbl
+	shdr[SHDR_STRTBL].sh_offset = ptr;
+	shdr[SHDR_STRTBL].sh_size = asm->str_tbl.size;
+	ptr += asm->str_tbl.size;
+
+	// shdr
+	ehdr.e_shoff = ptr;
+
+	// binary mode: the object file must not get newline translation
+	FILE *out = fopen("out.o", "wb");
+	if (out == NULL) {
+		ERROR("cannot open 'out.o' for writing");
+		goto cleanup;
+	}
+
+	// ehdr
+	fwrite(&ehdr, sizeof(Elf32_Ehdr), 1, out);
+
+	// phdr
+	fwrite(phdr, sizeof(Elf32_Phdr), phdr_len, out);
+
+	// sections: written one instruction at a time because the stored
+	// elements are unions, only the mips32 member goes to the file
+	for (uint32_t i = 0; i < asm->parser->sec_tbl.count; i++) {
+		struct section *sec = &asm->parser->sec_tbl.sections[i];
+		for (uint32_t j = 0; j < sec->count; j++) {
+			struct mips32_instruction *ins = &sec->ins[j].mips32;
+			fwrite(ins, sizeof(struct mips32_instruction),
+				1, out);
+		}
+	}
+
+	// sym tbl
+	fwrite(symtbl, sizeof(Elf32_Sym), symtbl_len, out);
+
+	// str tbl
+	fwrite(asm->str_tbl.ptr, 1, asm->str_tbl.size, out);
+
+	// shdr
+	fwrite(shdr, sizeof(Elf32_Shdr), shdr_len, out);
+
+	// a failed write sets the stream error flag; check once at the end
+	if (ferror(out)) {
+		ERROR("cannot write 'out.o'");
+		fclose(out);
+		goto cleanup;
+	}
+
+	// fclose flushes, so it can still report a write failure
+	if (fclose(out) != 0) {
+		ERROR("cannot write 'out.o'");
+		goto cleanup;
+	}
+
+	res = M_SUCCESS;
+
+cleanup:
+	free(shdr);
+	free(phdr);
+	free(symtbl);
+
+	return res;
+}
+
+/* Assemble the mips32 source file at `path`: lex and parse it, resolve
+ * references, then write the object file.
+ *
+ * Sets the global current_file to `path`. Returns M_SUCCESS, or M_ERROR as
+ * soon as one stage fails. */
+int assemble_file_mips32(char *path)
+{
+	struct lexer lexer;
+	struct parser parser;
+	struct assembler assembler;
+	int res;
+
+	current_file = path;
+
+	if (lexer_init(current_file, &lexer))
+		return M_ERROR;
+
+	if (mips32_parser_init(&lexer, &parser)) {
+		// the lexer was initialized successfully; release it
+		lexer_free(&lexer);
+		return M_ERROR;
+	}
+
+	assembler.parser = &parser;
+	strtbl_init(&assembler.str_tbl);
+
+	res = parse_file(&parser);
+	if (res == M_SUCCESS)
+		res = assemble_file(&assembler);
+
+	strtbl_free(&assembler.str_tbl);
+	lexer_free(&lexer);
+	parser_free(&parser);
+
+	return res;
+}
diff --git a/masm/lex.c b/masm/lex.c
new file mode 100644
index 0000000..06c7114
--- /dev/null
+++ b/masm/lex.c
@@ -0,0 +1,343 @@
+#include "lex.h"
+
+#include <mlimits.h>
+#include <merror.h>
+
+/* position of the token currently being lexed: copied from the lexer's x/y
+ * at the start of lexer_next and handed to ERROR_POS by the lex_* helpers */
+static struct {
+ int x;
+ int y;
+} pos;
+
+/* consume and return the next character of the input.
+ * a character buffered by lex_peek is handed out first; the x/y counters
+ * only move when a character is really read from the file */
+static int lex_next(struct lexer *lexer)
+{
+	int ch = lexer->peek;
+
+	// hand out the buffered character and empty the buffer
+	if (ch != EOF) {
+		lexer->peek = EOF;
+		return ch;
+	}
+
+	ch = getc(lexer->file);
+	if (ch != '\n') {
+		lexer->x++;
+		return ch;
+	}
+
+	// a newline starts the next line at column zero
+	lexer->x = 0;
+	lexer->y++;
+	return ch;
+}
+
+/* return the next character without consuming it.
+ * the character is parked in lexer->peek (EOF means nothing is parked) */
+static int lex_peek(struct lexer *lexer)
+{
+	if (lexer->peek != EOF)
+		return lexer->peek;
+
+	lexer->peek = lex_next(lexer);
+	return lexer->peek;
+}
+
+/* skip all characters up to, but not including, EOF or newline.
+ * the newline is left in the input so that lexer_next still emits the
+ * TOK_NL that ends the line the comment sits on */
+static void skip_comment(struct lexer *lexer)
+{
+	while (1) {
+		int c = lex_peek(lexer);
+		if (c == EOF || c == '\n')
+			break;
+		lex_next(lexer);
+	}
+}
+
+/* lexes an identifier made of [a-zA-Z0-9_] into `text`, stopping at the
+ * first character outside that set (which is left in the input).
+ * returns M_ERROR when nothing was read or when the identifier does not fit
+ * in MAX_LEX_LENGTH bytes including the terminator */
+static int lex_ident(struct lexer *lexer, char text[MAX_LEX_LENGTH])
+{
+	int count = 0;
+
+	for (;;) {
+		int ch = lex_peek(lexer);
+		int is_alpha = (ch >= 'a' && ch <= 'z') ||
+			(ch >= 'A' && ch <= 'Z');
+		int is_digit = ch >= '0' && ch <= '9';
+
+		if (!is_alpha && !is_digit && ch != '_')
+			break;
+
+		// the character belongs to the ident, take it
+		lex_next(lexer);
+
+		// keep one byte free for the terminator
+		if (count + 1 == MAX_LEX_LENGTH) {
+			ERROR_POS(pos, "ident has max length of %d",
+				MAX_LEX_LENGTH);
+			return M_ERROR;
+		}
+
+		text[count++] = ch;
+	}
+
+	if (count == 0) {
+		ERROR_POS(pos, "attempted to lex empty ident %d",
+			MAX_LEX_LENGTH);
+		return M_ERROR;
+	}
+
+	text[count] = '\0';
+	return M_SUCCESS;
+}
+
+/* lexes a string until the closing quote (the opening quote has already
+ * been consumed by the caller) and stores it, NUL terminated, in `text`.
+ * the escapes \n, \t, \\ and \" are translated; any other backslash is kept
+ * as is.
+ * returns error if the string is too long, hits a newline, or the file ends
+ * before the closing quote */
+static int lex_string(struct lexer *lexer, char text[MAX_LEX_LENGTH])
+{
+	int len = 0;
+	char *ptr = text;
+	int c;
+
+	while (1) {
+		c = lex_next(lexer);
+		if (c == '"')
+			break;
+
+		// the input ended inside the literal; without this check EOF
+		// would be stored as a character until the length limit hits
+		if (c == EOF) {
+			ERROR_POS(pos, "reached end of file before end of string");
+			return M_ERROR;
+		}
+
+		// match escape character
+		if (c == '\\') {
+			switch (lex_peek(lexer)) {
+			case 'n':
+				c = '\n';
+				lex_next(lexer);
+				break;
+			case 't':
+				c = '\t';
+				lex_next(lexer);
+				break;
+			case '\\':
+				c = '\\';
+				lex_next(lexer);
+				break;
+			case '"':
+				c = '"';
+				lex_next(lexer);
+				break;
+			}
+		}
+
+		// strings cannot span multiple lines
+		if (c == '\n') {
+			ERROR_POS(pos, "reached newline before end of string");
+			return M_ERROR;
+		}
+
+		if (len + 1 == MAX_LEX_LENGTH) {
+			ERROR_POS(pos, "string has max length of %d",
+				MAX_LEX_LENGTH);
+			return M_ERROR;
+		}
+
+		*ptr++ = c;
+		len++;
+	}
+
+	*ptr = '\0';
+	return M_SUCCESS;
+}
+
+/* lexes an integer number in base 2, 8, 10, or 16 into *n.
+ * base 10 is the default and can be changed with a 0b, 0o, or 0x prefix.
+ * returns M_ERROR when a digit is not valid for the base or when the value
+ * does not fit in an int64_t */
+static int lex_number(struct lexer *lexer, int64_t *n)
+{
+	int64_t number = 0;
+	int base = 10;
+
+	// skip all leading zeros, they dont do anything.
+	// this also allows us to directly check for 0b, 0o, and 0x
+	// right away!
+	while (lex_peek(lexer) == '0')
+		lex_next(lexer);
+
+	// match change of base
+	switch (lex_peek(lexer)) {
+	case 'b':
+		base = 2;
+		lex_next(lexer);
+		break;
+	case 'o':
+		base = 8;
+		lex_next(lexer);
+		break;
+	case 'x':
+		base = 16;
+		lex_next(lexer);
+		break;
+	}
+
+	while (1) {
+		// int, not char: lex_peek may return EOF
+		int c = lex_peek(lexer);
+		int digit;
+
+		// all letters are matched so that digits too big for the
+		// base are reported here instead of later
+		if (c >= '0' && c <= '9') {
+			digit = c - '0';
+		} else if (c >= 'a' && c <= 'z') {
+			digit = c - 'a' + 10;
+		} else if (c >= 'A' && c <= 'Z') {
+			digit = c - 'A' + 10;
+		} else {
+			break; // no longer a number
+		}
+
+		// if number provided is bigger than my base,
+		// error !
+		if (digit >= base) {
+			ERROR_POS(pos, "character '%c' is bigger than number base"
+				"'%d'", c, base);
+			return M_ERROR;
+		}
+
+		// number * base + digit must stay within int64_t; signed
+		// overflow would be undefined behavior
+		if (number > (INT64_MAX - digit) / base) {
+			ERROR_POS(pos, "number does not fit in 64 bits");
+			return M_ERROR;
+		}
+
+		lex_next(lexer);
+		number = number * base + digit;
+	}
+
+	*n = number;
+	return M_SUCCESS;
+}
+
+/* lex the next token of the file into *token.
+ * blanks and comments are skipped. token->x/y hold the lexer position seen
+ * when the token was started. returns the status of the sub-lexer that ran,
+ * M_SUCCESS otherwise; end of input is reported as a TOK_EOF token */
+int lexer_next(struct lexer *lexer, struct token *token)
+{
+	int ch;
+
+	// loop (rather than recurse) while only blanks and comments are seen
+	for (;;) {
+		token->x = lexer->x;
+		token->y = lexer->y;
+		pos.x = lexer->x;
+		pos.y = lexer->y;
+		token->type = TOK_EOF;
+
+		ch = lex_peek(lexer);
+
+		if (ch == ';' || ch == '#') {
+			skip_comment(lexer);
+			continue;
+		}
+		if (ch == ' ' || ch == '\t') {
+			lex_next(lexer);
+			continue;
+		}
+		break;
+	}
+
+	// a decimal digit starts a number; the digit stays in the input
+	if (ch >= '0' && ch <= '9') {
+		token->type = TOK_NUMBER;
+		return lex_number(lexer, &token->number);
+	}
+
+	switch (ch) {
+	case EOF:
+	case '\0':
+		// token->type is already TOK_EOF, nothing is consumed
+		return M_SUCCESS;
+
+	// single character tokens, consumed after the switch
+	case '\n':
+		token->type = TOK_NL;
+		break;
+	case ',':
+		token->type = TOK_COMMA;
+		break;
+	case '=':
+		token->type = TOK_EQUAL;
+		break;
+	case '(':
+		token->type = TOK_LPAREN;
+		break;
+	case ')':
+		token->type = TOK_RPAREN;
+		break;
+
+	// prefixed tokens: drop the prefix, then lex the payload
+	case '$':
+		lex_next(lexer);
+		token->type = TOK_REG;
+		return lex_ident(lexer, token->text);
+	case '.':
+		lex_next(lexer);
+		token->type = TOK_DIRECTIVE;
+		return lex_ident(lexer, token->text);
+	case '"':
+		lex_next(lexer);
+		token->type = TOK_STRING;
+		return lex_string(lexer, token->text);
+
+	default: {
+		int status;
+
+		token->type = TOK_IDENT;
+		status = lex_ident(lexer, token->text);
+		// an ident directly followed by ':' is a label
+		if (lex_peek(lexer) == ':') {
+			lex_next(lexer);
+			token->type = TOK_LABEL;
+		}
+		return status;
+	}
+	}
+
+	lex_next(lexer);
+	return M_SUCCESS;
+}
+
+/* open `path` for reading and reset the lexer state (empty peek buffer,
+ * position 0/0). returns M_ERROR if the file cannot be opened */
+int lexer_init(const char *path, struct lexer *lexer)
+{
+	FILE *file = fopen(path, "r");
+	if (file == NULL) {
+		ERROR_POS(pos, "cannot open file '%s'", path);
+		return M_ERROR;
+	}
+	lexer->file = file;
+	lexer->peek = EOF;
+	lexer->x = 0;
+	lexer->y = 0;
+	return M_SUCCESS;
+}
+
+/* close the lexer's input file; returns the result of fclose */
+int lexer_free(struct lexer *lexer)
+{
+	FILE *handle = lexer->file;
+
+	return fclose(handle);
+}
+
+/* human readable name of a token type, "unknown" for any value that is not
+ * one of the enum constants */
+char *token_str(enum token_type type)
+{
+	static char *const names[] = {
+		[TOK_IDENT] = "ident",
+		[TOK_REG] = "register",
+		[TOK_LABEL] = "label",
+		[TOK_STRING] = "string",
+		[TOK_COMMA] = "comma",
+		[TOK_EQUAL] = "equal",
+		[TOK_LPAREN] = "left parentheses",
+		[TOK_RPAREN] = "right parentheses",
+		[TOK_NUMBER] = "number",
+		[TOK_EOF] = "end of file",
+		[TOK_NL] = "new line",
+		[TOK_DIRECTIVE] = "directive",
+	};
+	const size_t total = sizeof(names) / sizeof(names[0]);
+	// a negative value wraps to a huge index and is rejected below
+	const size_t idx = (size_t) type;
+
+	if (idx >= total || names[idx] == NULL)
+		return "unknown";
+
+	return names[idx];
+}
diff --git a/masm/lex.h b/masm/lex.h
new file mode 100644
index 0000000..f1c482a
--- /dev/null
+++ b/masm/lex.h
@@ -0,0 +1,55 @@
+/* Copyright (c) 2024 Freya Murphy */
+
+#ifndef __LEX_H__
+#define __LEX_H__
+
+#include <mlimits.h>
+#include <stdio.h>
+#include <stdint.h>
+
+/* lexer state over one input file */
+struct lexer {
+ // input stream, opened by lexer_init and closed by lexer_free
+ FILE *file;
+ // one character of lookahead; EOF means no character is buffered
+ int peek;
+ // x counts characters read on the current line, y counts newlines read
+ int x;
+ int y;
+};
+
+/* kinds of tokens produced by lexer_next */
+enum token_type {
+ TOK_IDENT, // bare identifier
+ TOK_REG, // identifier prefixed with '$'
+ TOK_LABEL, // identifier directly followed by ':'
+ TOK_STRING, // double quoted string
+ TOK_COMMA, // ','
+ TOK_EQUAL, // '='
+ TOK_LPAREN, // '('
+ TOK_RPAREN, // ')'
+ TOK_NUMBER, // integer literal
+ TOK_EOF, // end of input
+ TOK_NL, // newline
+ TOK_DIRECTIVE, // identifier prefixed with '.'
+};
+
+/* one lexed token */
+struct token {
+ enum token_type type;
+ // payload: `number` is filled for TOK_NUMBER, `text` for identifier,
+ // register, directive, label and string tokens
+ union {
+ int64_t number;
+ char text[MAX_LEX_LENGTH];
+ };
+ // lexer position (x/y) recorded when the token was started
+ int x;
+ int y;
+};
+
+/* initialize a lexer: opens `file` for reading */
+int lexer_init(const char *file, struct lexer *lexer);
+
+/* free the lexer: closes its file, returns the result of fclose */
+int lexer_free(struct lexer *lexer);
+
+/* lexes the next token into `token`. returns M_ERROR on error and
+ * M_SUCCESS otherwise; the end of input is reported as a successful
+ * token of type TOK_EOF */
+int lexer_next(struct lexer *lexer, struct token *token);
+
+/* token type to string */
+char *token_str(enum token_type);
+
+#endif /* __LEX_H__ */
diff --git a/masm/main.c b/masm/main.c
new file mode 100644
index 0000000..957b34c
--- /dev/null
+++ b/masm/main.c
@@ -0,0 +1,9 @@
+#include <stdio.h>
+
+#include "asm.h"
+
+/* entry point: assemble the single source file named on the command line.
+ * returns the status of assemble_file_mips32, or 1 (after printing a usage
+ * line to stderr) when the argument count is wrong */
+int main(int argc, char **argv) {
+
+	if (argc != 2) {
+		fprintf(stderr, "usage: %s <file>\n",
+			argc > 0 ? argv[0] : "masm");
+		return 1;
+	}
+
+	return assemble_file_mips32(argv[1]);
+}
diff --git a/masm/parse.c b/masm/parse.c
new file mode 100644
index 0000000..9876311
--- /dev/null
+++ b/masm/parse.c
@@ -0,0 +1,198 @@
+#include <mlimits.h>
+#include <merror.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "parse.h"
+#include "lex.h"
+
+/* consume the next token. a token buffered by peek_token is returned first,
+ * otherwise a fresh one is lexed. `tok` may be NULL to discard the token.
+ * a peek slot of type TOK_EOF means "nothing buffered" */
+int next_token(struct parser *parser, struct token *tok)
+{
+	struct token *slot = &parser->peek;
+
+	if (slot->type == TOK_EOF) {
+		struct token fresh;
+
+		if (lexer_next(parser->lexer, &fresh))
+			return M_ERROR;
+		if (tok != NULL)
+			*tok = fresh;
+		return M_SUCCESS;
+	}
+
+	// hand out the buffered token and mark the slot empty
+	if (tok != NULL)
+		*tok = *slot;
+	slot->type = TOK_EOF;
+	return M_SUCCESS;
+}
+
+
+/* look at the next token without consuming it. the token is lexed into the
+ * parser's peek slot on first use. `tok` may be NULL */
+int peek_token(struct parser *parser, struct token *tok)
+{
+	// fill the slot only when it is empty
+	if (parser->peek.type == TOK_EOF &&
+			next_token(parser, &parser->peek))
+		return M_ERROR;
+
+	if (tok == NULL)
+		return M_SUCCESS;
+
+	*tok = parser->peek;
+	return M_SUCCESS;
+}
+
+
+/* consume the next token and require it to be of `type`.
+ * on a match the token is copied to `tok` (when not NULL); on a mismatch an
+ * error naming both types is reported and M_ERROR returned */
+int assert_token(struct parser *parser, enum token_type type,
+		struct token *tok)
+{
+	struct token got;
+
+	if (next_token(parser, &got))
+		return M_ERROR;
+
+	if (got.type == type) {
+		if (tok != NULL)
+			*tok = got;
+		return M_SUCCESS;
+	}
+
+	ERROR_POS(got, "expected a token of type '%s', got '%s'",
+		token_str(type), token_str(got.type));
+	return M_ERROR;
+}
+
+/* consume the next token and require it to end the line: either a new line
+ * or the end of the file */
+int assert_eol(struct parser *parser)
+{
+	struct token got;
+
+	if (next_token(parser, &got))
+		return M_ERROR;
+
+	if (got.type == TOK_NL || got.type == TOK_EOF)
+		return M_SUCCESS;
+
+	ERROR_POS(got, "expected a new line or end of file");
+	return M_ERROR;
+}
+
+/* parse the rest of `ident = number`; `ident` has already been consumed.
+ * fills `expr` with the identifier text and the numeric value */
+static int parse_constant(struct parser *parser, struct const_expr *expr,
+		struct token ident)
+{
+	struct token value;
+
+	if (assert_token(parser, TOK_EQUAL, NULL) ||
+			assert_token(parser, TOK_NUMBER, &value))
+		return M_ERROR;
+
+	expr->value = value.number;
+	strcpy(expr->name, ident.text);
+
+	return M_SUCCESS;
+}
+
+/* parse an expression that starts with an identifier: a constant when the
+ * identifier is followed by '=', an instruction otherwise */
+static int parser_handle_ident(struct parser *parser, struct expr *expr)
+{
+	struct token name;
+	struct token after;
+
+	if (assert_token(parser, TOK_IDENT, &name))
+		return M_ERROR;
+
+	if (peek_token(parser, &after))
+		return M_ERROR;
+
+	if (after.type != TOK_EQUAL) {
+		expr->type = EXPR_INS;
+		return parser->parse_instruction(parser, &expr->ins, name);
+	}
+
+	expr->type = EXPR_CONSTANT;
+	return parse_constant(parser, &expr->constant, name);
+}
+
+
+/* parse a label definition and record it as a local symbol.
+ * the symbol's position is the current section's start plus the number of
+ * instructions already in it. defining a name twice is an error */
+static int parse_label(struct parser *parser,
+		struct expr *expr)
+{
+	struct token label;
+	struct symbol entry;
+
+	if (assert_token(parser, TOK_LABEL, &label))
+		return M_ERROR;
+	strcpy(expr->text, label.text);
+
+	if (symtbl_find(&parser->sym_tbl, NULL, label.text) == M_SUCCESS) {
+		ERROR_POS(label, "redefined symbol '%s'", label.text);
+		return M_ERROR;
+	}
+
+	entry = (struct symbol) {
+		.name = "",
+		.position = parser->sec_tbl.current->start +
+			parser->sec_tbl.current->count,
+		.flag = SYM_LOCAL,
+	};
+	strcpy(entry.name, label.text);
+
+	return symtbl_push(&parser->sym_tbl, entry) ? M_ERROR : M_SUCCESS;
+}
+
+
+/* parse the next expression into `expr`.
+ * blank lines are skipped. returns M_EOF at the end of input, M_ERROR on an
+ * unexpected token, otherwise the status of the sub-parser that ran */
+int parser_next(struct parser *parser, struct expr *expr)
+{
+	struct token head;
+
+	// swallow empty lines
+	for (;;) {
+		if (peek_token(parser, &head))
+			return M_ERROR;
+		if (head.type != TOK_NL)
+			break;
+		next_token(parser, NULL);
+	}
+
+	if (head.type == TOK_EOF)
+		return M_EOF;
+
+	if (head.type == TOK_LABEL) {
+		expr->type = EXPR_LABEL;
+		return parse_label(parser, expr);
+	}
+
+	if (head.type == TOK_DIRECTIVE) {
+		expr->type = EXPR_DIRECTIVE;
+		return parser->parse_directive(parser, &expr->directive);
+	}
+
+	if (head.type == TOK_IDENT)
+		return parser_handle_ident(parser, expr);
+
+	ERROR_POS(head, "unexpected token '%s'",
+		token_str(head.type));
+	return M_ERROR;
+}
+
+/* initialize the base parser on top of `lexer`: empty peek slot plus fresh
+ * symbol, section and reference tables.
+ * when a table fails to initialize, the tables set up before it are freed
+ * again, so a failed call leaves nothing for the caller to release */
+int parser_init(struct lexer *lexer, struct parser *parser)
+{
+	parser->lexer = lexer;
+	parser->peek.type = TOK_EOF;
+
+	if (symtbl_init(&parser->sym_tbl))
+		return M_ERROR;
+
+	if (sectbl_init(&parser->sec_tbl)) {
+		symtbl_free(&parser->sym_tbl);
+		return M_ERROR;
+	}
+
+	if (reftbl_init(&parser->ref_tbl)) {
+		sectbl_free(&parser->sec_tbl);
+		symtbl_free(&parser->sym_tbl);
+		return M_ERROR;
+	}
+
+	return M_SUCCESS;
+}
+
+
+/* release the three tables owned by the parser; the lexer is not touched */
+void parser_free(struct parser *parser)
+{
+	struct symbol_table *symbols = &parser->sym_tbl;
+	struct section_table *sections = &parser->sec_tbl;
+	struct reference_table *references = &parser->ref_tbl;
+
+	symtbl_free(symbols);
+	sectbl_free(sections);
+	reftbl_free(references);
+}
diff --git a/masm/parse.h b/masm/parse.h
new file mode 100644
index 0000000..2aea0be
--- /dev/null
+++ b/masm/parse.h
@@ -0,0 +1,156 @@
+/* Copyright (c) 2024 Freya Murphy */
+
+#ifndef __PARSE_H__
+#define __PARSE_H__
+
+#include "lex.h"
+
+#include <mlimits.h>
+#include <mips.h>
+#include <stdint.h>
+
+/* a constant definition of the form `name = value` */
+struct const_expr {
+ char name[MAX_LEX_LENGTH];
+ uint32_t value;
+};
+
+/* kind of expression produced by parser_next; selects the valid member of
+ * the union in struct expr */
+enum expr_type {
+ EXPR_INS,
+ EXPR_DIRECTIVE,
+ EXPR_CONSTANT,
+ EXPR_SEGMENT,
+ EXPR_LABEL,
+};
+
+/* one parsed expression */
+struct expr {
+ enum expr_type type;
+ union {
+ // instruction
+ union mips_instruction ins;
+ // directive
+ union mips_directive directive;
+ // constant
+ struct const_expr constant;
+ // segment or label
+ char text[MAX_LEX_LENGTH];
+ };
+};
+
+/* binding of a symbol; the assembler emits SYM_LOCAL as STB_LOCAL and the
+ * other two as STB_GLOBAL */
+enum symbol_flag {
+ SYM_LOCAL,
+ SYM_GLOBAL,
+ SYM_EXTERNAL,
+};
+
+/* a named position in the program */
+struct symbol {
+ char name[MAX_LEX_LENGTH];
+ // for labels: start of the section plus the number of instructions
+ // already in it when the label was defined
+ uint32_t position;
+ enum symbol_flag flag;
+
+};
+
+/* array of symbols; `count` entries are in use.
+ * NOTE(review): `len` is presumably the allocated capacity -- confirm in
+ * symtbl.c */
+struct symbol_table {
+ uint32_t count;
+ uint32_t len;
+ struct symbol *symbols;
+};
+
+int symtbl_init(struct symbol_table *sym_tbl);
+void symtbl_free(struct symbol_table *sym_tbl);
+
+/* append `sym`; callers treat a non-zero return as failure */
+int symtbl_push(struct symbol_table *sym_tbl, struct symbol sym);
+/* look up `name`; callers treat M_SUCCESS as "found". `sym` receives the
+ * entry and is passed as NULL by callers that only test for existence */
+int symtbl_find(struct symbol_table *sym_tbl, struct symbol **sym,
+ const char name[MAX_LEX_LENGTH]);
+
+/* one named section holding `count` instructions in `ins`.
+ * `start` is the section's position in instruction units: it is added to
+ * instruction indexes and scaled by the instruction size by the assembler.
+ * NOTE(review): `len` is presumably the allocated capacity -- confirm in
+ * sectbl.c */
+struct section {
+ uint32_t count;
+ uint32_t len;
+ uint32_t start;
+ uint32_t alignment;
+ union mips_instruction *ins;
+ char name[MAX_LEX_LENGTH];
+};
+
+/* all sections of the program; `current` is the section that newly parsed
+ * instructions and labels are placed in */
+struct section_table {
+ uint32_t count;
+ uint32_t len;
+ struct section *sections;
+ struct section *current;
+ char name[MAX_LEX_LENGTH];
+ uint32_t total_ins;
+};
+
+int sectbl_init(struct section_table *sec_tbl);
+void sectbl_free(struct section_table *sec_tbl);
+
+int sectbl_alloc(struct section_table *sec_tbl, struct section **sec,
+ const char name[MAX_LEX_LENGTH]);
+/* append `ins` to `section`; callers treat a non-zero return as failure */
+int sectbl_push(struct section_table *sec_tbl, struct section *section,
+ union mips_instruction ins);
+int sectbl_get(struct section_table *sec_tbl, struct section **sec,
+ const char name[MAX_LEX_LENGTH]);
+
+/* how a symbol reference is patched into an instruction once the symbol is
+ * known: REF_OFFESET adds the distance from the instruction to the symbol to
+ * the branch offset, REF_TARGET adds the symbol position to the jump target */
+enum reference_type {
+ REF_OFFESET,
+ REF_TARGET,
+};
+
+/* a use of symbol `name` by instruction `index` of `section` */
+struct reference {
+ enum reference_type type;
+ struct section *section;
+ uint32_t index;
+ char name[MAX_LEX_LENGTH];
+};
+
+/* array of references; `count` entries are in use.
+ * NOTE(review): `len` is presumably the allocated capacity -- confirm in
+ * reftbl.c */
+struct reference_table {
+ uint32_t count;
+ uint32_t len;
+ struct reference *references;
+};
+
+int reftbl_init(struct reference_table *ref_tbl);
+void reftbl_free(struct reference_table *ref_tbl);
+int reftbl_push(struct reference_table *ref_tbl, struct reference reference);
+
+/* parser state: token source, one token of lookahead, the tables filled
+ * while parsing, and the architecture specific callbacks */
+struct parser {
+ struct lexer *lexer;
+ // lookahead token; type TOK_EOF means no token is buffered
+ struct token peek;
+
+ // sections
+ struct section_table sec_tbl;
+
+ // symbols
+ struct symbol_table sym_tbl;
+
+ // references
+ struct reference_table ref_tbl;
+
+ // called for an identifier that is not followed by '='; the identifier
+ // token has already been consumed and is passed in
+ int (*parse_instruction)(struct parser *, union mips_instruction *,
+ struct token);
+ // called when the next token is a directive (not yet consumed)
+ int (*parse_directive)(struct parser *, union mips_directive *);
+ int (*is_instruction)(const char *ident);
+};
+
+/* get the next token in the parser; `tok` may be NULL to discard it */
+int next_token(struct parser *parser, struct token *tok);
+
+/* peek the next token in the parser without consuming it; `tok` may be NULL */
+int peek_token(struct parser *parser, struct token *tok);
+
+/* assert the next token is a specific type; the token is consumed and, when
+ * `tok` is not NULL, copied out on success */
+int assert_token(struct parser *parser, enum token_type type,
+ struct token *tok);
+
+/* assert the next token is EOF or NL */
+int assert_eol(struct parser *parser);
+
+/* get the next expression in the parser; returns M_EOF at the end of input */
+int parser_next(struct parser *parser, struct expr *expr);
+
+/* initialize the base parser */
+int parser_init(struct lexer *lexer, struct parser *parser);
+
+/* free the base parser */
+void parser_free(struct parser *parser);
+
+#endif /* __PARSE_H__ */
diff --git a/masm/parse_mips32.c b/masm/parse_mips32.c
new file mode 100644
index 0000000..bd07ce0
--- /dev/null
+++ b/masm/parse_mips32.c
@@ -0,0 +1,847 @@
+#include <mips.h>
+#include <mips32.h>
+#include <merror.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+
+#include "parse_mips32.h"
+#include "parse.h"
+#include "mlimits.h"
+#include "parse.h"
+#include "lex.h"
+
+/* each instruction has a given parse format
+ * internal to the parser. operands are listed in the order they are
+ * written in the assembly source */
+enum mips32_parse_format {
+ // register type: rd, rs, rt
+ MIPS32_PARSE_R,
+ // register type: rs, rt
+ MIPS32_PARSE_R2,
+ // register type: rd
+ MIPS32_PARSE_RD,
+ // register type: rs
+ MIPS32_PARSE_RS,
+ // immediate type: rt, rs, immd
+ MIPS32_PARSE_I,
+ // jump type: target
+ MIPS32_PARSE_J,
+ // jump type: register
+ MIPS32_PARSE_JR,
+ // offset 16b type: offset
+ MIPS32_PARSE_O16,
+ // offset 26b type: offset
+ MIPS32_PARSE_O26,
+ // branch equal type: rs, rt, offset
+ MIPS32_PARSE_BE,
+ // branch zero type: rs, offset
+ MIPS32_PARSE_BZ,
+ // store and load: rt, offset(base)
+ MIPS32_PARSE_SL,
+ // store and load immediate: rt, immediate
+ MIPS32_PARSE_SLI,
+ // shift: rd, rt, sa
+ MIPS32_PARSE_S,
+ // shift variable: rd, rt, rs
+ MIPS32_PARSE_SV,
+};
+
+/* FORMAT(ADD, R) expands to a designated initializer that maps
+ * MIPS32_INS_ADD to MIPS32_PARSE_R */
+#define FORMAT(ins, format) \
+ [MIPS32_INS_##ins] = MIPS32_PARSE_##format, \
+
+/* parse format of every instruction, indexed by instruction type.
+ * the array is only as long as the largest index listed here, and any
+ * unlisted index below that is zero-initialized, i.e. MIPS32_PARSE_R */
+const enum mips32_parse_format mips32_parse_formats[] = {
+ FORMAT(ADD, R)
+ FORMAT(ADDI, I)
+ FORMAT(ADDIU, I)
+ FORMAT(ADDU, R)
+ FORMAT(AND, R)
+ FORMAT(ANDI, I)
+ FORMAT(BAL, O16)
+ FORMAT(BALC, O26)
+ FORMAT(BC, O26)
+ FORMAT(BEQ, BE)
+ FORMAT(BEQL, BE)
+ FORMAT(BGEZ, BZ)
+ FORMAT(BGEZAL, BZ)
+ FORMAT(BGEZALL, BZ)
+ FORMAT(BGEZL, BZ)
+ FORMAT(BGTZ, BZ)
+ FORMAT(BGTZL, BZ)
+ FORMAT(BLEZ, BZ)
+ FORMAT(BLEZL, BZ)
+ FORMAT(BLTZ, BZ)
+ FORMAT(BLTZAL, BZ)
+ FORMAT(BLTZALL, BZ)
+ FORMAT(BLTZL, BZ)
+ FORMAT(BNE, BE)
+ FORMAT(BNEL, BE)
+ FORMAT(DDIV, R2)
+ FORMAT(DDIVU, R2)
+ FORMAT(DIV, R2)
+ FORMAT(DIVU, R2)
+ FORMAT(J, J)
+ FORMAT(JAL, J)
+ FORMAT(JALR, JR) // TODO: handle rd
+ FORMAT(JALX, J)
+ FORMAT(JR, JR)
+ FORMAT(LB, SL)
+ FORMAT(LBU, SL)
+ FORMAT(LH, SL)
+ FORMAT(LHU, SL)
+ FORMAT(LUI, SLI)
+ FORMAT(LW, SL)
+ FORMAT(LWL, SL)
+ FORMAT(LWR, SL)
+ FORMAT(MFHI, RD)
+ FORMAT(MFLO, RD)
+ FORMAT(MTHI, RS)
+ FORMAT(MTLO, RS)
+ FORMAT(MULT, R2)
+ FORMAT(MULTU, R2)
+ FORMAT(SB, SL)
+ FORMAT(SH, SL)
+ FORMAT(SW, SL)
+ FORMAT(SWL, SL)
+ FORMAT(SLL, S)
+ FORMAT(SLLV, SV)
+ FORMAT(SLT, R)
+ FORMAT(SLTI, I)
+ FORMAT(SLTIU, I)
+ FORMAT(SLTU, R)
+ FORMAT(SRA, S)
+ FORMAT(SRAV, SV)
+ FORMAT(SRL, S)
+ FORMAT(SRLV, SV)
+ FORMAT(OR, R)
+ FORMAT(ORI, I)
+ FORMAT(NOR, R)
+ FORMAT(SUB, R)
+ FORMAT(SUBU, R)
+ FORMAT(XOR, R)
+ FORMAT(XORI, I)
+};
+
+#undef FORMAT
+
+/* 2^5, 2^16 and 2^26: one past the largest value that fits in a 5, 16
+ * or 26 bit field */
+#define MAX5 32
+#define MAX16 65536
+#define MAX26 67108864
+
+/* read an operand that is either a literal number or a symbol name.
+ *
+ * a number is stored straight into `offset`. an identifier is recorded
+ * in the parser's reference table (tagged with `type`, the current
+ * section and that section's instruction count) and `offset` is set to
+ * 0. any other token is reported as an error. */
+static int get_reference(struct parser *parser, uint32_t *offset,
+		enum reference_type type)
+{
+	struct token operand;
+
+	if (next_token(parser, &operand))
+		return M_ERROR;
+
+	switch (operand.type) {
+	case TOK_NUMBER:
+		*offset = operand.number;
+		return M_SUCCESS;
+	case TOK_IDENT:
+		break;
+	default:
+		ERROR_POS(operand, "unexpected token of type '%s'",
+			  token_str(operand.type));
+		return M_ERROR;
+	}
+
+	// symbolic operand: remember where it was used
+	struct section *here = parser->sec_tbl.current;
+	struct reference pending = {
+		.type = type,
+		.section = here,
+		.index = here->count,
+	};
+	strcpy(pending.name, operand.text);
+
+	if (reftbl_push(&parser->ref_tbl, pending))
+		return M_ERROR;
+
+	*offset = 0;
+	return M_SUCCESS;
+}
+
+/* read a number or symbol operand, recording a symbol as REF_OFFESET */
+static int get_offset(struct parser *parser, uint32_t *offset)
+{
+	int status = get_reference(parser, offset, REF_OFFESET);
+
+	return status;
+}
+
+/* read a number or symbol operand, recording a symbol as REF_TARGET */
+static int get_target(struct parser *parser, uint32_t *offset)
+{
+	int status = get_reference(parser, offset, REF_TARGET);
+
+	return status;
+}
+
+/* look up an instruction template by mnemonic, ignoring case.
+ * on a match the template is copied into `res` (when non-NULL) and
+ * M_SUCCESS is returned; M_ERROR means no mnemonic matched */
+static int get_instruction(const char *ident, struct mips32_instruction *res)
+{
+	int idx = 0;
+
+	while (idx < __MIPS32_INS_LEN) {
+		struct mips32_instruction candidate =
+			mips32_instructions[idx++];
+
+		if (strcasecmp(ident, candidate.name) != 0)
+			continue;
+
+		if (res != NULL)
+			*res = candidate;
+		return M_SUCCESS;
+	}
+
+	return M_ERROR;
+}
+
+/* parser hook: M_SUCCESS when `ident` names a known instruction,
+ * M_ERROR otherwise */
+static int is_instruction(const char *ident)
+{
+	int status = get_instruction(ident, NULL);
+
+	return status;
+}
+
+/* parse a register operand and store its number in `reg`.
+ *
+ * the token text is matched without the leading '$'. accepted names:
+ *   zero, at, v0-v1, a0-a3, t0-t9, s0-s7, k0-k1, gp, sp, fp, ra
+ *   and the plain numbers 0-31.
+ *
+ * the whole name has to match: trailing characters ("zerox", "t12",
+ * "100") are rejected instead of being silently ignored. "rp" is still
+ * accepted as an alias of "ra" because earlier versions only knew that
+ * spelling.
+ *
+ * returns M_SUCCESS, or M_ERROR after reporting the unknown register */
+static int parse_register(struct parser *parser, enum mips32_register *reg)
+{
+	struct token token;
+	if (assert_token(parser, TOK_REG, &token))
+		return M_ERROR;
+
+	const char *name = token.text;
+	size_t len = strlen(name);
+	int c0 = len > 0 ? name[0] : '\0';
+	int c1 = len > 1 ? name[1] : '\0';
+	int found = 0;
+	int value = 0;
+
+	if (strcmp(name, "zero") == 0) {
+		// $zero
+		value = MIPS32_REG_ZERO;
+		found = 1;
+	} else if (len == 1 && c0 >= '0' && c0 <= '9') {
+		// $0-$9 (non aliased register names)
+		value = c0 - '0';
+		found = 1;
+	} else if (len == 2 && c0 >= '0' && c0 <= '9' &&
+		   c1 >= '0' && c1 <= '9') {
+		// $00-$31 (non aliased register names)
+		value = (c0 - '0') * 10 + (c1 - '0');
+		found = value <= 31;
+	} else if (len == 2) {
+		switch (c0) {
+		case 'a':
+			// $at $a0-a3
+			if (c1 == 't') {
+				value = MIPS32_REG_AT;
+				found = 1;
+			} else if (c1 >= '0' && c1 <= '3') {
+				value = MIPS32_REG_A0 + (c1 - '0');
+				found = 1;
+			}
+			break;
+		case 'v':
+			// $v0-v1
+			if (c1 >= '0' && c1 <= '1') {
+				value = MIPS32_REG_V0 + (c1 - '0');
+				found = 1;
+			}
+			break;
+		case 't':
+			// $t0-t9; reg T8-T9 are not in order with T0-T7
+			if (c1 >= '0' && c1 <= '7') {
+				value = MIPS32_REG_T0 + (c1 - '0');
+				found = 1;
+			} else if (c1 >= '8' && c1 <= '9') {
+				value = MIPS32_REG_T8 + (c1 - '8');
+				found = 1;
+			}
+			break;
+		case 's':
+			// $s0-s7 $sp
+			if (c1 >= '0' && c1 <= '7') {
+				value = MIPS32_REG_S0 + (c1 - '0');
+				found = 1;
+			} else if (c1 == 'p') {
+				value = MIPS32_REG_SP;
+				found = 1;
+			}
+			break;
+		case 'k':
+			// $k0-k1
+			if (c1 >= '0' && c1 <= '1') {
+				value = MIPS32_REG_K0 + (c1 - '0');
+				found = 1;
+			}
+			break;
+		case 'g':
+			// $gp
+			if (c1 == 'p') {
+				value = MIPS32_REG_GP;
+				found = 1;
+			}
+			break;
+		case 'f':
+			// $fp
+			if (c1 == 'p') {
+				value = MIPS32_REG_FP;
+				found = 1;
+			}
+			break;
+		case 'r':
+			// $ra (and the legacy spelling $rp)
+			if (c1 == 'a' || c1 == 'p') {
+				value = MIPS32_REG_RA;
+				found = 1;
+			}
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (found) {
+		*reg = (enum mips32_register)value;
+		return M_SUCCESS;
+	}
+
+	ERROR_POS(token, "unknown register $%s", token.text);
+	return M_ERROR;
+}
+
+/* three register form, written "rd, rs, rt" */
+static int parse_instruction_r(struct parser *parser,
+		struct mips32_instruction *ins)
+{
+	enum mips32_register operand;
+
+	// rd
+	if (parse_register(parser, &operand))
+		return M_ERROR;
+	ins->R_data.rd = operand;
+
+	// , rs
+	if (assert_token(parser, TOK_COMMA, NULL) ||
+	    parse_register(parser, &operand))
+		return M_ERROR;
+	ins->R_data.rs = operand;
+
+	// , rt
+	if (assert_token(parser, TOK_COMMA, NULL) ||
+	    parse_register(parser, &operand))
+		return M_ERROR;
+	ins->R_data.rt = operand;
+
+	return M_SUCCESS;
+}
+
+/* two register form, written "rs, rt" */
+static int parse_instruction_r2(struct parser *parser,
+		struct mips32_instruction *ins)
+{
+	enum mips32_register operand;
+
+	// rs
+	if (parse_register(parser, &operand))
+		return M_ERROR;
+	ins->R_data.rs = operand;
+
+	// , rt
+	if (assert_token(parser, TOK_COMMA, NULL) ||
+	    parse_register(parser, &operand))
+		return M_ERROR;
+	ins->R_data.rt = operand;
+
+	return M_SUCCESS;
+}
+
+/* single source register form, written "rs" */
+static int parse_instruction_rs(struct parser *parser,
+		struct mips32_instruction *ins)
+{
+	enum mips32_register source;
+	int failed = parse_register(parser, &source);
+
+	if (failed)
+		return M_ERROR;
+
+	ins->R_data.rs = source;
+	return M_SUCCESS;
+}
+
+/* single destination register form, written "rd" */
+static int parse_instruction_rd(struct parser *parser,
+		struct mips32_instruction *ins)
+{
+	enum mips32_register dest;
+	int failed = parse_register(parser, &dest);
+
+	if (failed)
+		return M_ERROR;
+
+	ins->R_data.rd = dest;
+	return M_SUCCESS;
+}
+
+/* immediate form, written "rt, rs, immd".
+ *
+ * the immediate has to fit in 16 bits, read either as signed or as
+ * unsigned (-32768 .. 65535). a value outside that range is reported
+ * instead of failing silently or being truncated */
+static int parse_instruction_i(struct parser *parser,
+		struct mips32_instruction *ins)
+{
+	enum mips32_register reg;
+	struct token token;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->I_data.rt = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->I_data.rs = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (assert_token(parser, TOK_NUMBER, &token))
+		return M_ERROR;
+
+	// widen first so the comparison is done on signed values
+	long long value = (long long)token.number;
+	if (value >= MAX16 || value < -(MAX16 / 2)) {
+		ERROR_POS(token, "immediate does not fit in 16 bits");
+		return M_ERROR;
+	}
+	ins->I_data.immd = token.number;
+
+	return M_SUCCESS;
+}
+
+/* offset only form, written "offset".
+ *
+ * `max` is MAX16 or MAX26 and selects the field that receives the
+ * value. it is an exclusive bound: `max` itself no longer fits in the
+ * field, so it is rejected as well */
+static int parse_instruction_offset(struct parser *parser,
+		uint32_t max,
+		struct mips32_instruction *ins)
+{
+	uint32_t n;
+
+	if (get_offset(parser, &n))
+		return M_ERROR;
+
+	if (n >= max) {
+		ERROR("offset %u is out of range", (unsigned)n);
+		return M_ERROR;
+	}
+
+	switch (max) {
+	case MAX26:
+		ins->J_data.target = n;
+		break;
+	case MAX16:
+		ins->B_data.offset = n;
+		break;
+	default:
+		// no other field width is used by the callers
+		break;
+	}
+
+	return M_SUCCESS;
+}
+
+/* jump form, written "target" (a number or a symbol).
+ *
+ * the target has to fit in 26 bits; MAX26 itself is already too large
+ * and is rejected */
+static int parse_instruction_j(struct parser *parser,
+		struct mips32_instruction *ins)
+{
+	uint32_t n;
+
+	if (get_target(parser, &n))
+		return M_ERROR;
+
+	if (n >= MAX26) {
+		ERROR("jump target %u is out of range", (unsigned)n);
+		return M_ERROR;
+	}
+	ins->J_data.target = n;
+
+	return M_SUCCESS;
+}
+
+/* jump register form, written "rs" (jr, jalr).
+ *
+ * the operand is the register holding the jump address, so it is
+ * parsed as a register and stored in the rs field. the previous
+ * version parsed a jump target here, which made "jr $ra" a syntax
+ * error.
+ *
+ * NOTE(review): jalr's optional rd operand is still not handled (see
+ * the TODO in the format table) */
+static int parse_instruction_jr(struct parser *parser,
+		struct mips32_instruction *ins)
+{
+	enum mips32_register reg;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->R_data.rs = reg;
+
+	return M_SUCCESS;
+}
+
+/* compare and branch form, written "rs, rt, offset" (beq, bne, ...).
+ *
+ * the previous version stopped after rs, so every complete
+ * "beq rs, rt, offset" line failed the end of line check in
+ * parse_instruction.
+ *
+ * NOTE(review): assumes I_data and B_data describe the same
+ * instruction word, as the other parsers in this file do -- confirm
+ * against mips32.h */
+static int parse_instruction_branch_equal(struct parser *parser,
+		struct mips32_instruction *ins)
+{
+	enum mips32_register reg;
+	uint32_t n;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->I_data.rs = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->I_data.rt = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (get_offset(parser, &n))
+		return M_ERROR;
+
+	// MAX16 is an exclusive bound
+	if (n >= MAX16) {
+		ERROR("branch offset %u is out of range", (unsigned)n);
+		return M_ERROR;
+	}
+	ins->B_data.offset = n;
+
+	return M_SUCCESS;
+}
+
+/* compare with zero and branch form, written "rs, offset".
+ *
+ * the offset has to fit in 16 bits; MAX16 itself is already too large
+ * and is rejected */
+static int parse_instruction_branch(struct parser *parser,
+		struct mips32_instruction *ins)
+{
+	enum mips32_register reg;
+	uint32_t n;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->B_data.rs = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (get_offset(parser, &n))
+		return M_ERROR;
+
+	if (n >= MAX16) {
+		ERROR("branch offset %u is out of range", (unsigned)n);
+		return M_ERROR;
+	}
+	ins->B_data.offset = n;
+
+	return M_SUCCESS;
+}
+
+/* load/store form, written "rt, offset(base)".
+ *
+ * both parts of the address are optional:
+ *   rt, offset        base defaults to $zero
+ *   rt, (base)        offset defaults to 0
+ *   rt, offset(base)
+ *
+ * the offset has to fit in 16 bits (-32768 .. 65535). when no "(base)"
+ * follows, the rest of the line is left to the end of line check in
+ * parse_instruction, so a line that ends the file without a newline is
+ * accepted too */
+static int parse_instruction_sl(struct parser *parser,
+		struct mips32_instruction *ins)
+{
+	enum mips32_register reg;
+	uint32_t offset = 0;
+	struct token token;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->I_data.rt = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (peek_token(parser, &token))
+		return M_ERROR;
+
+	if (token.type != TOK_LPAREN) {
+		if (get_offset(parser, &offset))
+			return M_ERROR;
+
+		// negative literals arrive wrapped around in the uint32_t
+		int32_t value = (int32_t)offset;
+		if (value >= MAX16 || value < -(MAX16 / 2)) {
+			ERROR_POS(token, "offset does not fit in 16 bits");
+			return M_ERROR;
+		}
+	}
+	ins->I_data.immd = offset;
+
+	if (peek_token(parser, &token))
+		return M_ERROR;
+
+	// no "(base)" part: address is relative to $zero
+	if (token.type != TOK_LPAREN) {
+		ins->I_data.rs = MIPS32_REG_ZERO;
+		return M_SUCCESS;
+	}
+
+	if (assert_token(parser, TOK_LPAREN, NULL))
+		return M_ERROR;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->I_data.rs = reg;
+
+	if (assert_token(parser, TOK_RPAREN, NULL))
+		return M_ERROR;
+
+	return M_SUCCESS;
+}
+
+/* load immediate form, written "rt, immediate" (lui).
+ *
+ * the immediate has to fit in 16 bits; MAX16 itself is already too
+ * large and is rejected */
+static int parse_instruction_sli(struct parser *parser,
+		struct mips32_instruction *ins)
+{
+	enum mips32_register reg;
+	struct token token;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->I_data.rt = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (assert_token(parser, TOK_NUMBER, &token))
+		return M_ERROR;
+
+	if (token.number >= MAX16) {
+		ERROR_POS(token, "immediate does not fit in 16 bits");
+		return M_ERROR;
+	}
+	ins->I_data.immd = token.number;
+
+	return M_SUCCESS;
+}
+
+/* shift by constant form, written "rd, rt, sa".
+ *
+ * the shift amount is a 5 bit field, so only 0 .. 31 is accepted;
+ * MAX5 (32) itself is already too large */
+static int parse_instruction_s(struct parser *parser,
+		struct mips32_instruction *ins)
+{
+	enum mips32_register reg;
+	struct token token;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->R_data.rd = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (parse_register(parser, &reg))
+		return M_ERROR;
+	ins->R_data.rt = reg;
+
+	if (assert_token(parser, TOK_COMMA, NULL))
+		return M_ERROR;
+
+	if (assert_token(parser, TOK_NUMBER, &token))
+		return M_ERROR;
+
+	// widen first so the comparison is done on signed values
+	long long amount = (long long)token.number;
+	if (amount < 0 || amount >= MAX5) {
+		ERROR_POS(token, "shift amount must be between 0 and 31");
+		return M_ERROR;
+	}
+	ins->R_data.shamt = token.number;
+
+	return M_SUCCESS;
+}
+
+/* shift by register form, written "rd, rt, rs" */
+static int parse_instruction_sv(struct parser *parser,
+		struct mips32_instruction *ins)
+{
+	enum mips32_register operand;
+
+	// rd
+	if (parse_register(parser, &operand))
+		return M_ERROR;
+	ins->R_data.rd = operand;
+
+	// , rt
+	if (assert_token(parser, TOK_COMMA, NULL) ||
+	    parse_register(parser, &operand))
+		return M_ERROR;
+	ins->R_data.rt = operand;
+
+	// , rs
+	if (assert_token(parser, TOK_COMMA, NULL) ||
+	    parse_register(parser, &operand))
+		return M_ERROR;
+	ins->R_data.rs = operand;
+
+	return M_SUCCESS;
+}
+
+/* parser hook: parse the operands of the instruction named by `ident`.
+ *
+ * the instruction template is copied into `ins`, then the operand
+ * parser selected by the instruction's parse format fills in the
+ * fields. on success the rest of the line has to be empty.
+ *
+ * the format table is only as long as its largest listed index, so the
+ * instruction type is range checked before it is used as an index */
+static int parse_instruction(struct parser *parser,
+		union mips_instruction *ins,
+		struct token ident)
+{
+	struct mips32_instruction instruction;
+	enum mips32_parse_format format;
+	int res = M_SUCCESS;
+	const size_t format_count = sizeof(mips32_parse_formats) /
+				    sizeof(mips32_parse_formats[0]);
+
+	if (get_instruction(ident.text, &instruction)) {
+		ERROR_POS(ident, "unknown instruction '%s'", ident.text);
+		return M_ERROR;
+	}
+
+	if ((size_t)instruction.type >= format_count) {
+		ERROR_POS(ident, "no parse format for instruction '%s'",
+			  ident.text);
+		return M_ERROR;
+	}
+
+	ins->mips32 = instruction;
+	format = mips32_parse_formats[instruction.type];
+
+	switch (format) {
+	case MIPS32_PARSE_R:
+		res = parse_instruction_r(parser, &ins->mips32);
+		break;
+	case MIPS32_PARSE_R2:
+		res = parse_instruction_r2(parser, &ins->mips32);
+		break;
+	case MIPS32_PARSE_RS:
+		res = parse_instruction_rs(parser, &ins->mips32);
+		break;
+	case MIPS32_PARSE_RD:
+		res = parse_instruction_rd(parser, &ins->mips32);
+		break;
+	case MIPS32_PARSE_I:
+		res = parse_instruction_i(parser, &ins->mips32);
+		break;
+	case MIPS32_PARSE_J:
+		res = parse_instruction_j(parser, &ins->mips32);
+		break;
+	case MIPS32_PARSE_JR:
+		res = parse_instruction_jr(parser, &ins->mips32);
+		break;
+	case MIPS32_PARSE_O16:
+		res = parse_instruction_offset(parser, MAX16, &ins->mips32);
+		break;
+	case MIPS32_PARSE_O26:
+		res = parse_instruction_offset(parser, MAX26, &ins->mips32);
+		break;
+	case MIPS32_PARSE_BE:
+		res = parse_instruction_branch_equal(parser, &ins->mips32);
+		break;
+	case MIPS32_PARSE_BZ:
+		res = parse_instruction_branch(parser, &ins->mips32);
+		break;
+	case MIPS32_PARSE_SL:
+		res = parse_instruction_sl(parser, &ins->mips32);
+		break;
+	case MIPS32_PARSE_SLI:
+		res = parse_instruction_sli(parser, &ins->mips32);
+		break;
+	case MIPS32_PARSE_S:
+		res = parse_instruction_s(parser, &ins->mips32);
+		break;
+	case MIPS32_PARSE_SV:
+		res = parse_instruction_sv(parser, &ins->mips32);
+		break;
+	default:
+		ERROR_POS(ident, "no parse format for instruction '%s'",
+			  ident.text);
+		return M_ERROR;
+	}
+
+	if (res == M_SUCCESS && assert_eol(parser))
+		return M_ERROR;
+
+	return res;
+}
+
+
+/* ".align n": reads the numeric argument, rejects negative values and
+ * values above MAX16, and stores it in directive->align */
+static int parse_directive_align(struct parser *parser,
+		struct mips32_directive *directive)
+{
+	struct token arg;
+
+	if (assert_token(parser, TOK_NUMBER, &arg))
+		return M_ERROR;
+
+	if (arg.number < 0) {
+		ERROR_POS(arg, "cannot align negative");
+		return M_ERROR;
+	} else if (arg.number > MAX16) {
+		ERROR_POS(arg, "cannot align more than 65kb");
+		return M_ERROR;
+	}
+
+	directive->align = arg.number;
+	directive->type = MIPS32_DIRECTIVE_ALIGN;
+
+	return M_SUCCESS;
+}
+
+/* ".space n": reads the numeric argument, rejects negative values and
+ * values above MAX16, and stores it in directive->space */
+static int parse_directive_space(struct parser *parser,
+		struct mips32_directive *directive)
+{
+	struct token arg;
+
+	if (assert_token(parser, TOK_NUMBER, &arg))
+		return M_ERROR;
+
+	if (arg.number < 0) {
+		ERROR_POS(arg, "cannot reserve negative");
+		return M_ERROR;
+	} else if (arg.number > MAX16) {
+		ERROR_POS(arg, "cannot reserve more than 65kb");
+		return M_ERROR;
+	}
+
+	directive->space = arg.number;
+	directive->type = MIPS32_DIRECTIVE_SPACE;
+
+	return M_SUCCESS;
+}
+
+/* ".word", ".half" and ".byte": a comma separated list of numbers.
+ *
+ * `type` selects both the largest accepted value (UINT32_MAX,
+ * UINT16_MAX or UINT8_MAX) and the array of `directive` that receives
+ * the values. at most MAX_ARG_LENGTH values are accepted.
+ *
+ * NOTE(review): the number of values read is not stored anywhere in
+ * `directive` by this function -- confirm how the consumer learns it */
+static int parse_directive_whb(struct parser *parser,
+		struct mips32_directive *directive,
+		enum mips32_directive_type type)
+{
+	struct token token;
+	uint32_t size = 0;
+	uint32_t len = 0;
+
+	switch (type) {
+	case MIPS32_DIRECTIVE_WORD:
+		size = UINT32_MAX;
+		break;
+	case MIPS32_DIRECTIVE_HALF:
+		size = UINT16_MAX;
+		break;
+	case MIPS32_DIRECTIVE_BYTE:
+		size = UINT8_MAX;
+		break;
+	default:
+		// a label needs a statement after it before C23
+		break;
+	}
+
+	directive->type = type;
+
+	while (1) {
+		if (assert_token(parser, TOK_NUMBER, &token))
+			return M_ERROR;
+
+		if (len >= MAX_ARG_LENGTH) {
+			ERROR_POS(token, "directives cannot be longer than "
+				  "%d arguments", MAX_ARG_LENGTH);
+			return M_ERROR;
+		}
+
+		if (token.number > size) {
+			// size is unsigned: %d printed UINT32_MAX as -1
+			ERROR_POS(token, "number cannot execede max size of: "
+				  "%u", (unsigned)size);
+			return M_ERROR;
+		}
+
+		switch (type) {
+		case MIPS32_DIRECTIVE_WORD:
+			directive->words[len++] = token.number;
+			break;
+		case MIPS32_DIRECTIVE_HALF:
+			directive->halfs[len++] = token.number;
+			break;
+		case MIPS32_DIRECTIVE_BYTE:
+			directive->bytes[len++] = token.number;
+			break;
+		default:
+			break;
+		}
+
+		if (peek_token(parser, &token))
+			return M_ERROR;
+
+		if (token.type != TOK_COMMA)
+			break;
+
+		// consume the comma; read it into a real token and check
+		// the result instead of passing NULL and ignoring failure
+		if (next_token(parser, &token))
+			return M_ERROR;
+	}
+
+	return M_SUCCESS;
+}
+
+/* section directive: make the section called `name` the current one,
+ * creating it when it does not exist yet. the name is also copied into
+ * the directive */
+static int parse_section(struct parser *parser,
+		struct mips32_directive *directive,
+		char name[MAX_LEX_LENGTH])
+{
+	struct section *target;
+
+	directive->type = MIPS32_DIRECTIVE_SECTION;
+	strcpy(directive->name, name);
+
+	// only allocate when the lookup did not find the section
+	if (sectbl_get(&parser->sec_tbl, &target, name) != M_SUCCESS) {
+		if (sectbl_alloc(&parser->sec_tbl, &target, name))
+			return M_ERROR;
+	}
+
+	parser->sec_tbl.current = target;
+	return M_SUCCESS;
+}
+
+/* parser hook: parse one directive.
+ *
+ * "align", "space", "word", "half" and "byte" have dedicated parsers;
+ * every other directive name is treated as a section name */
+static int parse_directive(struct parser *parser,
+		union mips_directive *directive)
+{
+	static const struct {
+		const char *name;
+		enum mips32_directive_type type;
+	} data_directives[] = {
+		{ "word", MIPS32_DIRECTIVE_WORD },
+		{ "half", MIPS32_DIRECTIVE_HALF },
+		{ "byte", MIPS32_DIRECTIVE_BYTE },
+	};
+	struct token token;
+
+	if (assert_token(parser, TOK_DIRECTIVE, &token))
+		return M_ERROR;
+
+	struct mips32_directive *out = &directive->mips32;
+
+	// .align n
+	if (strcmp(token.text, "align") == 0)
+		return parse_directive_align(parser, out);
+
+	// .space n
+	if (strcmp(token.text, "space") == 0)
+		return parse_directive_space(parser, out);
+
+	// .word / .half / .byte a, b, ...
+	for (size_t i = 0;
+	     i < sizeof(data_directives) / sizeof(data_directives[0]); i++) {
+		if (strcmp(token.text, data_directives[i].name) == 0)
+			return parse_directive_whb(parser, out,
+						   data_directives[i].type);
+	}
+
+	return parse_section(parser, out, token.text);
+}
+
+/* initialize the base parser and install the mips32 hooks.
+ * returns M_ERROR when the base parser cannot be initialized */
+int mips32_parser_init(struct lexer *lexer, struct parser *parser)
+{
+	int failed = parser_init(lexer, parser);
+
+	if (failed)
+		return M_ERROR;
+
+	parser->is_instruction = is_instruction;
+	parser->parse_directive = parse_directive;
+	parser->parse_instruction = parse_instruction;
+
+	return M_SUCCESS;
+}
+
+/* release a mips32 parser; it owns nothing beyond the base parser */
+void mips32_parser_free(struct parser *parser)
+{
+	struct parser *base = parser;
+
+	parser_free(base);
+}
diff --git a/masm/parse_mips32.h b/masm/parse_mips32.h
new file mode 100644
index 0000000..6de154f
--- /dev/null
+++ b/masm/parse_mips32.h
@@ -0,0 +1,14 @@
+/* Copyright (c) 2024 Freya Murphy */
+
+#ifndef __PARSE_MIPS32_H__
+#define __PARSE_MIPS32_H__
+
+#include "parse.h"
+
+/* initialize a mips32 parser: sets up the base parser and installs the
+ * mips32 instruction and directive hooks */
+int mips32_parser_init(struct lexer *lexer, struct parser *parser);
+
+/* free the mips32 parser */
+void mips32_parser_free(struct parser *parser);
+
+#endif /* __PARSE_MIPS32_H__ */
diff --git a/masm/reftbl.c b/masm/reftbl.c
new file mode 100644
index 0000000..bdd1f07
--- /dev/null
+++ b/masm/reftbl.c
@@ -0,0 +1,47 @@
+#include <string.h>
+#include <stdlib.h>
+#include <mips.h>
+#include <merror.h>
+#include <mlimits.h>
+
+#include "parse.h"
+
+/* number of entries a new reference table is allocated with */
+#define RELTBL_INIT_LEN 8
+
+/* set up an empty table with room for RELTBL_INIT_LEN references.
+ * returns M_ERROR when the buffer cannot be allocated */
+int reftbl_init(struct reference_table *ref_tbl)
+{
+	ref_tbl->count = 0;
+	ref_tbl->len = RELTBL_INIT_LEN;
+	ref_tbl->references =
+		malloc(RELTBL_INIT_LEN * sizeof *ref_tbl->references);
+
+	if (ref_tbl->references != NULL)
+		return M_SUCCESS;
+
+	ERROR("cannot alloc");
+	return M_ERROR;
+}
+
+/* release the buffer holding the references */
+void reftbl_free(struct reference_table *ref_tbl)
+{
+	struct reference *buffer = ref_tbl->references;
+
+	free(buffer);
+}
+
+/* append a copy of `reference`, doubling the buffer when it is full.
+ *
+ * the grown buffer is only installed once realloc succeeded, so a
+ * failure leaves the table exactly as it was (no leak, `len` still
+ * matches the allocation). returns M_ERROR in that case */
+int reftbl_push(struct reference_table *ref_tbl, struct reference reference)
+{
+	if (ref_tbl->count >= ref_tbl->len) {
+		struct reference *grown = realloc(ref_tbl->references,
+				sizeof(struct reference) * ref_tbl->len * 2);
+
+		if (grown == NULL) {
+			ERROR("cannot realloc");
+			return M_ERROR;
+		}
+
+		ref_tbl->references = grown;
+		ref_tbl->len *= 2;
+	}
+
+	ref_tbl->references[ref_tbl->count++] = reference;
+
+	return M_SUCCESS;
+}
diff --git a/masm/sectbl.c b/masm/sectbl.c
new file mode 100644
index 0000000..f568a6f
--- /dev/null
+++ b/masm/sectbl.c
@@ -0,0 +1,103 @@
+#include <string.h>
+#include <stdlib.h>
+#include <mips.h>
+#include <merror.h>
+#include <mlimits.h>
+
+#include "parse.h"
+
+/* initial capacity of the section array and of each section's
+ * instruction buffer */
+#define SECTBL_INIT_LEN 8
+/* name of the section that sectbl_init creates and makes current */
+static const char inital_section[MAX_LEX_LENGTH] = "data";
+
+/* set up an empty section table, then create the initial section and
+ * make it the current one. returns M_ERROR when an allocation fails */
+int sectbl_init(struct section_table *sec_tbl)
+{
+	sec_tbl->total_ins = 0;
+	sec_tbl->count = 0;
+	sec_tbl->len = SECTBL_INIT_LEN;
+	sec_tbl->sections = malloc(sizeof(struct section) * SECTBL_INIT_LEN);
+
+	if (sec_tbl->sections == NULL) {
+		ERROR("cannot alloc");
+		return M_ERROR;
+	}
+
+	int failed = sectbl_alloc(sec_tbl, &sec_tbl->current, inital_section);
+
+	return failed ? M_ERROR : M_SUCCESS;
+}
+
+/* release every section's instruction buffer, then the section array */
+void sectbl_free(struct section_table *sec_tbl)
+{
+	uint32_t idx = 0;
+
+	while (idx < sec_tbl->count) {
+		free(sec_tbl->sections[idx].ins);
+		idx++;
+	}
+
+	free(sec_tbl->sections);
+}
+
+/* append a new, empty section called `name` and return it in `sec`.
+ *
+ * nothing in the table is modified until every allocation succeeded:
+ *  - the instruction buffer is allocated first, so a failure can not
+ *    leave a counted section without a buffer
+ *  - the grown section array is only installed after realloc
+ *    succeeded, so the old array is not leaked on failure
+ *  - realloc may move the array, so `current` is re-pointed at the
+ *    same slot in the new array
+ *
+ * NOTE(review): other section pointers taken before a grow (for
+ * example the `section` stored in each reference) still dangle after
+ * the array moves; fixing that needs indices instead of pointers.
+ *
+ * returns M_SUCCESS, or M_ERROR when an allocation fails */
+int sectbl_alloc(struct section_table *sec_tbl, struct section **sec,
+		const char name[MAX_LEX_LENGTH])
+{
+	union mips_instruction *ins =
+		malloc(sizeof(union mips_instruction) * SECTBL_INIT_LEN);
+
+	if (ins == NULL) {
+		ERROR("cannot alloc");
+		return M_ERROR;
+	}
+
+	if (sec_tbl->count >= sec_tbl->len) {
+		// the table is only full after sectbl_init set `current`
+		size_t cur = (size_t)(sec_tbl->current - sec_tbl->sections);
+		struct section *grown = realloc(sec_tbl->sections,
+				sizeof(struct section) * sec_tbl->len * 2);
+
+		if (grown == NULL) {
+			ERROR("cannot realloc");
+			free(ins);
+			return M_ERROR;
+		}
+
+		sec_tbl->sections = grown;
+		sec_tbl->len *= 2;
+		sec_tbl->current = &grown[cur];
+	}
+
+	struct section *temp;
+	temp = &sec_tbl->sections[sec_tbl->count++];
+	strcpy(temp->name,name);
+	temp->count = 0;
+	temp->len = SECTBL_INIT_LEN;
+	temp->start = sec_tbl->total_ins;
+	temp->alignment = 1;
+	temp->ins = ins;
+
+	*sec = temp;
+	return M_SUCCESS;
+}
+
+/* append `ins` to `section`, doubling its buffer when it is full, and
+ * bump the table-wide instruction counter.
+ *
+ * the grown buffer is only installed once realloc succeeded, so a
+ * failure leaves the section exactly as it was (no leak, `len` still
+ * matches the allocation). returns M_ERROR in that case */
+int sectbl_push(struct section_table *sec_tbl, struct section *section,
+		union mips_instruction ins)
+{
+	if (section->count >= section->len) {
+		union mips_instruction *grown = realloc(section->ins,
+				sizeof(union mips_instruction) *
+				section->len * 2);
+
+		if (grown == NULL) {
+			ERROR("cannot realloc");
+			return M_ERROR;
+		}
+
+		section->ins = grown;
+		section->len *= 2;
+	}
+
+	section->ins[section->count++] = ins;
+	sec_tbl->total_ins++;
+
+	return M_SUCCESS;
+}
+
+/* find the section whose name equals `name`.
+ * on a match the section is stored in `sec` (when non-NULL) and
+ * M_SUCCESS is returned; M_ERROR means no section has that name */
+int sectbl_get(struct section_table *sec_tbl, struct section **sec,
+		const char name[MAX_LEX_LENGTH])
+{
+	uint32_t idx = 0;
+
+	while (idx < sec_tbl->count) {
+		struct section *candidate = &sec_tbl->sections[idx++];
+
+		if (strcmp(name, candidate->name) != 0)
+			continue;
+
+		if (sec != NULL)
+			*sec = candidate;
+		return M_SUCCESS;
+	}
+
+	return M_ERROR;
+}
diff --git a/masm/strtbl.c b/masm/strtbl.c
new file mode 100644
index 0000000..b01bb92
--- /dev/null
+++ b/masm/strtbl.c
@@ -0,0 +1,49 @@
+#include <merror.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "asm.h"
+
+/* find `str` inside the string table.
+ *
+ * the comparison is tried at every byte offset, so a match may also
+ * start in the middle of a stored string (its tail). the offset of the
+ * first match is stored in `res` when `res` is non-NULL.
+ *
+ * returns M_SUCCESS on a match, M_ERROR otherwise */
+int strtbl_get_str(struct str_table *str_tbl, const char *str, size_t *res)
+{
+	size_t off = 0;
+
+	while (off < str_tbl->size) {
+		if (strcmp(str_tbl->ptr + off, str) == 0) {
+			if (res != NULL)
+				*res = off;
+			return M_SUCCESS;
+		}
+		off++;
+	}
+
+	return M_ERROR;
+}
+
+/* make sure `str` is present in the string table.
+ *
+ * when the string can already be found its existing offset is reused;
+ * otherwise the string and its terminator are appended at the end. the
+ * offset is stored in `res` when `res` is non-NULL.
+ *
+ * returns M_ERROR when the table cannot be grown */
+int strtbl_write_str(struct str_table *str_tbl, const char *str, size_t *res)
+{
+	if (strtbl_get_str(str_tbl, str, res) == M_SUCCESS)
+		return M_SUCCESS;
+
+	// bytes to append, terminator included
+	size_t extra = strlen(str) + 1;
+	size_t at = str_tbl->size;
+
+	char *grown = realloc(str_tbl->ptr, at + extra);
+	if (grown == NULL)
+		return M_ERROR;
+
+	memcpy(grown + at, str, extra);
+	str_tbl->ptr = grown;
+
+	if (res != NULL)
+		*res = at;
+
+	str_tbl->size = at + extra;
+	return M_SUCCESS;
+}
+
+/* set up a string table that holds only the empty string at offset 0.
+ *
+ * the allocation is checked before it is written to. when it fails the
+ * table is left empty (size 0, NULL buffer), which the lookup, write
+ * and free functions can still be called on.
+ *
+ * NOTE(review): the function returns void, so a failed allocation can
+ * only be reported through ERROR; a table recovered that way no longer
+ * starts with the empty string */
+void strtbl_init(struct str_table *str_tbl)
+{
+	str_tbl->ptr = malloc(1);
+
+	if (str_tbl->ptr == NULL) {
+		ERROR("cannot alloc");
+		str_tbl->size = 0;
+		return;
+	}
+
+	*str_tbl->ptr = '\0';
+	str_tbl->size = 1;
+}
+
+/* release the buffer holding the strings */
+void strtbl_free(struct str_table *str_tbl)
+{
+	char *buffer = str_tbl->ptr;
+
+	free(buffer);
+}
diff --git a/masm/symtbl.c b/masm/symtbl.c
new file mode 100644
index 0000000..b75c752
--- /dev/null
+++ b/masm/symtbl.c
@@ -0,0 +1,57 @@
+#include <merror.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "parse.h"
+
+/* number of entries a new symbol table is allocated with */
+#define SYMTBL_INIT_LEN 24
+
+/* set up an empty table with room for SYMTBL_INIT_LEN symbols.
+ * returns M_ERROR when the buffer cannot be allocated */
+int symtbl_init(struct symbol_table *sym_tbl)
+{
+	sym_tbl->count = 0;
+	sym_tbl->len = SYMTBL_INIT_LEN;
+	sym_tbl->symbols = malloc(SYMTBL_INIT_LEN * sizeof(struct symbol));
+
+	if (sym_tbl->symbols != NULL)
+		return M_SUCCESS;
+
+	ERROR("cannot alloc");
+	return M_ERROR;
+}
+
+/* release the buffer holding the symbols */
+void symtbl_free(struct symbol_table *sym_tbl)
+{
+	struct symbol *buffer = sym_tbl->symbols;
+
+	free(buffer);
+}
+
+/* append a copy of `sym`, doubling the buffer when it is full.
+ *
+ * the grown buffer is only installed once realloc succeeded, so a
+ * failure leaves the table exactly as it was (no leak, `len` still
+ * matches the allocation). returns M_ERROR in that case */
+int symtbl_push(struct symbol_table *sym_tbl, struct symbol sym)
+{
+	if (sym_tbl->count >= sym_tbl->len) {
+		struct symbol *grown = realloc(sym_tbl->symbols,
+				sizeof(struct symbol) * sym_tbl->len * 2);
+
+		if (grown == NULL) {
+			ERROR("cannot relloc");
+			return M_ERROR;
+		}
+
+		sym_tbl->symbols = grown;
+		sym_tbl->len *= 2;
+	}
+
+	sym_tbl->symbols[sym_tbl->count++] = sym;
+	return M_SUCCESS;
+}
+
+/* find the symbol whose name equals `name`.
+ * on a match the symbol is stored in `ptr` (when non-NULL) and
+ * M_SUCCESS is returned; M_ERROR means no symbol has that name */
+int symtbl_find(struct symbol_table *sym_tbl, struct symbol **ptr,
+		const char name[MAX_LEX_LENGTH])
+{
+	uint32_t idx = 0;
+
+	while (idx < sym_tbl->count) {
+		struct symbol *candidate = &sym_tbl->symbols[idx++];
+
+		if (strcmp(candidate->name, name) != 0)
+			continue;
+
+		if (ptr != NULL)
+			*ptr = candidate;
+		return M_SUCCESS;
+	}
+
+	return M_ERROR;
+}
diff --git a/masm/test.asm b/masm/test.asm
new file mode 100644
index 0000000..c3b61fb
--- /dev/null
+++ b/masm/test.asm
@@ -0,0 +1,22 @@
+.text
+.align 2
+
+main:
+ add $zero,$t7,$t7
+ xori $a0, $v1, 69
+ addi $a0, $v1, 69
+ nor $s0, $s1, $s2
+
+ bltzall $s7, 0x50
+
+ lui $t7, 0x55
+ lw $t0, 18($t7)
+
+ sll $t0, $s0, 17
+test:
+ mult $a0, $s6
+
+ mfhi $s0
+ mtlo $s7
+
+ j test