diff options
author | Freya Murphy <freya@freyacat.org> | 2024-09-09 12:41:49 -0400 |
---|---|---|
committer | Freya Murphy <freya@freyacat.org> | 2024-09-09 12:41:49 -0400 |
commit | 2ed275821676a0d5baea6c7fd843d71c72c2342c (patch) | |
tree | 480297f28e5c42d02a47b3b94027a7abe507d010 /masm | |
download | mips-2ed275821676a0d5baea6c7fd843d71c72c2342c.tar.gz mips-2ed275821676a0d5baea6c7fd843d71c72c2342c.tar.bz2 mips-2ed275821676a0d5baea6c7fd843d71c72c2342c.zip |
initial mips32 (r2000ish mips32r6) assembler
Diffstat (limited to '')
-rw-r--r-- | masm/: | 363 | ||||
-rw-r--r-- | masm/Makefile | 7 | ||||
-rw-r--r-- | masm/asm.h | 33 | ||||
-rw-r--r-- | masm/asm_mips32.c | 365 | ||||
-rw-r--r-- | masm/lex.c | 343 | ||||
-rw-r--r-- | masm/lex.h | 55 | ||||
-rw-r--r-- | masm/main.c | 9 | ||||
-rw-r--r-- | masm/parse.c | 198 | ||||
-rw-r--r-- | masm/parse.h | 156 | ||||
-rw-r--r-- | masm/parse_mips32.c | 847 | ||||
-rw-r--r-- | masm/parse_mips32.h | 14 | ||||
-rw-r--r-- | masm/reftbl.c | 47 | ||||
-rw-r--r-- | masm/sectbl.c | 103 | ||||
-rw-r--r-- | masm/strtbl.c | 49 | ||||
-rw-r--r-- | masm/symtbl.c | 57 | ||||
-rw-r--r-- | masm/test.asm | 22 |
16 files changed, 2668 insertions, 0 deletions
@@ -0,0 +1,363 @@ +#include <merror.h> +#include <mips.h> +#include <mips32.h> +#include <stdio.h> +#include <stdlib.h> +#include <elf.h> +#include <string.h> +#include <stddef.h> + +#include "asm.h" +#include "mlimits.h" +#include "parse.h" +#include "parse_mips32.h" + +extern char *current_file; + +#define SHDR_STRTBL 0 +#define SHDR_SYMTBL 1 +#define SHDR_SECTIONS 2 + +static int parse_file(struct parser *parser) +{ + while (1) { + struct expr expr; + if (parser_next(parser, &expr)) { + break; + } + + if (expr.type == EXPR_INS) + if (sectbl_push(&parser->sec_tbl, + parser->sec_tbl.current, expr.ins)) + return M_ERROR; + } + + for (uint32_t i = 0; i < parser->ref_tbl.count; i++) { + struct reference *ref = &parser->ref_tbl.references[i]; + struct symbol *sym; + struct mips32_instruction *ins; + + if (symtbl_find(&parser->sym_tbl, &sym, ref->name)) { + ERROR("undefined symbol '%s'", ref->name); + return M_ERROR; + } + + ins = &ref->section->ins[ref->index].mips32; + + switch (ref->type) { + case REF_OFFESET: + ins->B_data.offset += sym->position - + (ref->section->start + ref->index); + break; + case REF_TARGET: + ins->J_data.target += sym->position; + break; + } + }; + + return M_SUCCESS; +} + +static int assemble_phdr(struct assembler *asm, Elf32_Phdr **res, + uint32_t *res2) +{ + struct parser *parser = asm->parser; + Elf32_Phdr *phdr = malloc(sizeof(Elf32_Phdr) * + parser->sec_tbl.count); + size_t ins_sz = sizeof(struct mips32_instruction); + + if (phdr == NULL) { + ERROR("cannot alloc"); + return M_ERROR;; + } + + for (uint32_t i = 0; i < parser->sec_tbl.count; i++) { + Elf32_Phdr *hdr = &phdr[i]; + struct section *sec = &parser->sec_tbl.sections[i]; + + hdr->p_type = PT_LOAD; + hdr->p_flags = PF_X | PF_W | PF_R; // FIXME: this is bad + hdr->p_offset = sec->start * ins_sz; + hdr->p_vaddr = sec->start * ins_sz; + hdr->p_paddr = 0x00; + hdr->p_filesz = sec->count * ins_sz; + hdr->p_memsz = sec->count * ins_sz; + hdr->p_align = sec->alignment; + } + + *res = 
phdr; + *res2 = parser->sec_tbl.count; + return M_SUCCESS; +} + +static int assemble_symtbl(struct assembler *asm, Elf32_Sym **res, + uint32_t *res2) +{ + Elf32_Sym *stbl = malloc(sizeof(Elf32_Sym) * asm->parser->sym_tbl + .count); + + if (stbl == NULL) + return M_ERROR; + + for (uint32_t i = 0; i < asm->parser->sym_tbl.count; i++) { + struct symbol *sym = &asm->parser->sym_tbl.symbols[i]; + size_t str_off; + + if (strtbl_write_str(&asm->str_tbl, sym->name, &str_off)) { + free(stbl); + return M_ERROR; + } + + int viz = STB_LOCAL; + switch (sym->flag) { + case SYM_LOCAL: + viz = STB_LOCAL; + break; + case SYM_GLOBAL: + case SYM_EXTERNAL: + viz = STB_GLOBAL; + break; + } + + stbl[i] = (Elf32_Sym) { + .st_name = str_off, + .st_value = sym->position, + .st_size = 0, + .st_info = (unsigned char) + ELF32_ST_INFO(SYMINFO_BT_SELF, + SYMINFO_FLG_DIRECT), + .st_other = (unsigned char) + ELF32_ST_VISIBILITY(viz), + .st_shndx = 0, // FIXME: specify section + }; + }; + + *res = stbl; + *res2 = asm->parser->sym_tbl.count; + + return M_SUCCESS; +} + +static int assemble_shdr(struct assembler *asm, Elf32_Shdr **res, + uint32_t *res2) +{ + uint32_t entries = 2; // str table and sym tabel + entries += asm->parser->sec_tbl.count; // sections + + Elf32_Shdr *shdr = malloc(sizeof(Elf32_Shdr) * entries); + + size_t str_off; + if (strtbl_write_str(&asm->str_tbl, ".shstrtab", &str_off)) { + free(shdr); + return M_ERROR; + } + + // string table + shdr[SHDR_STRTBL] = (Elf32_Shdr) { + .sh_name = str_off, + .sh_type = SHT_STRTAB, + .sh_flags = SHF_STRINGS, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = 1, + .sh_entsize = 0, + }; + + if (strtbl_write_str(&asm->str_tbl, ".shsymtab", &str_off)) { + free(shdr); + return M_ERROR; + } + + // symbol table + shdr[SHDR_SYMTBL] = (Elf32_Shdr) { + .sh_name = str_off, + .sh_type = SHT_SYMTAB, + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + 
.sh_addralign = 1, + .sh_entsize = sizeof(Elf32_Sym), + }; + + // for each section + for (uint32_t i = 0; i < asm->parser->sec_tbl.count; i++) { + struct section *sec = &asm->parser->sec_tbl.sections[i]; + char name[MAX_LEX_LENGTH+1] = "."; + strcat(name, sec->name); + if (strtbl_write_str(&asm->str_tbl, name, &str_off)) { + free(shdr); + return M_ERROR; + } + shdr[i+SHDR_SECTIONS] = (Elf32_Shdr) { + .sh_name = str_off, + .sh_type = SHT_PROGBITS, + .sh_flags = SHF_WRITE | SHF_ALLOC | SHF_EXECINSTR, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = sec->alignment, + .sh_entsize = sizeof(struct mips32_instruction), + }; + } + + *res = shdr; + *res2 = entries; + + return M_SUCCESS; +} + +static int assemble_file(struct assembler *asm) +{ + Elf32_Phdr *phdr; + Elf32_Shdr *shdr; + Elf32_Sym *symtbl; + uint32_t phdr_len; + uint32_t shdr_len; + uint32_t symtbl_len; + + if (assemble_symtbl(asm, &symtbl, &symtbl_len)) + return M_ERROR; + + if (assemble_phdr(asm, &phdr, &phdr_len)) { + free(symtbl); + return M_ERROR; + } + + if (assemble_shdr(asm, &shdr, &shdr_len)) { + free(symtbl); + free(phdr); + return M_ERROR; + }; + + Elf32_Ehdr ehdr = { + .e_ident = { + [EI_MAG0] = ELFMAG0, + [EI_MAG1] = ELFMAG1, + [EI_MAG2] = ELFMAG2, + [EI_MAG3] = ELFMAG3, + [EI_CLASS] = ELFCLASS32, + [EI_DATA] = ELFDATA2LSB, + [EI_VERSION] = EV_CURRENT, + [EI_OSABI] = ELFOSABI_STANDALONE, + [EI_ABIVERSION] = 0x00, + [EI_PAD] = 0x00, + }, + .e_type = ET_REL, + .e_machine = EM_MIPS, + .e_version = EV_CURRENT, + .e_entry = 0x00, + .e_phoff = 0x00, + .e_shoff = 0x00, + .e_flags = EF_MIPS_ARCH_32R6, + .e_ehsize = sizeof(Elf32_Ehdr), + .e_phentsize = 0x20, + .e_phnum = phdr_len, + .e_shentsize = 0x28, + .e_shnum = shdr_len, + .e_shstrndx = 0x00, // str table is always inx 0 + }; + + uint32_t ptr = 0; + + // we must now correct offets and sizes inside the ehdr, phdr, + // and shdr + + ptr += sizeof(Elf32_Ehdr); + + // phdr + ehdr.e_phoff = ptr; + ptr += 
phdr_len * sizeof(Elf32_Phdr); + + // sections + for (uint32_t i = 0; i < asm->parser->sec_tbl.count; i++) { + phdr[i].p_offset = ptr; + phdr[i].p_vaddr = ptr; + shdr[i+SHDR_SECTIONS].sh_offset = ptr; + shdr[i+SHDR_SECTIONS].sh_size = phdr[i].p_filesz; + ptr += phdr[i].p_filesz; + } + + // strtbl + shdr[SHDR_STRTBL].sh_offset = ptr; + shdr[SHDR_STRTBL].sh_size = asm->str_tbl.size; + ptr += asm->str_tbl.size; + + // symtbl + ehdr.e_shoff = ptr; + shdr[SHDR_SYMTBL].sh_offset = ptr; + shdr[SHDR_SYMTBL].sh_size = symtbl_len * sizeof(Elf32_Sym); + ptr += symtbl_len * sizeof(Elf32_Sym); + + FILE *out = fopen("/home/freya/out.o", "w"); + + // ehdr + fwrite(&ehdr, sizeof(Elf32_Ehdr), 1, out); + + // phdr + fwrite(phdr, sizeof(Elf32_Phdr), phdr_len, out); + + // sections + for (uint32_t i = 0; i < asm->parser->sec_tbl.count; i++) { + struct section *sec = &asm->parser->sec_tbl.sections[i]; + for (uint32_t j = 0; j < sec->count; j++) { + struct mips32_instruction *ins = &sec->ins[j].mips32; + fwrite(ins, sizeof(struct mips32_instruction), + 1, out); + } + } + + // str tbl + fwrite(asm->str_tbl.ptr, asm->str_tbl.size, 1, out); + + // sym tbl + fwrite(symtbl, sizeof(Elf32_Sym), symtbl_len, out); + + // shdr + fwrite(shdr, sizeof(Elf32_Shdr), shdr_len, out); + + fclose(out); + + free(shdr); + free(phdr); + free(symtbl); + + return M_SUCCESS; +} + +int assemble_file_mips32(char *path) +{ + struct lexer lexer; + struct parser parser; + current_file = path; + int res = M_SUCCESS; + + if (lexer_init(current_file, &lexer)) + return M_ERROR; + + if (mips32_parser_init(&lexer, &parser)) + return M_ERROR; + + if (res == M_SUCCESS) + res = parse_file(&parser); + + struct assembler assembler; + assembler.parser = &parser; + strtbl_init(&assembler.str_tbl); + + if (res == M_SUCCESS) + res = assemble_file(&assembler); + + strtbl_free(&assembler.str_tbl); + lexer_free(&lexer); + parser_free(&parser); + + return res; +} diff --git a/masm/Makefile b/masm/Makefile new file mode 100644 index 
0000000..cd1dae3 --- /dev/null +++ b/masm/Makefile @@ -0,0 +1,7 @@ +include ../config.mk + +SRC=. +BIN=../bin/masm +OUT=masm + +include ../makefile.mk diff --git a/masm/asm.h b/masm/asm.h new file mode 100644 index 0000000..1bc7cf5 --- /dev/null +++ b/masm/asm.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2024 Freya Murphy */ + +#ifndef __ASM_H__ +#define __ASM_H__ + +#include <stddef.h> + +struct str_table { + char *ptr; + size_t size; +}; + +/* initalize a string table */ +void strtbl_init(struct str_table *str_tbl); + +/* free a string table */ +void strtbl_free(struct str_table *str_tbl); + +/* get a string form the string table */ +int strtbl_get_str(struct str_table *str_tbl, const char *str, size_t *res); + +/* get or append a string into the string table */ +int strtbl_write_str(struct str_table *str_tbl, const char *str, size_t *res); + +struct assembler { + struct parser *parser; + struct str_table str_tbl; +}; + +/* assemble a mips32 file*/ +int assemble_file_mips32(char *path); + +#endif /* __ASM_H__ */ diff --git a/masm/asm_mips32.c b/masm/asm_mips32.c new file mode 100644 index 0000000..dcb81e5 --- /dev/null +++ b/masm/asm_mips32.c @@ -0,0 +1,365 @@ +#include <merror.h> +#include <mips.h> +#include <mips32.h> +#include <stdio.h> +#include <stdlib.h> +#include <elf.h> +#include <string.h> +#include <stddef.h> + +#include "asm.h" +#include "mlimits.h" +#include "parse.h" +#include "parse_mips32.h" + +extern char *current_file; + +#define SHDR_SYMTBL 0 +#define SHDR_STRTBL 1 +#define SHDR_SECTIONS 2 + +static int parse_file(struct parser *parser) +{ + while (1) { + struct expr expr; + if (parser_next(parser, &expr)) { + break; + } + + if (expr.type == EXPR_INS) + if (sectbl_push(&parser->sec_tbl, + parser->sec_tbl.current, expr.ins)) + return M_ERROR; + } + + for (uint32_t i = 0; i < parser->ref_tbl.count; i++) { + struct reference *ref = &parser->ref_tbl.references[i]; + struct symbol *sym; + struct mips32_instruction *ins; + + if (symtbl_find(&parser->sym_tbl, 
&sym, ref->name)) { + ERROR("undefined symbol '%s'", ref->name); + return M_ERROR; + } + + ins = &ref->section->ins[ref->index].mips32; + + switch (ref->type) { + case REF_OFFESET: + ins->B_data.offset += sym->position - + (ref->section->start + ref->index); + break; + case REF_TARGET: + ins->J_data.target += sym->position; + break; + } + }; + + return M_SUCCESS; +} + +static int assemble_phdr(struct assembler *asm, Elf32_Phdr **res, + uint32_t *res2) +{ + struct parser *parser = asm->parser; + Elf32_Phdr *phdr = malloc(sizeof(Elf32_Phdr) * + parser->sec_tbl.count); + size_t ins_sz = sizeof(struct mips32_instruction); + + if (phdr == NULL) { + ERROR("cannot alloc"); + return M_ERROR;; + } + + for (uint32_t i = 0; i < parser->sec_tbl.count; i++) { + Elf32_Phdr *hdr = &phdr[i]; + struct section *sec = &parser->sec_tbl.sections[i]; + + hdr->p_type = PT_LOAD; + hdr->p_flags = PF_X | PF_W | PF_R; // FIXME: this is bad + hdr->p_offset = sec->start * ins_sz; + hdr->p_vaddr = sec->start * ins_sz; + hdr->p_paddr = 0x00; + hdr->p_filesz = sec->count * ins_sz; + hdr->p_memsz = sec->count * ins_sz; + hdr->p_align = sec->alignment; + } + + *res = phdr; + *res2 = parser->sec_tbl.count; + return M_SUCCESS; +} + +static int assemble_symtbl(struct assembler *asm, Elf32_Sym **res, + uint32_t *res2) +{ + Elf32_Sym *stbl = malloc(sizeof(Elf32_Sym) * asm->parser->sym_tbl + .count); + + if (stbl == NULL) + return M_ERROR; + + for (uint32_t i = 0; i < asm->parser->sym_tbl.count; i++) { + struct symbol *sym = &asm->parser->sym_tbl.symbols[i]; + size_t str_off; + + if (strtbl_write_str(&asm->str_tbl, sym->name, &str_off)) { + free(stbl); + return M_ERROR; + } + + int viz = STB_LOCAL; + switch (sym->flag) { + case SYM_LOCAL: + viz = STB_LOCAL; + break; + case SYM_GLOBAL: + case SYM_EXTERNAL: + viz = STB_GLOBAL; + break; + } + + stbl[i] = (Elf32_Sym) { + .st_name = str_off, + .st_value = sym->position, + .st_size = 0, + .st_info = (unsigned char) + ELF32_ST_INFO(SYMINFO_BT_SELF, + 
SYMINFO_FLG_DIRECT), + .st_other = (unsigned char) + ELF32_ST_VISIBILITY(viz), + .st_shndx = 0, // FIXME: specify section + }; + }; + + *res = stbl; + *res2 = asm->parser->sym_tbl.count; + + return M_SUCCESS; +} + +static int assemble_shdr(struct assembler *asm, Elf32_Shdr **res, + uint32_t *res2) +{ + uint32_t entries = 2; // str table and sym tabel + entries += asm->parser->sec_tbl.count; // sections + + Elf32_Shdr *shdr = malloc(sizeof(Elf32_Shdr) * entries); + + size_t str_off; + if (strtbl_write_str(&asm->str_tbl, ".shsymtab", &str_off)) { + free(shdr); + return M_ERROR; + } + + // symbol table + shdr[SHDR_SYMTBL] = (Elf32_Shdr) { + .sh_name = str_off, + .sh_type = SHT_SYMTAB, + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 1, + .sh_info = 0, + .sh_addralign = 1, + .sh_entsize = sizeof(Elf32_Sym), + }; + + if (strtbl_write_str(&asm->str_tbl, ".shstrtab", &str_off)) { + free(shdr); + return M_ERROR; + } + + // string table + shdr[SHDR_STRTBL] = (Elf32_Shdr) { + .sh_name = str_off, + .sh_type = SHT_STRTAB, + .sh_flags = SHF_STRINGS, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = 1, + .sh_entsize = 0, + }; + + // for each section + for (uint32_t i = 0; i < asm->parser->sec_tbl.count; i++) { + struct section *sec = &asm->parser->sec_tbl.sections[i]; + char name[MAX_LEX_LENGTH+1] = "."; + strcat(name, sec->name); + if (strtbl_write_str(&asm->str_tbl, name, &str_off)) { + free(shdr); + return M_ERROR; + } + shdr[i+SHDR_SECTIONS] = (Elf32_Shdr) { + .sh_name = str_off, + .sh_type = SHT_PROGBITS, + .sh_flags = SHF_WRITE | SHF_ALLOC | SHF_EXECINSTR, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = sec->alignment, + .sh_entsize = sizeof(struct mips32_instruction), + }; + } + + *res = shdr; + *res2 = entries; + + return M_SUCCESS; +} + +static int assemble_file(struct assembler *asm) +{ + Elf32_Phdr *phdr; + Elf32_Shdr *shdr; + Elf32_Sym 
*symtbl; + uint32_t phdr_len; + uint32_t shdr_len; + uint32_t symtbl_len; + + if (assemble_symtbl(asm, &symtbl, &symtbl_len)) + return M_ERROR; + + if (assemble_phdr(asm, &phdr, &phdr_len)) { + free(symtbl); + return M_ERROR; + } + + if (assemble_shdr(asm, &shdr, &shdr_len)) { + free(symtbl); + free(phdr); + return M_ERROR; + }; + + Elf32_Ehdr ehdr = { + .e_ident = { + [EI_MAG0] = ELFMAG0, + [EI_MAG1] = ELFMAG1, + [EI_MAG2] = ELFMAG2, + [EI_MAG3] = ELFMAG3, + [EI_CLASS] = ELFCLASS32, + [EI_DATA] = ELFDATA2LSB, + [EI_VERSION] = EV_CURRENT, + [EI_OSABI] = ELFOSABI_NONE, + [EI_ABIVERSION] = 0x00, + [EI_PAD] = 0x00, + }, + .e_type = ET_REL, + .e_machine = EM_MIPS, + .e_version = EV_CURRENT, + .e_entry = 0x00, + .e_phoff = 0x00, + .e_shoff = 0x00, + .e_flags = EF_MIPS_ARCH_32R6, + .e_ehsize = sizeof(Elf32_Ehdr), + .e_phentsize = sizeof(Elf32_Phdr), + .e_phnum = phdr_len, + .e_shentsize = sizeof(Elf32_Shdr), + .e_shnum = shdr_len, + .e_shstrndx = SHDR_STRTBL, + }; + + uint32_t ptr = 0; + + // we must now correct offets and sizes inside the ehdr, phdr, + // and shdr + + ptr += sizeof(Elf32_Ehdr); + + // phdr + ehdr.e_phoff = ptr; + ptr += phdr_len * sizeof(Elf32_Phdr); + + // sections + for (uint32_t i = 0; i < asm->parser->sec_tbl.count; i++) { + phdr[i].p_offset = ptr; + phdr[i].p_vaddr = ptr; + shdr[i+SHDR_SECTIONS].sh_offset = ptr; + shdr[i+SHDR_SECTIONS].sh_size = phdr[i].p_filesz; + ptr += phdr[i].p_filesz; + } + + // symtbl + shdr[SHDR_SYMTBL].sh_offset = ptr; + shdr[SHDR_SYMTBL].sh_size = symtbl_len * sizeof(Elf32_Sym); + ptr += symtbl_len * sizeof(Elf32_Sym); + + // strtbl + shdr[SHDR_STRTBL].sh_offset = ptr; + shdr[SHDR_STRTBL].sh_size = asm->str_tbl.size; + ptr += asm->str_tbl.size; + + // shdr + ehdr.e_shoff = ptr; + + FILE *out = fopen("out.o", "w"); + + // ehdr + fwrite(&ehdr, sizeof(Elf32_Ehdr), 1, out); + + // phdr + fwrite(phdr, sizeof(Elf32_Phdr), phdr_len, out); + + // sections + for (uint32_t i = 0; i < asm->parser->sec_tbl.count; i++) { + struct 
section *sec = &asm->parser->sec_tbl.sections[i]; + for (uint32_t j = 0; j < sec->count; j++) { + struct mips32_instruction *ins = &sec->ins[j].mips32; + fwrite(ins, sizeof(struct mips32_instruction), + 1, out); + } + } + + // sym tbl + fwrite(symtbl, sizeof(Elf32_Sym), symtbl_len, out); + + // str tbl + fwrite(asm->str_tbl.ptr, asm->str_tbl.size, 1, out); + + // shdr + fwrite(shdr, sizeof(Elf32_Shdr), shdr_len, out); + + fclose(out); + + free(shdr); + free(phdr); + free(symtbl); + + return M_SUCCESS; +} + +int assemble_file_mips32(char *path) +{ + struct lexer lexer; + struct parser parser; + current_file = path; + int res = M_SUCCESS; + + if (lexer_init(current_file, &lexer)) + return M_ERROR; + + if (mips32_parser_init(&lexer, &parser)) + return M_ERROR; + + if (res == M_SUCCESS) + res = parse_file(&parser); + + struct assembler assembler; + assembler.parser = &parser; + strtbl_init(&assembler.str_tbl); + + if (res == M_SUCCESS) + res = assemble_file(&assembler); + + strtbl_free(&assembler.str_tbl); + lexer_free(&lexer); + parser_free(&parser); + + return res; +} diff --git a/masm/lex.c b/masm/lex.c new file mode 100644 index 0000000..06c7114 --- /dev/null +++ b/masm/lex.c @@ -0,0 +1,343 @@ +#include "lex.h" + +#include <mlimits.h> +#include <merror.h> + +static struct { + int x; + int y; +} pos; + +/* get next char in lexer */ +static int lex_next(struct lexer *lexer) +{ + if (lexer->peek != EOF) { + int c = lexer->peek; + lexer->peek = EOF; + return c; + } + + int c = getc(lexer->file); + if (c == '\n') { + lexer->x = 0; + lexer->y++; + } else { + lexer->x++; + } + return c; +} + +/* peek next char in lexer */ +static int lex_peek(struct lexer *lexer) +{ + if (lexer->peek == EOF) + lexer->peek = lex_next(lexer); + return lexer->peek; +} + +/* skip all characters until EOF or newline */ +static void skip_comment(struct lexer *lexer) +{ + int c; + while (1) { + c = lex_next(lexer); + if (c == EOF || c == '\n') + break; + } +} + +/* lexes text until whitespace + 
* returns error on zero length or too long */ +static int lex_ident(struct lexer *lexer, char text[MAX_LEX_LENGTH]) +{ + int len = 0; + char *ptr = text; + int c; + + while (1) { + c = lex_peek(lexer); + if (!( + (c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || + (c == '_') + )) { + break; + } + + // pop char out of lexer + lex_next(lexer); + + if (len + 1 == MAX_LEX_LENGTH) { + ERROR_POS(pos, "ident has max length of %d", + MAX_LEX_LENGTH); + return M_ERROR; + } + + *ptr++ = c; + len++; + } + + if (len == 0) { + ERROR_POS(pos, "attempted to lex empty ident %d", + MAX_LEX_LENGTH); + return M_ERROR; + } + + *ptr = '\0'; + return M_SUCCESS; +} + +/* lexes a string until closing quote + * returns error if string is too long or hit newline */ +static int lex_string(struct lexer *lexer,char text[MAX_LEX_LENGTH]) +{ + int len = 0; + char *ptr = text; + int c; + + while (1) { + c = lex_next(lexer); + if (c == '"') + break; + + // match escape character + if (c == '\\') { + switch (lex_peek(lexer)) { + case 'n': + c = '\n'; + lex_next(lexer); + break; + case 't': + c = '\t'; + lex_next(lexer); + break; + case '\\': + c = '\\'; + lex_next(lexer); + break; + case '"': + c = '"'; + lex_next(lexer); + break; + } + } + + // strings cannot span multiple lines + if (c == '\n') { + ERROR_POS(pos, "reached newline before end of string"); + return M_ERROR; + } + + if (len + 1 == MAX_LEX_LENGTH) { + ERROR_POS(pos, "string has max length of %d", + MAX_LEX_LENGTH); + return M_ERROR; + } + + *ptr++ = c; + len++; + } + + *ptr = '\0'; + return M_SUCCESS; +} + +/* lexes a integer number in base 2,8,10, or 16, + * uses base 10 by default but chan be changed by 0b, 0o, and 0x */ +static int lex_number(struct lexer *lexer, int64_t *n) +{ + int64_t number = 0; + int base = 10; + + // skip all leading zeros, they dont do anything. + // this also allows us to directly check for 0b, 0o, and 0x + // right away! 
+ while (1) { + if (lex_peek(lexer) == '0') + lex_next(lexer); + else + break; + } + + // match change of base + switch (lex_peek(lexer)) { + case 'b': + base = 2; + lex_next(lexer); + break; + case 'o': + base = 8; + lex_next(lexer); + break; + case 'x': + base = 16; + lex_next(lexer); + break; + } + + while (1) { + char c = lex_peek(lexer); + int n = 0; + if (c >= '0' && c <= '9') { + n = c - '0'; + } else if (c >= 'a' && c <= 'z') { // match A-Z so we can + n = c - 'a' + 10; // catch the errors + } else if (c >= 'A' && c <= 'Z') { // here instead of later + n = c - 'A' + 10; + } else { + break; // no longer a number + } + // if number provided is bigger than my base, + // error ! + if (n >= base) { + ERROR_POS(pos, "character '%c' is bigger than number base" + "'%d'", c, base); + return M_ERROR; + } + lex_next(lexer); + number *= base; + number += n; + } + + *n = number; + return M_SUCCESS; +} + +/* lex the next token on the file */ +int lexer_next(struct lexer *lexer, struct token *token) +{ +again: // use label to avoid whitespace recursion + token->x = lexer->x; + token->y = lexer->y; + pos.x = lexer->x; + pos.y = lexer->y; + token->type = TOK_EOF; + + int c = lex_peek(lexer); + int res = M_SUCCESS; + + switch (c) { + + case EOF: + case '\0': + token->type = TOK_EOF; + break; + case ';': + case '#': + skip_comment(lexer); + goto again; + case ' ': + case '\t': + // skip white space + lex_next(lexer); + goto again; + case '\n': + lex_next(lexer); + token->type = TOK_NL; + break; + case ',': + lex_next(lexer); + token->type = TOK_COMMA; + break; + case '=': + lex_next(lexer); + token->type = TOK_EQUAL; + break; + case '(': + lex_next(lexer); + token->type = TOK_LPAREN; + break; + case ')': + token->type = TOK_RPAREN; + lex_next(lexer); + break; + case '$': + token->type = TOK_REG; + lex_next(lexer); + res = lex_ident(lexer, token->text); + break; + case '.': + token->type = TOK_DIRECTIVE; + lex_next(lexer); + res = lex_ident(lexer, token->text); + break; + case 
'"': + token->type = TOK_STRING; + lex_next(lexer); + res = lex_string(lexer, token->text); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + token->type = TOK_NUMBER; + res = lex_number(lexer, &token->number); + break; + default: + token->type = TOK_IDENT; + res = lex_ident(lexer, token->text); + if (lex_peek(lexer) == ':') { + lex_next(lexer); + token->type = TOK_LABEL; + } + break; + } + return res; +} + +int lexer_init(const char *path, struct lexer *lexer) +{ + FILE *file = fopen(path, "r"); + if (file == NULL) { + ERROR_POS(pos, "cannot file '%s'", path); + return M_ERROR; + } + lexer->file = file; + lexer->peek = EOF; + lexer->x = 0; + lexer->y = 0; + return M_SUCCESS; +} + +int lexer_free(struct lexer *lexer) +{ + return fclose(lexer->file); +} + +char *token_str(enum token_type type) +{ + switch (type) { + case TOK_IDENT: + return "ident"; + case TOK_REG: + return "register"; + case TOK_LABEL: + return "label"; + case TOK_STRING: + return "string"; + case TOK_COMMA: + return "comma"; + case TOK_EQUAL: + return "equal"; + case TOK_LPAREN: + return "left parentheses"; + case TOK_RPAREN: + return "right parentheses"; + case TOK_NUMBER: + return "number"; + case TOK_EOF: + return "end of file"; + case TOK_NL: + return "new line"; + case TOK_DIRECTIVE: + return "directive"; + } + return "unknown"; +} diff --git a/masm/lex.h b/masm/lex.h new file mode 100644 index 0000000..f1c482a --- /dev/null +++ b/masm/lex.h @@ -0,0 +1,55 @@ +/* Copyright (c) 2024 Freya Murphy */ + +#ifndef __LEX_H__ +#define __LEX_H__ + +#include <mlimits.h> +#include <stdio.h> +#include <stdint.h> + +struct lexer { + FILE *file; + int peek; + int x; + int y; +}; + +enum token_type { + TOK_IDENT, + TOK_REG, + TOK_LABEL, + TOK_STRING, + TOK_COMMA, + TOK_EQUAL, + TOK_LPAREN, + TOK_RPAREN, + TOK_NUMBER, + TOK_EOF, + TOK_NL, + TOK_DIRECTIVE, +}; + +struct token { + enum token_type type; + union { + int64_t number; + 
char text[MAX_LEX_LENGTH]; + }; + int x; + int y; +}; + +/* initalize a lexer */ +int lexer_init(const char *file, struct lexer *lexer); + +/* free the lxer */ +int lexer_free(struct lexer *lexer); + +/* lexes the next token, returns M_ERROR on error, + * and TOK_EOF on EOF */ +int lexer_next(struct lexer *lexer, struct token *token); + +/* token type to string */ +char *token_str(enum token_type); + +#endif /* __LEX_H__ */ diff --git a/masm/main.c b/masm/main.c new file mode 100644 index 0000000..957b34c --- /dev/null +++ b/masm/main.c @@ -0,0 +1,9 @@ +#include "asm.h" + +int main(int argc, char **argv) { + + if (argc != 2) + return 0; + + return assemble_file_mips32(argv[1]); +} diff --git a/masm/parse.c b/masm/parse.c new file mode 100644 index 0000000..9876311 --- /dev/null +++ b/masm/parse.c @@ -0,0 +1,198 @@ +#include <mlimits.h> +#include <merror.h> +#include <stdio.h> +#include <string.h> + +#include "parse.h" +#include "lex.h" + +int next_token(struct parser *parser, struct token *tok) +{ + if (parser->peek.type != TOK_EOF) { + if (tok != NULL) + *tok = parser->peek; + parser->peek.type = TOK_EOF; + return M_SUCCESS; + } + struct token token; + if (lexer_next(parser->lexer, &token)) + return M_ERROR; + if (tok != NULL) + *tok = token; + return M_SUCCESS; +} + + +int peek_token(struct parser *parser, struct token *tok) +{ + if (parser->peek.type == TOK_EOF) { + if (next_token(parser, &parser->peek)) + return M_ERROR; + } + if (tok != NULL) + *tok = parser->peek; + return M_SUCCESS; +} + + +int assert_token(struct parser *parser, enum token_type type, + struct token *tok) +{ + struct token token; + if (next_token(parser, &token)) + return M_ERROR; + if (token.type != type) { + ERROR_POS(token, "expected a token of type '%s', got '%s'", + token_str(type), token_str(token.type)); + return M_ERROR; + } + if (tok != NULL) + *tok = token; + return M_SUCCESS; +} + +int assert_eol(struct parser *parser) +{ + struct token token; + if (next_token(parser, &token)) + 
return M_ERROR; + if (token.type != TOK_NL && token.type != TOK_EOF) { + ERROR_POS(token, "expected a new line or end of file"); + return M_ERROR; + } + return M_SUCCESS; +} + +static int parse_constant(struct parser *parser, struct const_expr *expr, + struct token ident) +{ + struct token number; + + if (assert_token(parser, TOK_EQUAL, NULL)) + return M_ERROR; + + if (assert_token(parser, TOK_NUMBER, &number)) + return M_ERROR; + + strcpy(expr->name,ident.text); + expr->value = number.number; + + return M_SUCCESS; +} + +static int parser_handle_ident(struct parser *parser, struct expr *expr) +{ + struct token ident; + struct token peek; + + if (assert_token(parser, TOK_IDENT, &ident)) + return M_ERROR; + + if (peek_token(parser, &peek)) + return M_ERROR; + + if (peek.type == TOK_EQUAL) { + expr->type = EXPR_CONSTANT; + return parse_constant(parser, &expr->constant, ident); + } else { + expr->type = EXPR_INS; + return parser->parse_instruction(parser, &expr->ins, ident); + } +} + + +static int parse_label(struct parser *parser, + struct expr *expr) +{ + struct token token; + struct symbol symbol; + uint32_t index; + + if (assert_token(parser, TOK_LABEL, &token)) + return M_ERROR; + strcpy(expr->text, token.text); + + if (symtbl_find(&parser->sym_tbl, NULL, token.text) == M_SUCCESS) { + ERROR_POS(token, "redefined symbol '%s'", token.text); + return M_ERROR; + } + + index = parser->sec_tbl.current->start + + parser->sec_tbl.current->count; + symbol = (struct symbol) { + .name = "", + .position = index, + .flag = SYM_LOCAL, + }; + strcpy(symbol.name, token.text); + + if (symtbl_push(&parser->sym_tbl, symbol)) + return M_ERROR; + + return M_SUCCESS; +} + + +int parser_next(struct parser *parser, struct expr *expr) +{ + struct token token; + int res = M_SUCCESS; + +again: + if (peek_token(parser, &token)) + return M_ERROR; + + switch (token.type) { + case TOK_NL: + next_token(parser, NULL); + goto again; + + case TOK_EOF: + res = M_EOF; + break; + + case TOK_LABEL: + 
expr->type = EXPR_LABEL; + res = parse_label(parser, expr); + break; + + case TOK_DIRECTIVE: + expr->type = EXPR_DIRECTIVE; + res = parser->parse_directive(parser, + &expr->directive); + break; + + case TOK_IDENT: + res = parser_handle_ident(parser, expr); + break; + + default: + ERROR_POS(token, "unexpected token '%s'", + token_str(token.type)); + return M_ERROR; + + } + + return res; +} + +int parser_init(struct lexer *lexer, struct parser *parser) +{ + parser->lexer = lexer; + parser->peek.type = TOK_EOF; + if (symtbl_init(&parser->sym_tbl)) + return M_ERROR; + if (sectbl_init(&parser->sec_tbl)) + return M_ERROR; + if (reftbl_init(&parser->ref_tbl)) + return M_ERROR; + return M_SUCCESS; +} + + +void parser_free(struct parser *parser) +{ + symtbl_free(&parser->sym_tbl); + sectbl_free(&parser->sec_tbl); + reftbl_free(&parser->ref_tbl); +} diff --git a/masm/parse.h b/masm/parse.h new file mode 100644 index 0000000..2aea0be --- /dev/null +++ b/masm/parse.h @@ -0,0 +1,156 @@ +/* Copyright (c) 2024 Freya Murphy */ + +#ifndef __PARSE_H__ +#define __PARSE_H__ + +#include "lex.h" + +#include <mlimits.h> +#include <mips.h> +#include <stdint.h> + +struct const_expr { + char name[MAX_LEX_LENGTH]; + uint32_t value; +}; + +enum expr_type { + EXPR_INS, + EXPR_DIRECTIVE, + EXPR_CONSTANT, + EXPR_SEGMENT, + EXPR_LABEL, +}; + +struct expr { + enum expr_type type; + union { + // instruction + union mips_instruction ins; + // directive + union mips_directive directive; + // constant + struct const_expr constant; + // segment or label + char text[MAX_LEX_LENGTH]; + }; +}; + +enum symbol_flag { + SYM_LOCAL, + SYM_GLOBAL, + SYM_EXTERNAL, +}; + +struct symbol { + char name[MAX_LEX_LENGTH]; + uint32_t position; + enum symbol_flag flag; + +}; + +struct symbol_table { + uint32_t count; + uint32_t len; + struct symbol *symbols; +}; + +int symtbl_init(struct symbol_table *sym_tbl); +void symtbl_free(struct symbol_table *sym_tbl); + +int symtbl_push(struct symbol_table *sym_tbl, struct symbol 
sym); +int symtbl_find(struct symbol_table *sym_tbl, struct symbol **sym, + const char name[MAX_LEX_LENGTH]); + +struct section { + uint32_t count; + uint32_t len; + uint32_t start; + uint32_t alignment; + union mips_instruction *ins; + char name[MAX_LEX_LENGTH]; +}; + +struct section_table { + uint32_t count; + uint32_t len; + struct section *sections; + struct section *current; + char name[MAX_LEX_LENGTH]; + uint32_t total_ins; +}; + +int sectbl_init(struct section_table *sec_tbl); +void sectbl_free(struct section_table *sec_tbl); + +int sectbl_alloc(struct section_table *sec_tbl, struct section **sec, + const char name[MAX_LEX_LENGTH]); +int sectbl_push(struct section_table *sec_tbl, struct section *section, + union mips_instruction ins); +int sectbl_get(struct section_table *sec_tbl, struct section **sec, + const char name[MAX_LEX_LENGTH]); + +enum reference_type { + REF_OFFESET, + REF_TARGET, +}; + +struct reference { + enum reference_type type; + struct section *section; + uint32_t index; + char name[MAX_LEX_LENGTH]; +}; + +struct reference_table { + uint32_t count; + uint32_t len; + struct reference *references; +}; + +int reftbl_init(struct reference_table *ref_tbl); +void reftbl_free(struct reference_table *ref_tbl); +int reftbl_push(struct reference_table *ref_tbl, struct reference reference); + +struct parser { + struct lexer *lexer; + struct token peek; + + // sections + struct section_table sec_tbl; + + // symbols + struct symbol_table sym_tbl; + + // references + struct reference_table ref_tbl; + + int (*parse_instruction)(struct parser *, union mips_instruction *, + struct token); + int (*parse_directive)(struct parser *, union mips_directive *); + int (*is_instruction)(const char *ident); +}; + +/* get the next token in the parser */ +int next_token(struct parser *parser, struct token *tok); + +/* peek the next token in the parser */ +int peek_token(struct parser *parser, struct token *tok); + +/* assert the next token is a specific type */ +int 
assert_token(struct parser *parser, enum token_type type, + struct token *tok); + +/* assert the next token is EOF or NL */ +int assert_eol(struct parser *parser); + +/* get the next expression in the parser */ +int parser_next(struct parser *parser, struct expr *expr); + +/* initalize the base parser */ +int parser_init(struct lexer *lexer, struct parser *parser); + +/* free the base parser */ +void parser_free(struct parser *parser); + +#endif /* __PARSE_H__ */ diff --git a/masm/parse_mips32.c b/masm/parse_mips32.c new file mode 100644 index 0000000..bd07ce0 --- /dev/null +++ b/masm/parse_mips32.c @@ -0,0 +1,847 @@ +#include <mips.h> +#include <mips32.h> +#include <merror.h> +#include <stdint.h> +#include <string.h> +#include <strings.h> + +#include "parse_mips32.h" +#include "parse.h" +#include "mlimits.h" +#include "parse.h" +#include "lex.h" + +/* each instruction has a given parse format + * internal to the parser */ +enum mips32_parse_format { + // register type: rs, rt, td + MIPS32_PARSE_R, + // register type: rs, rt + MIPS32_PARSE_R2, + // register type: rd + MIPS32_PARSE_RD, + // register type: rs + MIPS32_PARSE_RS, + // imeediate type: rs, rt, immd + MIPS32_PARSE_I, + // jump type: offset + MIPS32_PARSE_J, + // jump type: register + MIPS32_PARSE_JR, + // offset 16b type: offset + MIPS32_PARSE_O16, + // offset 26b type: offset + MIPS32_PARSE_O26, + // breanch equal type: rs, rt, offset + MIPS32_PARSE_BE, + // branch zero type: rs, offset + MIPS32_PARSE_BZ, + // store and load: rt, offset(base) + MIPS32_PARSE_SL, + // store and load immediate: rt, immediate + MIPS32_PARSE_SLI, + // shift: rd, rt, sa + MIPS32_PARSE_S, + // shift variable: rd, rt, rs + MIPS32_PARSE_SV, +}; + +#define FORMAT(ins, format) \ + [MIPS32_INS_##ins] = MIPS32_PARSE_##format, \ + +const enum mips32_parse_format mips32_parse_formats[] = { + FORMAT(ADD, R) + FORMAT(ADDI, I) + FORMAT(ADDIU, I) + FORMAT(ADDU, R) + FORMAT(AND, R) + FORMAT(ANDI, I) + FORMAT(BAL, O16) + FORMAT(BALC, O26) + 
FORMAT(BC, O26) + FORMAT(BEQ, BE) + FORMAT(BEQL, BE) + FORMAT(BGEZ, BZ) + FORMAT(BGEZAL, BZ) + FORMAT(BGEZALL, BZ) + FORMAT(BGEZL, BZ) + FORMAT(BGTZ, BZ) + FORMAT(BGTZL, BZ) + FORMAT(BLEZ, BZ) + FORMAT(BLEZL, BZ) + FORMAT(BLTZ, BZ) + FORMAT(BLTZAL, BZ) + FORMAT(BLTZALL, BZ) + FORMAT(BLTZL, BZ) + FORMAT(BNE, BE) + FORMAT(BNEL, BE) + FORMAT(DDIV, R2) + FORMAT(DDIVU, R2) + FORMAT(DIV, R2) + FORMAT(DIVU, R2) + FORMAT(J, J) + FORMAT(JAL, J) + FORMAT(JALR, JR) // TODO: handle rd + FORMAT(JALX, J) + FORMAT(JR, JR) + FORMAT(LB, SL) + FORMAT(LBU, SL) + FORMAT(LH, SL) + FORMAT(LHU, SL) + FORMAT(LUI, SLI) + FORMAT(LW, SL) + FORMAT(LWL, SL) + FORMAT(LWR, SL) + FORMAT(MFHI, RD) + FORMAT(MFLO, RD) + FORMAT(MTHI, RS) + FORMAT(MTLO, RS) + FORMAT(MULT, R2) + FORMAT(MULTU, R2) + FORMAT(SB, SL) + FORMAT(SH, SL) + FORMAT(SW, SL) + FORMAT(SWL, SL) + FORMAT(SLL, S) + FORMAT(SLLV, SV) + FORMAT(SLT, R) + FORMAT(SLTI, I) + FORMAT(SLTIU, I) + FORMAT(SLTU, R) + FORMAT(SRA, S) + FORMAT(SRAV, SV) + FORMAT(SRL, S) + FORMAT(SRLV, SV) + FORMAT(OR, R) + FORMAT(ORI, I) + FORMAT(NOR, R) + FORMAT(SUB, R) + FORMAT(SUBU, R) + FORMAT(XOR, R) + FORMAT(XORI, I) +}; + +#undef FORMAT + +#define MAX5 32 +#define MAX16 65536 +#define MAX26 67108864 + +static int get_reference(struct parser *parser, uint32_t *offset, + enum reference_type type) +{ + struct token token; + + if (next_token(parser, &token)) + return M_ERROR; + + if (token.type == TOK_NUMBER) { + *offset = token.number; + return M_SUCCESS; + } + + if (token.type != TOK_IDENT) { + ERROR_POS(token, "unexpected token of type '%s'", + token_str(token.type)); + return M_ERROR; + } + + struct reference reference = { + .section = parser->sec_tbl.current, + .index = parser->sec_tbl.current->count, + .type = type, + }; + strcpy(reference.name, token.text); + + if (reftbl_push(&parser->ref_tbl, reference)) + return M_ERROR; + + *offset = 0; + + return M_SUCCESS; +} + +static int get_offset(struct parser *parser, uint32_t *offset) +{ + return 
get_reference(parser, offset, REF_OFFESET); +} + +static int get_target(struct parser *parser, uint32_t *offset) +{ + return get_reference(parser, offset, REF_TARGET); +} + +static int get_instruction(const char *ident, struct mips32_instruction *res) +{ + for (int i = 0; i < __MIPS32_INS_LEN; i++) { + struct mips32_instruction ins = + mips32_instructions[i]; + if (strcasecmp(ident, ins.name) == 0) { + if (res != NULL) + *res = ins; + return M_SUCCESS; + } + } + return M_ERROR; +} + +static int is_instruction(const char *ident) +{ + return get_instruction(ident, NULL); +} + +static int parse_register(struct parser *parser, enum mips32_register *reg) +{ + struct token token; + if (assert_token(parser, TOK_REG, &token)) + return M_ERROR; + + int len = strlen(token.text); + int c0 = len > 0 ? token.text[0] : '\0', + c1 = len > 1 ? token.text[1] : '\0', + c2 = len > 2 ? token.text[2] : '\0', + c3 = len > 3 ? token.text[3] : '\0'; + + // $zero + if (c0 == 'z') { + if (c1 == 'e' && c2 == 'r' && c3 == 'o') { + *reg = MIPS32_REG_ZERO; + return M_SUCCESS; + } + } + + // $a0-a3 $at + else if (c0 == 'a') { + if (c1 == 't') { + *reg = MIPS32_REG_AT; + return M_SUCCESS; + } + if (c1 >= '0' && c1 <= '3') { + *reg = MIPS32_REG_A0; + *reg += c1 - '0'; + return M_SUCCESS; + } + } + + // $v0-v1 + else if (c0 == 'v') { + if (c1 >= '0' && c1 <= '1') { + *reg = MIPS32_REG_V0; + *reg += c1 - '0'; + return M_SUCCESS; + } + } + + // $t0-t9 + else if (c0 == 't') { + if (c1 >= '0' && c1 <= '7') { + *reg = MIPS32_REG_T0; + *reg += c1 - '0'; + return M_SUCCESS; + } + // reg T8-T9 are not in order with T0-T7 + if (c1 >= '8' && c1 <= '9') { + *reg = MIPS32_REG_T8; + *reg += c1 - '8'; + return M_SUCCESS; + } + } + + // $s0-s7 $sp + else if (c0 == 's') { + if (c1 >= '0' && c1 <= '7') { + *reg = MIPS32_REG_S0; + *reg += c1 - '0'; + return M_SUCCESS; + } + if (c1 == 'p') { + *reg = MIPS32_REG_SP; + return M_SUCCESS; + } + } + + // $k0-k1 + else if (c0 == 'k') { + if (c1 >= '0' && c1 <= '1') { + 
*reg = MIPS32_REG_K0; + *reg += c1 - '0'; + return M_SUCCESS; + } + } + + // $gp + else if (c0 == 'g') { + if (c1 == 'p') { + *reg = MIPS32_REG_GP; + return M_SUCCESS; + } + } + + // $fp + else if (c0 == 'f') { + if (c1 == 'p') { + *reg = MIPS32_REG_FP; + return M_SUCCESS; + } + } + + // $rp + else if (c0 == 'r') { + if (c1 == 'p') { + *reg = MIPS32_REG_RA; + return M_SUCCESS; + } + } + + // $0-31 (non aliased register names) + else if (c0 >= '0' && c0 <= '9') { + int i = c0 - '0'; + if (c1 >= '0' && c1 <= '9') { + i *= 10; + i += c1 - '0'; + } + if (i <= 31) { + *reg = i; + return M_SUCCESS; + } + } + + ERROR_POS(token, "unknown register $%s", token.text); + return M_ERROR; +} + +static int parse_instruction_r(struct parser *parser, + struct mips32_instruction *ins) +{ + // format: rs, rt, rd + enum mips32_register reg; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rd = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rs = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rt = reg; + + return M_SUCCESS; +} + +static int parse_instruction_r2(struct parser *parser, + struct mips32_instruction *ins) +{ + // format: rs, rt + enum mips32_register reg; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rs = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rt = reg; + + return M_SUCCESS; +} + +static int parse_instruction_rs(struct parser *parser, + struct mips32_instruction *ins) +{ + // format: rs + enum mips32_register reg; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rs = reg; + + return M_SUCCESS; +} + +static int parse_instruction_rd(struct parser *parser, + struct mips32_instruction *ins) +{ + // format: rd + enum mips32_register reg; + + if 
(parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rd = reg; + + return M_SUCCESS; +} + +static int parse_instruction_i(struct parser *parser, + struct mips32_instruction *ins) +{ + // format: rs, rt, immd + enum mips32_register reg; + struct token token; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->I_data.rt = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->I_data.rs = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (assert_token(parser, TOK_NUMBER, &token)) + return M_ERROR; + + if (token.number >= MAX16) + return M_ERROR; + ins->I_data.immd = token.number; + + return M_SUCCESS; +} + +static int parse_instruction_offset(struct parser *parser, + uint32_t max, + struct mips32_instruction *ins) +{ + uint32_t n; + if (get_offset(parser, &n) || n > max) + return M_ERROR; + + switch (max) { + case MAX26: + ins->J_data.target = n; + break; + case MAX16: + ins->B_data.offset = n; + break; + } + + return M_SUCCESS; +} + +static int parse_instruction_j(struct parser *parser, + struct mips32_instruction *ins) +{ + uint32_t n; + if (get_target(parser, &n) || n > MAX26) + return M_ERROR; + ins->J_data.target = n; + + return M_SUCCESS; +} + +static int parse_instruction_jr(struct parser *parser, + struct mips32_instruction *ins) +{ + uint32_t n; + if (get_target(parser, &n) || n > MAX26) + return M_ERROR; + ins->J_data.target = n; + + return M_SUCCESS; +} + +static int parse_instruction_branch_equal(struct parser *parser, + struct mips32_instruction *ins) +{ + enum mips32_register reg; + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rs = reg; + + return M_SUCCESS; +} + +static int parse_instruction_branch(struct parser *parser, + struct mips32_instruction *ins) +{ + enum mips32_register reg; + uint32_t n; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->B_data.rs = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) 
+ return M_ERROR; + + if (get_offset(parser, &n) || n > MAX16) + return M_ERROR; + ins->B_data.offset = n; + + return M_SUCCESS; +} + +static int parse_instruction_sl(struct parser *parser, + struct mips32_instruction *ins) +{ + enum mips32_register reg; + uint32_t offset = 0; + struct token token; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->I_data.rt = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (peek_token(parser, &token)) + return M_ERROR; + + if (token.type != TOK_LPAREN) + if (get_offset(parser, &offset)) + return M_ERROR; + ins->I_data.immd = offset; + + if (peek_token(parser, &token)) + return M_ERROR; + + if (token.type == TOK_NL) { + ins->I_data.rs = MIPS32_REG_ZERO; + return M_SUCCESS; + } + + if (assert_token(parser, TOK_LPAREN, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->I_data.rs = reg; + + if (assert_token(parser, TOK_RPAREN, NULL)) + return M_ERROR; + + return M_SUCCESS; +} + +static int parse_instruction_sli(struct parser *parser, + struct mips32_instruction *ins) +{ + enum mips32_register reg; + struct token token; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->I_data.rt = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX16) + return M_ERROR; + ins->I_data.immd = token.number; + + return M_SUCCESS; +} + +static int parse_instruction_s(struct parser *parser, + struct mips32_instruction *ins) +{ + enum mips32_register reg; + struct token token; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rd = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rt = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX5) + return M_ERROR; + ins->R_data.shamt = token.number; + + return 
M_SUCCESS; +} + +static int parse_instruction_sv(struct parser *parser, + struct mips32_instruction *ins) +{ + enum mips32_register reg; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rd = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rt = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rs = reg; + + return M_SUCCESS; +} + +static int parse_instruction(struct parser *parser, + union mips_instruction *ins, + struct token ident) +{ + struct mips32_instruction instruction; + enum mips32_parse_format format; + int res = M_SUCCESS; + + if (get_instruction(ident.text, &instruction)) { + ERROR_POS(ident, "unknown instruction '%s'", ident.text); + return M_ERROR; + } + + ins->mips32 = instruction; + format = mips32_parse_formats[instruction.type]; + + switch (format) { + case MIPS32_PARSE_R: + res = parse_instruction_r(parser, &ins->mips32); + break; + case MIPS32_PARSE_R2: + res = parse_instruction_r2(parser, &ins->mips32); + break; + case MIPS32_PARSE_RS: + res = parse_instruction_rs(parser, &ins->mips32); + break; + case MIPS32_PARSE_RD: + res = parse_instruction_rd(parser, &ins->mips32); + break; + case MIPS32_PARSE_I: + res = parse_instruction_i(parser, &ins->mips32); + break; + case MIPS32_PARSE_J: + res = parse_instruction_j(parser, &ins->mips32); + break; + case MIPS32_PARSE_JR: + res = parse_instruction_jr(parser, &ins->mips32); + break; + case MIPS32_PARSE_O16: + res = parse_instruction_offset(parser, MAX16, &ins->mips32); + break; + case MIPS32_PARSE_O26: + res = parse_instruction_offset(parser, MAX26, &ins->mips32); + break; + case MIPS32_PARSE_BE: + res = parse_instruction_branch_equal(parser, &ins->mips32); + break; + case MIPS32_PARSE_BZ: + res = parse_instruction_branch(parser, &ins->mips32); + break; + case MIPS32_PARSE_SL: + res = parse_instruction_sl(parser, 
&ins->mips32); + break; + case MIPS32_PARSE_SLI: + res = parse_instruction_sli(parser, &ins->mips32); + break; + case MIPS32_PARSE_S: + res = parse_instruction_s(parser, &ins->mips32); + break; + case MIPS32_PARSE_SV: + res = parse_instruction_sv(parser, &ins->mips32); + break; + } + + if (res == M_SUCCESS && assert_eol(parser)) + return M_ERROR; + + return res; +} + + +static int parse_directive_align(struct parser *parser, + struct mips32_directive *directive) +{ + struct token token; + if (assert_token(parser, TOK_NUMBER, &token)) + return M_ERROR; + + if (token.number < 0) { + ERROR_POS(token, "cannot align negative"); + return M_ERROR; + } + + if (token.number > MAX16) { + ERROR_POS(token, "cannot align more than 65kb"); + return M_ERROR; + } + + directive->type = MIPS32_DIRECTIVE_ALIGN; + directive->align = token.number; + + return M_SUCCESS; +} + +static int parse_directive_space(struct parser *parser, + struct mips32_directive *directive) +{ + struct token token; + if (assert_token(parser, TOK_NUMBER, &token)) + return M_ERROR; + + if (token.number < 0) { + ERROR_POS(token, "cannot reserve negative"); + return M_ERROR; + } + + if (token.number > MAX16) { + ERROR_POS(token, "cannot reserve more than 65kb"); + return M_ERROR; + } + + directive->type = MIPS32_DIRECTIVE_SPACE; + directive->space = token.number; + + return M_SUCCESS; +} + +static int parse_directive_whb(struct parser *parser, + struct mips32_directive *directive, + enum mips32_directive_type type) +{ + struct token token; + uint32_t size = 0; + uint32_t len = 0; + + switch (type) { + case MIPS32_DIRECTIVE_WORD: + size = UINT32_MAX; + break; + case MIPS32_DIRECTIVE_HALF: + size = UINT16_MAX; + break; + case MIPS32_DIRECTIVE_BYTE: + size = UINT8_MAX; + break; + default: + } + + directive->type = type; + + while (1) { + if (assert_token(parser, TOK_NUMBER, &token)) + return M_ERROR; + + if (len >= MAX_ARG_LENGTH) { + ERROR_POS(token, "directives cannot be longer than " + "%d arguments", 
MAX_ARG_LENGTH); + return M_ERROR; + } + + if (token.number > size) { + ERROR_POS(token, "number cannot execede max size of: " + "%d", size); + return M_ERROR; + } + + switch (type) { + case MIPS32_DIRECTIVE_WORD: + directive->words[len++] = token.number; + break; + case MIPS32_DIRECTIVE_HALF: + directive->halfs[len++] = token.number; + break; + case MIPS32_DIRECTIVE_BYTE: + directive->bytes[len++] = token.number; + break; + default: + } + + if (peek_token(parser, &token)) + return M_ERROR; + + if (token.type == TOK_COMMA) { + next_token(parser, NULL); + continue; + } + + break; + } + + return M_SUCCESS; +} + +static int parse_section(struct parser *parser, + struct mips32_directive *directive, + char name[MAX_LEX_LENGTH]) +{ + directive->type = MIPS32_DIRECTIVE_SECTION; + strcpy(directive->name, name); + + struct section *sec; + if (sectbl_get(&parser->sec_tbl, &sec, name) == M_SUCCESS) { + parser->sec_tbl.current = sec; + return M_SUCCESS; + } + + if (sectbl_alloc(&parser->sec_tbl, &sec, name)) + return M_ERROR; + + parser->sec_tbl.current = sec; + return M_SUCCESS; +} + +static int parse_directive(struct parser *parser, + union mips_directive *directive) +{ + struct token token; + if (assert_token(parser, TOK_DIRECTIVE, &token)) + return M_ERROR; + + // .align n + if (strcmp(token.text, "align") == 0) + return parse_directive_align(parser, &directive->mips32); + else if (strcmp(token.text, "space") == 0) + return parse_directive_space(parser, &directive->mips32); + else if (strcmp(token.text, "word") == 0) + return parse_directive_whb(parser, &directive->mips32, + MIPS32_DIRECTIVE_WORD); + else if (strcmp(token.text, "half") == 0) + return parse_directive_whb(parser, &directive->mips32, + MIPS32_DIRECTIVE_HALF); + else if (strcmp(token.text, "byte") == 0) + return parse_directive_whb(parser, &directive->mips32, + MIPS32_DIRECTIVE_BYTE); + else + return parse_section(parser, &directive->mips32, token.text); +} + +int mips32_parser_init(struct lexer *lexer, struct 
parser *parser) +{ + if (parser_init(lexer, parser)) + return M_ERROR; + parser->parse_instruction = parse_instruction; + parser->is_instruction = is_instruction; + parser->parse_directive = parse_directive; + return M_SUCCESS; +} + +void mips32_parser_free(struct parser *parser) +{ + parser_free(parser); +} diff --git a/masm/parse_mips32.h b/masm/parse_mips32.h new file mode 100644 index 0000000..6de154f --- /dev/null +++ b/masm/parse_mips32.h @@ -0,0 +1,14 @@ +/* Copyright (c) 2024 Freya Murphy */ + +#ifndef __PARSE_MIPS32_H__ +#define __PARSE_MIPS32_H__ + +#include "parse.h" + +/* initzlize a mips32 parser*/ +int mips32_parser_init(struct lexer *lexer, struct parser *parser); + +/* free the mips32 parser */ +void mips32_parser_free(struct parser *parser); + +#endif /* __PARSE_MIPS32_H__ */ diff --git a/masm/reftbl.c b/masm/reftbl.c new file mode 100644 index 0000000..bdd1f07 --- /dev/null +++ b/masm/reftbl.c @@ -0,0 +1,47 @@ +#include <string.h> +#include <stdlib.h> +#include <mips.h> +#include <merror.h> +#include <mlimits.h> + +#include "parse.h" + +#define RELTBL_INIT_LEN 8 + +int reftbl_init(struct reference_table *ref_tbl) +{ + ref_tbl->len = RELTBL_INIT_LEN; + ref_tbl->count = 0; + ref_tbl->references = malloc(sizeof(struct reference) * + RELTBL_INIT_LEN); + + if (ref_tbl->references == NULL) { + ERROR("cannot alloc"); + return M_ERROR; + } + + return M_SUCCESS; +} + +void reftbl_free(struct reference_table *ref_tbl) +{ + free(ref_tbl->references); +} + +int reftbl_push(struct reference_table *ref_tbl, struct reference reference) +{ + if (ref_tbl->count >= ref_tbl->len) { + ref_tbl->len *= 2; + ref_tbl->references = realloc(ref_tbl->references, + sizeof(struct reference) * ref_tbl->len); + + if (ref_tbl->references == NULL) { + ERROR("cannot realloc"); + return M_ERROR; + } + } + + ref_tbl->references[ref_tbl->count++] = reference; + + return M_SUCCESS; +} diff --git a/masm/sectbl.c b/masm/sectbl.c new file mode 100644 index 0000000..f568a6f --- /dev/null 
+++ b/masm/sectbl.c
@@ -0,0 +1,141 @@
+#include <string.h>
+#include <stdlib.h>
+#include <mips.h>
+#include <merror.h>
+#include <mlimits.h>
+
+#include "parse.h"
+
+#define SECTBL_INIT_LEN 8
+static const char inital_section[MAX_LEX_LENGTH] = "data";
+
+/* initialize a section table with room for SECTBL_INIT_LEN sections and
+ * make a freshly allocated "data" section the current one; returns
+ * M_ERROR if an allocation fails */
+int sectbl_init(struct section_table *sec_tbl)
+{
+	sec_tbl->len = SECTBL_INIT_LEN;
+	sec_tbl->count = 0;
+	sec_tbl->total_ins = 0;
+	/* sectbl_alloc only rebases a non-NULL current pointer */
+	sec_tbl->current = NULL;
+	sec_tbl->sections = malloc(sizeof(struct section) * SECTBL_INIT_LEN);
+
+	if (sec_tbl->sections == NULL) {
+		ERROR("cannot alloc");
+		return M_ERROR;
+	}
+
+	if (sectbl_alloc(sec_tbl, &sec_tbl->current, inital_section))
+		return M_ERROR;
+
+	return M_SUCCESS;
+}
+
+/* free every section's instruction buffer, then the section array */
+void sectbl_free(struct section_table *sec_tbl)
+{
+	for (uint32_t i = 0; i < sec_tbl->count; i++) {
+		free(sec_tbl->sections[i].ins);
+	}
+	free(sec_tbl->sections);
+}
+
+/* append a new empty section called name and store a pointer to it in
+ * *sec; the section starts at the table's running instruction total.
+ * returns M_ERROR, without adding a section, if an allocation fails */
+int sectbl_alloc(struct section_table *sec_tbl, struct section **sec,
+		const char name[MAX_LEX_LENGTH])
+{
+	if (sec_tbl->count >= sec_tbl->len) {
+		/* realloc may move the array, so remember the current
+		 * section by index and rebase the pointer afterwards */
+		int has_cur = sec_tbl->current != NULL;
+		size_t cur_idx = 0;
+		if (has_cur)
+			cur_idx = sec_tbl->current - sec_tbl->sections;
+
+		/* grow through a temporary so the old array is not
+		 * leaked (and len stays correct) when realloc fails */
+		uint32_t new_len = sec_tbl->len * 2;
+		struct section *grown = realloc(sec_tbl->sections,
+				sizeof(struct section) * new_len);
+
+		if (grown == NULL) {
+			ERROR("cannot realloc");
+			return M_ERROR;
+		}
+
+		sec_tbl->sections = grown;
+		sec_tbl->len = new_len;
+		if (has_cur)
+			sec_tbl->current = &grown[cur_idx];
+		/* NOTE(review): struct reference (parse.h) also stores
+		 * struct section pointers; they are not rebased here */
+	}
+
+	/* get the instruction buffer before counting the entry so a
+	 * failure cannot leave a counted section without a buffer */
+	union mips_instruction *ins =
+		malloc(sizeof(union mips_instruction) * SECTBL_INIT_LEN);
+
+	if (ins == NULL) {
+		ERROR("cannot alloc");
+		return M_ERROR;
+	}
+
+	struct section *temp;
+	temp = &sec_tbl->sections[sec_tbl->count++];
+	strcpy(temp->name,name);
+	temp->count = 0;
+	temp->len = SECTBL_INIT_LEN;
+	temp->start = sec_tbl->total_ins;
+	temp->alignment = 1;
+	temp->ins = ins;
+
+	*sec = temp;
+	return M_SUCCESS;
+}
+
+/* append ins to section, doubling its buffer when full, and bump the
+ * table-wide instruction total; returns M_ERROR if growing fails */
+int sectbl_push(struct section_table *sec_tbl, struct section *section,
+		union mips_instruction ins)
+{
+	if (section->count >= section->len) {
+		/* keep the old buffer reachable if realloc fails */
+		uint32_t new_len = section->len * 2;
+		union mips_instruction *grown = realloc(section->ins,
+				sizeof(union mips_instruction) * new_len);
+
+		if (grown == NULL) {
+			ERROR("cannot realloc");
+			return M_ERROR;
+		}
+
+		section->ins = grown;
+		section->len = new_len;
+	}
+
+	section->ins[section->count++] = ins;
+	sec_tbl->total_ins++;
+
+	return M_SUCCESS;
+}
+
+/* find the section named name; stores it in *sec when sec is non-NULL.
+ * returns M_SUCCESS if found, M_ERROR otherwise */
+int sectbl_get(struct section_table *sec_tbl, struct section **sec,
+		const char name[MAX_LEX_LENGTH])
+{
+	for (uint32_t i = 0; i < sec_tbl->count; i++) {
+		struct section *temp = &sec_tbl->sections[i];
+		if (strcmp(name, temp->name) == 0) {
+			if (sec != NULL)
+				*sec = temp;
+			return M_SUCCESS;
+		}
+	}
+
+	return M_ERROR;
+}
diff --git a/masm/strtbl.c b/masm/strtbl.c
new file mode 100644
index 0000000..b01bb92
--- /dev/null
+++ b/masm/strtbl.c
@@ -0,0 +1,65 @@
+#include <merror.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "asm.h"
+
+/* search the table for str by comparing against the text that starts at
+ * every byte offset; stores the matching offset in *res when res is
+ * non-NULL. returns M_SUCCESS if found, M_ERROR otherwise */
+int strtbl_get_str(struct str_table *str_tbl, const char *str, size_t *res)
+{
+	for (size_t i = 0; i < str_tbl->size; i ++) {
+		if (strcmp(str_tbl->ptr + i, str) == 0) {
+			if (res != NULL)
+				*res = i;
+			return M_SUCCESS;
+		}
+	}
+
+	return M_ERROR;
+}
+
+/* return the offset of str in *res (when res is non-NULL), appending str
+ * and its terminator to the table if it is not already present.
+ * returns M_ERROR if the table cannot be grown */
+int strtbl_write_str(struct str_table *str_tbl, const char *str, size_t *res)
+{
+	if (strtbl_get_str(str_tbl, str, res) == M_SUCCESS)
+		return M_SUCCESS;
+
+	size_t len = strlen(str);
+	char *new = realloc(str_tbl->ptr, str_tbl->size + len + 1);
+	if (new == NULL)
+		return M_ERROR;
+	str_tbl->ptr = new;
+	memcpy(str_tbl->ptr + str_tbl->size, str, len + 1);
+
+	if (res != NULL)
+		*res = str_tbl->size;
+
+	str_tbl->size += len + 1;
+	return M_SUCCESS;
+}
+
+/* start the table with a single empty string at offset 0 */
+void strtbl_init(struct str_table *str_tbl)
+{
+	str_tbl->ptr = malloc(1);
+	if (str_tbl->ptr == NULL) {
+		/* there is no return value to report this through; leave
+		 * the table empty (ptr NULL, size 0) for the caller to
+		 * detect rather than write through a null pointer */
+		str_tbl->size = 0;
+		return;
+	}
+
+	str_tbl->size = 1;
+	*str_tbl->ptr = '\0';
+}
+
+/* release the table's buffer */
+void strtbl_free(struct str_table *str_tbl)
+{
+	free(str_tbl->ptr);
+}
diff --git a/masm/symtbl.c b/masm/symtbl.c
new file mode 100644
index 0000000..b75c752
--- /dev/null
+++ b/masm/symtbl.c
@@ -0,0 +1,68 @@
+#include <merror.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "parse.h"
+
+#define SYMTBL_INIT_LEN 24
+
+/* initialize an empty symbol table with room for SYMTBL_INIT_LEN symbols;
+ * returns M_ERROR if the allocation fails */
+int symtbl_init(struct symbol_table *sym_tbl)
+{
+	sym_tbl->len = SYMTBL_INIT_LEN;
+	sym_tbl->count = 0;
+	sym_tbl->symbols = malloc(sizeof(struct symbol) * SYMTBL_INIT_LEN);
+
+	if (sym_tbl->symbols == NULL) {
+		ERROR("cannot alloc");
+		return M_ERROR;
+	}
+
+	return M_SUCCESS;
+}
+
+/* release the symbol array */
+void symtbl_free(struct symbol_table *sym_tbl)
+{
+	free(sym_tbl->symbols);
+}
+
+/* append a copy of sym, doubling the array when it is full; returns
+ * M_ERROR, leaving the table unchanged, if growing fails */
+int symtbl_push(struct symbol_table *sym_tbl, struct symbol sym)
+{
+	if (sym_tbl->count >= sym_tbl->len) {
+		/* keep the old array reachable if realloc fails */
+		uint32_t new_len = sym_tbl->len * 2;
+		struct symbol *grown = realloc(sym_tbl->symbols,
+				sizeof(struct symbol) * new_len);
+		if (grown == NULL) {
+			ERROR("cannot relloc");
+			return M_ERROR;
+		}
+
+		sym_tbl->symbols = grown;
+		sym_tbl->len = new_len;
+	}
+
+	sym_tbl->symbols[sym_tbl->count++] = sym;
+	return M_SUCCESS;
+}
+
+/* find the symbol named name; stores a pointer to it in *ptr when ptr is
+ * non-NULL. returns M_SUCCESS if found, M_ERROR otherwise */
+int symtbl_find(struct symbol_table *sym_tbl, struct symbol **ptr,
+		const char name[MAX_LEX_LENGTH])
+{
+	for (uint32_t i = 0; i < sym_tbl->count; i++) {
+		struct symbol *sym = &sym_tbl->symbols[i];
+		if (strcmp(sym->name, name) == 0) {
+			if (ptr != NULL)
+				*ptr = sym;
+			return M_SUCCESS;
+		}
+	}
+	return M_ERROR;
+}
diff --git a/masm/test.asm b/masm/test.asm
new file mode 100644
index 0000000..c3b61fb
--- /dev/null
+++ b/masm/test.asm
@@ -0,0 +1,22 @@
+.text
+.align 2
+
+main:
+	add $zero,$t7,$t7
+	xori $a0, $v1, 69
+	addi $a0, $v1, 69
+	nor $s0, $s1, $s2
+
+	bltzall $s7, 0x50
+
+	lui $t7, 0x55
+	lw $t0, 18($t7)
+
+	sll $t0, $s0, 17
+test:
+	mult $a0, $s6
+
+	mfhi $s0
+	mtlo $s7
+
+	j test
|