diff options
Diffstat (limited to '')
-rw-r--r-- | masm/asm.c | 718 | ||||
-rw-r--r-- | masm/asm.h | 238 | ||||
-rw-r--r-- | masm/asm_mips32.c | 746 | ||||
-rw-r--r-- | masm/main.c | 13 | ||||
-rw-r--r-- | masm/parse.c | 872 | ||||
-rw-r--r-- | masm/parse.h | 141 | ||||
-rw-r--r-- | masm/parse_mips32.c | 872 | ||||
-rw-r--r-- | masm/parse_mips32.h | 14 | ||||
-rw-r--r-- | masm/reftbl.c | 47 | ||||
-rw-r--r-- | masm/reltab.c | 43 | ||||
-rw-r--r-- | masm/sectab.c | 166 | ||||
-rw-r--r-- | masm/sectbl.c | 159 | ||||
-rw-r--r-- | masm/strtab.c | 54 | ||||
-rw-r--r-- | masm/strtbl.c | 54 | ||||
-rw-r--r-- | masm/symtab.c | 70 | ||||
-rw-r--r-- | masm/symtbl.c | 57 |
16 files changed, 2094 insertions, 2170 deletions
@@ -1,77 +1,717 @@ #include <merror.h> +#include <mips.h> +#include <stdio.h> #include <stdlib.h> +#include <elf.h> +#include <string.h> +#include <stddef.h> #include "asm.h" +#include "mlimits.h" +#include "parse.h" -int assembler_init(struct assembler *assembler, const char *path) +extern char *current_file; + +#define SYMSEC_STUB -1 +#define SYMSEC_EXTERN -1 + +static int create_symbol(struct assembler *assembler, + const char name[MAX_LEX_LENGTH], + ssize_t section_idx, + size_t section_offset, + unsigned char bind) { - if (lexer_init(path, &assembler->lexer)) + size_t str_off; + if (strtab_write_str(&assembler->strtab, name, &str_off)) + return M_ERROR; + + Elf32_Sym symbol = { + .st_name = str_off, + .st_value = section_offset, + .st_size = 0, + .st_info = ELF32_ST_INFO(bind, STT_NOTYPE), + .st_other = ELF32_ST_VISIBILITY(STV_DEFAULT), + .st_shndx = section_idx, + }; + + // dont put magic flag values inside symbol, only real indexes + if (section_idx < 0) + symbol.st_shndx = 0; + + if (symtab_push(&assembler->symtab, symbol, section_idx)) + return M_ERROR; + + return M_SUCCESS; +} + +static int find_symbol_or_stub(struct assembler *assembler, + const char name[MAX_LEX_LENGTH], + Elf32_Sym **res, + size_t *res2) +{ + if (symtab_find(&assembler->symtab, res, res2, name) == M_SUCCESS) + return M_SUCCESS; + + if (create_symbol(assembler, name, SYMSEC_STUB, 0, STB_LOCAL)) + return M_ERROR; + + size_t idx = assembler->symtab.len - 1; + + if (res != NULL) + *res = &assembler->symtab.symbols[idx]; + if (res2 != NULL) + *res2 = idx; + + return M_SUCCESS; +} + +static int handle_directive(struct assembler *assembler, + struct mips_directive *directive) +{ + switch (directive->type) { + case MIPS_DIRECTIVE_SECTION: { + struct section_table *sec_tbl = &assembler->sectab; + struct section *sec; + if (sectab_get(sec_tbl, &sec, directive->name) + == M_SUCCESS) { + sec_tbl->current = sec; + break; + } + + if (sectab_alloc(sec_tbl, &sec, directive->name)) + return M_ERROR; + + sec_tbl->current = sec; + break; + } + + case MIPS_DIRECTIVE_ALIGN: { + assembler->sectab.current->alignment = + 1 << directive->align; + break; + } + + case MIPS_DIRECTIVE_SPACE: { + struct section_entry entry; + entry.type = ENT_NO_DATA; + entry.size = directive->space; + if (sec_push(assembler->sectab.current, entry)) + return M_ERROR; + break; + } + + case MIPS_DIRECTIVE_WORD: { + for (uint32_t i = 0; i < directive->len; i++) { + struct section_entry entry; + entry.type = ENT_WORD; + entry.word = directive->words[i]; + if (sec_push(assembler->sectab.current, + entry)) + return M_ERROR; + } + break; + } + + case MIPS_DIRECTIVE_HALF: { + for (uint32_t i = 0; i < directive->len; i++) { + struct section_entry entry; + entry.type = ENT_HALF; + entry.half = directive->halfs[i]; + if (sec_push(assembler->sectab.current, + entry)) + return M_ERROR; + } + break; + } + + case MIPS_DIRECTIVE_BYTE: { + for (uint32_t i = 0; i < directive->len; i++) { + struct section_entry entry; + entry.type = ENT_BYTE; + entry.byte = directive->bytes[i]; + if (sec_push(assembler->sectab.current, + entry)) + return M_ERROR; + } + break; + } + + case MIPS_DIRECTIVE_EXTERN: { + if (symtab_find(&assembler->symtab, NULL, NULL, + directive->name) == M_SUCCESS) { + ERROR("cannot extern local symbol '%s'", + directive->name); + return M_ERROR; + } + + if (create_symbol(assembler, directive->name, SYMSEC_EXTERN, 0, + STB_GLOBAL)) + return M_ERROR; + + break; + } + + case MIPS_DIRECTIVE_GLOBL: { + Elf32_Sym *sym; + if (symtab_find(&assembler->symtab, &sym, NULL, + directive->name) == M_SUCCESS) { + sym->st_info = ELF32_ST_INFO(STB_GLOBAL, STT_NOTYPE); + break; + } + + if (create_symbol(assembler, directive->name, SYMSEC_STUB, 0, + STB_GLOBAL)) + return M_ERROR; + + break; + } + } + + return M_SUCCESS; +} + +static int handle_label(struct assembler *assembler, + const char name[MAX_LEX_LENGTH]) +{ + struct section *cur = assembler->sectab.current; + + Elf32_Sym *ref; + size_t symidx; + + if (symtab_find(&assembler->symtab, &ref, &symidx, name) == M_SUCCESS) { + ssize_t *sec = &assembler->symtab.sections[symidx]; + + // check if the symbol is acutally jus a stub, if so + // we need to update it + if (*sec == SYMSEC_STUB) { + *sec = cur->index; + ref->st_value = sec_size(cur); + return M_SUCCESS; + } + + ERROR("redefined symbol '%s'", name); return M_ERROR; + } - if (parser_init(&assembler->lexer, &assembler->parser)) { - lexer_free(&assembler->lexer); + if (create_symbol(assembler, name, cur->index, sec_size(cur), + STB_LOCAL)) return M_ERROR; + + return M_SUCCESS; +} + +static int handle_ins(struct assembler *assembler, + struct ins_expr *expr) +{ + struct section *sec = assembler->sectab.current; + size_t secidx = sec->len; + + for (size_t i = 0; i < expr->ins_len; i++) { + struct mips_instruction *ins = + &expr->ins[i]; + struct reference *ref = + &expr->ref[i]; + struct section_entry entry; + + entry.type = ENT_INS; + entry.size = sizeof(struct mips_instruction); + entry.ins = *ins; + + if (sec_push(sec, entry)) + return M_ERROR; + + unsigned char type = 0; + switch (ref->type) { + case REF_NONE: + continue; + case REF_OFFESET: + type = R_MIPS_PC16; + break; + case REF_TARGET: + type = R_MIPS_26; + break; + } + + size_t symidx; + if (find_symbol_or_stub(assembler, ref->name, NULL, &symidx)) + return M_ERROR; + + Elf32_Rela rel = { + .r_info = ELF32_R_INFO(symidx, type), + .r_addend = ref->addend, + .r_offset = sec_index(sec, secidx + i), + }; + + if (reltab_push(&sec->reltab, rel)) + return M_ERROR; + + break; + } + + return M_SUCCESS; +} + +static int parse_file(struct assembler *assembler) +{ + struct parser *parser = &assembler->parser; + + while (1) { + struct expr expr; + int res = parser_next(parser, &expr); + + if (res == M_ERROR) + return M_ERROR; + + if (res == M_EOF) + return M_SUCCESS; + + switch (expr.type) { + case EXPR_INS: + if (handle_ins(assembler, &expr.ins)) + return M_ERROR; + break; + case EXPR_DIRECTIVE: + if (handle_directive(assembler, + &expr.directive)) + return M_ERROR; + break; + + case EXPR_LABEL: + if (handle_label(assembler, expr.label)) + return M_ERROR; + break; + + case EXPR_CONSTANT: + break; + } + } + + return M_SUCCESS; +} + +static int assemble_phdr(struct assembler *assembler, Elf32_Phdr **res, + uint32_t *res2) +{ + Elf32_Phdr *phdr = malloc(sizeof(Elf32_Phdr) * + assembler->sectab.len); + if (phdr == NULL) { + ERROR("cannot alloc"); + return M_ERROR;; + } + + for (uint32_t i = 0; i < assembler->sectab.len; i++) { + Elf32_Phdr *hdr = &phdr[i]; + struct section *sec = &assembler->sectab.sections[i]; + size_t size = sec_size(sec); + hdr->p_type = PT_LOAD; + hdr->p_flags = (sec->execute << 0) | + (sec->write << 1) | + (sec->read << 2); + hdr->p_offset = 0; + hdr->p_vaddr = 0; + hdr->p_paddr = 0; + hdr->p_filesz = size; + hdr->p_memsz = size; + hdr->p_align = sec->alignment; + } + + *res = phdr; + *res2 = assembler->sectab.len; + return M_SUCCESS; +} + +static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res, + uint32_t *res2) +{ + uint32_t max_entries = 4; // symtab, strtab, shstrtab + max_entries += assembler->sectab.len; // sections + max_entries += assembler->sectab.len; // reltabs per section + + Elf32_Shdr *shdr = malloc(sizeof(Elf32_Shdr) * max_entries); + + size_t str_off; + uint32_t count = 0; + + // eeltables + for (uint32_t i = 0; i < assembler->sectab.len; i++) { + struct section *sec = &assembler->sectab.sections[i]; + const char *prefix = ".reltab."; + char reltab_name[MAX_LEX_LENGTH + 8]; + + if (sec->reltab.len == 0) + continue; + + strcpy(reltab_name, prefix); + strcat(reltab_name, sec->name); + + if (strtab_write_str(&assembler->shstrtab, + reltab_name, &str_off)) { + free(shdr); + return M_ERROR; + } + + sec->reltab_shidx = count; + shdr[count++] = (Elf32_Shdr) { + .sh_name = str_off, + .sh_type = SHT_RELA, + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = 1, + .sh_entsize = sizeof(Elf32_Rela), + }; } - if (strtbl_init(&assembler->shstr_tbl)) { - parser_free(&assembler->parser); - lexer_free(&assembler->lexer); + // for each section + for (uint32_t i = 0; i < assembler->sectab.len; i++) { + struct section *sec = &assembler->sectab.sections[i]; + char name[MAX_LEX_LENGTH+1] = "."; + + strcat(name, sec->name); + if (strtab_write_str(&assembler->shstrtab, name, &str_off)) { + free(shdr); + return M_ERROR; + } + + sec->shdr_idx = count; + if (sec->reltab.len != 0) + shdr[sec->reltab_shidx].sh_info = count; + + shdr[count++] = (Elf32_Shdr){ + .sh_name = str_off, + .sh_type = SHT_PROGBITS, + .sh_flags = (sec->write << 0) | (sec->execute << 2) | + SHF_ALLOC, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = sec->alignment, + .sh_entsize = sizeof(struct mips_instruction), + }; + } + + // symbol table + if (strtab_write_str(&assembler->shstrtab, ".symtab", &str_off)) { + free(shdr); return M_ERROR; } - if (strtbl_init(&assembler->str_tbl)) { - strtbl_free(&assembler->shstr_tbl); - parser_free(&assembler->parser); - lexer_free(&assembler->lexer); + assembler->symtab_shidx = count; + shdr[count++] = (Elf32_Shdr) { + .sh_name = str_off, + .sh_type = SHT_SYMTAB, + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 1, + .sh_info = 0, + .sh_addralign = 1, + .sh_entsize = sizeof(Elf32_Sym), + }; + + // string table + if (strtab_write_str(&assembler->shstrtab, ".strtab", &str_off)) { + free(shdr); return M_ERROR; } - if (symtbl_init(&assembler->sym_tbl)) { - strtbl_free(&assembler->str_tbl); - strtbl_free(&assembler->shstr_tbl); - parser_free(&assembler->parser); - lexer_free(&assembler->lexer); + assembler->strtab_shidx = count; + shdr[count++] = (Elf32_Shdr) { + .sh_name = str_off, + .sh_type = SHT_STRTAB, + .sh_flags = SHF_STRINGS, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = 1, + .sh_entsize = 0, + }; + + // sh string table + if (strtab_write_str(&assembler->shstrtab, ".shstrtab", &str_off)) { + free(shdr); return M_ERROR; } - assembler->meta = NULL; - assembler->phdr = NULL; - assembler->shdr = NULL; - assembler->symtab = NULL; + assembler->shstrtab_shidx = count; + shdr[count++] = (Elf32_Shdr) { + .sh_name = str_off, + .sh_type = SHT_STRTAB, + .sh_flags = SHF_STRINGS, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = 1, + .sh_entsize = 0, + }; + + for (uint32_t i = 0; i < assembler->sectab.len; i++) { + struct section *sec = &assembler->sectab.sections[i]; + if (sec->reltab.len == 0) + continue; + shdr[sec->reltab_shidx].sh_link = + assembler->symtab_shidx; + } + + *res = shdr; + *res2 = count; return M_SUCCESS; } -void assembler_free(struct assembler *assembler) +static void update_offsets(struct assembler *assembler, Elf32_Ehdr *ehdr) +{ + Elf32_Shdr *shdr = (Elf32_Shdr *) assembler->shdr; + Elf32_Phdr *phdr = (Elf32_Phdr *) assembler->phdr; + uint32_t ptr = 0; + + // we must now correct offets and sizes inside the ehdr, phdr, + // and shdr + ptr += sizeof(Elf32_Ehdr); + + // phdr + ehdr->e_phoff = ptr; + ptr += assembler->phdr_len * sizeof(Elf32_Phdr); + + // reltbls + for (uint32_t i = 0; i < assembler->sectab.len; i++) { + struct section *sec = &assembler->sectab.sections[i]; + if (sec->reltab.len == 0) + continue; + int idx = sec->reltab_shidx; + int len = sec->reltab.len; + shdr[idx].sh_offset = ptr; + shdr[idx].sh_size = len * sizeof(Elf32_Rela); + ptr += len * sizeof(Elf32_Rela); + } + + // sections + for (uint32_t i = 0; i < assembler->sectab.len; i++) { + struct section *sec = &assembler->sectab.sections[i]; + int idx = sec->shdr_idx; + phdr[i].p_offset = ptr; + phdr[i].p_vaddr = ptr; + phdr[i].p_paddr = ptr; + shdr[idx].sh_offset = ptr; + shdr[idx].sh_size = phdr[i].p_filesz; + shdr[idx].sh_addr = phdr[i].p_vaddr; + shdr[idx].sh_addralign = phdr[i].p_align; + ptr += phdr[i].p_filesz; + } + + // symtab + shdr[assembler->symtab_shidx].sh_offset = ptr; + shdr[assembler->symtab_shidx].sh_link = assembler->strtab_shidx; + shdr[assembler->symtab_shidx].sh_size = + assembler->symtab.len * sizeof(Elf32_Sym); + ptr += assembler->symtab.len * sizeof(Elf32_Sym); + + // strtab + shdr[assembler->strtab_shidx].sh_offset = ptr; + shdr[assembler->strtab_shidx].sh_size = assembler->strtab.size; + ptr += assembler->strtab.size; + + // shstrtab + shdr[assembler->shstrtab_shidx].sh_offset = ptr; + shdr[assembler->shstrtab_shidx].sh_size = assembler->shstrtab.size; + ptr += assembler->shstrtab.size; + + // shdr + ehdr->e_shoff = ptr; +} + +static void update_sym_shindx(struct assembler *assembler) +{ + for (size_t i = 0; i < assembler->symtab.len; i++) + { + Elf32_Sym *sym = &assembler->symtab.symbols[i]; + ssize_t sec = assembler->symtab.sections[i]; + + if (sec >= 0) { + sym->st_shndx = assembler-> + sectab.sections[sec].shdr_idx; + } + } +} + +static int write_file(struct assembler *assembler, Elf32_Ehdr *ehdr, + const char *path) { - if (assembler->meta) { - for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { - if (assembler->meta[i].reltbl != NULL) { - free(assembler->meta[i].reltbl); + FILE *out = fopen(path, "w"); + + if (out == NULL) { + ERROR("cannot write '%s'", path); + return M_ERROR; + } + + // ehdr + fwrite(ehdr, sizeof(Elf32_Ehdr), 1, out); + + // phdr + fwrite(assembler->phdr, sizeof(Elf32_Phdr), assembler->phdr_len, out); + + // reltbls + for (uint32_t i = 0; i < assembler->sectab.len; i++) { + struct section *sec = &assembler->sectab.sections[i]; + if (sec->reltab.len == 0) + continue; + void *ptr = sec->reltab.data; + int len = sec->reltab.len; + fwrite(ptr, sizeof(Elf32_Rela), len, out); + } + + // sections + for (uint32_t i = 0; i < assembler->sectab.len; i++) { + struct section *sec = &assembler->sectab.sections[i]; + for (uint32_t j = 0; j < sec->len; j++) { + struct section_entry *entry = &sec->entries[j]; + size_t size = entry->size; + fwrite(&entry->data, size, 1, out); + while(size % sec->alignment) { + uint8_t zero = 0; + fwrite(&zero, 1, 1, out); + size++; } } - free(assembler->meta); } + // sym tbl + fwrite(assembler->symtab.symbols, sizeof(Elf32_Sym), + assembler->symtab.len, out); + + // str tbl + fwrite(assembler->strtab.ptr, assembler->strtab.size, 1, out); + + // shstr tbl + fwrite(assembler->shstrtab.ptr, assembler->shstrtab.size, 1, out); + + // shdr + fwrite(assembler->shdr, sizeof(Elf32_Shdr), assembler->shdr_len, out); + + fclose(out); + + return M_SUCCESS; +} + +static int assemble_elf(struct assembler *assembler, const char *out) +{ + if (assemble_phdr(assembler, (Elf32_Phdr **) &assembler->phdr, + &assembler->phdr_len)) { + return M_ERROR; + } + + if (assemble_shdr(assembler, (Elf32_Shdr **) &assembler->shdr, + &assembler->shdr_len)) { + return M_ERROR; + }; + + Elf32_Ehdr ehdr = { + .e_ident = { + [EI_MAG0] = ELFMAG0, + [EI_MAG1] = ELFMAG1, + [EI_MAG2] = ELFMAG2, + [EI_MAG3] = ELFMAG3, + [EI_CLASS] = ELFCLASS32, + [EI_DATA] = ELFDATA2LSB, + [EI_VERSION] = EV_CURRENT, + [EI_OSABI] = ELFOSABI_NONE, + [EI_ABIVERSION] = 0x00, + [EI_PAD] = 0x00, + }, + .e_type = ET_REL, + .e_machine = EM_MIPS, + .e_version = EV_CURRENT, + .e_entry = 0x00, + .e_phoff = 0x00, + .e_shoff = 0x00, + .e_flags = EF_MIPS_ARCH_32R6, + .e_ehsize = sizeof(Elf32_Ehdr), + .e_phentsize = sizeof(Elf32_Phdr), + .e_phnum = assembler->phdr_len, + .e_shentsize = sizeof(Elf32_Shdr), + .e_shnum = assembler->shdr_len, + .e_shstrndx = assembler->shstrtab_shidx, + }; + + update_offsets(assembler, &ehdr); + update_sym_shindx(assembler); + + if (write_file(assembler, &ehdr, out)) + return M_ERROR; + + return M_SUCCESS; +} + +int assemble_file(struct assembler_arguments args) +{ + struct assembler assembler; + int res = M_SUCCESS; + + current_file = args.in_file; + + if (assembler_init(&assembler, args.in_file)) + return M_ERROR; + + if (res == M_SUCCESS) + res = parse_file(&assembler); + + if (res == M_SUCCESS) + res = assemble_elf(&assembler, args.out_file); + + assembler_free(&assembler); + + return res; +} + +int assembler_init(struct assembler *assembler, const char *path) +{ + if (lexer_init(path, &assembler->lexer)) + return M_ERROR; + + if (parser_init(&assembler->lexer, &assembler->parser)) + return M_ERROR; + + if (strtab_init(&assembler->shstrtab)) + return M_ERROR; + + if (strtab_init(&assembler->strtab)) + return M_ERROR; + + if (symtab_init(&assembler->symtab)) + return M_ERROR; + + if (sectab_init(&assembler->sectab)) + return M_ERROR; + + assembler->symtab.strtab = &assembler->strtab; + assembler->phdr = NULL; + assembler->shdr = NULL; + + return M_SUCCESS; +} + +void assembler_free(struct assembler *assembler) +{ if (assembler->phdr) free(assembler->phdr); if (assembler->shdr) free(assembler->shdr); - if (assembler->symtab) - free(assembler->symtab); - symtbl_free(&assembler->sym_tbl); - strtbl_free(&assembler->str_tbl); - strtbl_free(&assembler->shstr_tbl); + sectab_free(&assembler->sectab); + symtab_free(&assembler->symtab); + strtab_free(&assembler->strtab); + strtab_free(&assembler->shstrtab); parser_free(&assembler->parser); lexer_free(&assembler->lexer); } - -int assemble_file(struct assembler_arguments args) { - switch (args.isa) { - case ISA_MIPS32: - return assemble_file_mips32(args); - } - return M_ERROR; -} @@ -4,101 +4,235 @@ #define __ASM_H__ #include <stddef.h> +#include <elf.h> +#include <mips.h> -#include "lex.h" #include "parse.h" +#include "lex.h" -enum symbol_flag { - SYM_LOCAL, - SYM_GLOBAL, - SYM_EXTERNAL, -}; +/// +/// ELF string table +/// -struct symbol { - char name[MAX_LEX_LENGTH]; - uint32_t index; - struct section *sec; - enum symbol_flag flag; +struct str_table { + // size of the ptr in bytes + size_t size; + // pointer that contains + // the strings + char *ptr; }; +/* initalize a string table */ +int strtab_init(struct str_table *strtab); + +/* free a string table */ +void strtab_free(struct str_table *strtab); + +/* get a string form the string table */ +int strtab_get_str(struct str_table *strtab, const char *str, size_t *res); + +/* get or append a string into the string table */ +int strtab_write_str(struct str_table *strtab, const char *str, size_t *res); + + +/// +/// ELF symbol table +/// + struct symbol_table { - uint32_t count; - uint32_t len; - struct symbol *symbols; + // length in size in sym ammt + size_t len; + size_t size; + + // the Elf symbols + Elf32_Sym *symbols; + + // keeps track of what section each ELF symbol is in + // *!!this is NOT the section header index in the ELF ehdr!!* + ssize_t *sections; + + // symbols reference a string table that acutally + // holds the strings + // + // *weak* ptr, we do not own this!!! + struct str_table *strtab; + }; -int symtbl_init(struct symbol_table *sym_tbl); -void symtbl_free(struct symbol_table *sym_tbl); +/* initalize a symbol table */ +int symtab_init(struct symbol_table *symtab); + +/* free the symbol table */ +void symtab_free(struct symbol_table *symtab); -int symtbl_push(struct symbol_table *sym_tbl, struct symbol sym); -int symtbl_find(struct symbol_table *sym_tbl, struct symbol **sym, +/* add a symbol to the symbol tbl */ +int symtab_push(struct symbol_table *symtab, const Elf32_Sym sym, + ssize_t sec_idx); + +/* find a symbol by name in the symbol table */ +int symtab_find(struct symbol_table *symtab, Elf32_Sym **sym, size_t *idx, const char name[MAX_LEX_LENGTH]); +/// +/// ELF relocation table +/// -struct str_table { - char *ptr; +struct relocation_table { + size_t len; size_t size; + Elf32_Rela *data; }; -/* initalize a string table */ -int strtbl_init(struct str_table *str_tbl); +/* initalize a relocation table */ +int reltab_init(struct relocation_table *reltab); -/* free a string table */ -void strtbl_free(struct str_table *str_tbl); +/* free the relocation table */ +void reltab_free(struct relocation_table *reltab); -/* get a string form the string table */ -int strtbl_get_str(struct str_table *str_tbl, const char *str, size_t *res); +/* add a entry to the relocation table */ +int reltab_push(struct relocation_table *reltab, const Elf32_Rela rel); -/* get or append a string into the string table */ -int strtbl_write_str(struct str_table *str_tbl, const char *str, size_t *res); +/// +/// section entry +/// -struct section_meta { - void *reltbl; - uint32_t reltbl_len; - uint32_t reltbl_idx; // reltbl idx in shdr - uint32_t shdr_idx; // sec idx in shdr - uint32_t v_addr; +enum section_entry_type { + ENT_INS, + ENT_WORD, + ENT_HALF, + ENT_BYTE, + ENT_NO_DATA, }; +/* holds a entry inside the section, i.e. a instruction, raw data, + * special directives */ +struct section_entry { + size_t size; + enum section_entry_type type; + + union { + // to get memory address + char data; + + // data + struct mips_instruction ins; + int32_t word; + int16_t half; + int8_t byte; + }; +}; + +/// +/// section +/// + +/* holds a section of the asm file (i.e. .text, .bss, .data) */ +struct section { + // length and size of amount of entries + size_t len; + size_t size; + struct section_entry *entries; + + // section name + char name[MAX_LEX_LENGTH]; + + // index of the section in + // all the sections + size_t index; + + // index of the sectio in + // the ELF shdr + size_t shdr_idx; + + // ELF section data + bool read; + bool write; + bool execute; + uint16_t alignment; + + // ELF tables + size_t reltab_shidx; + struct relocation_table reltab; +}; + +/* get the size of the section in bytes */ +size_t sec_size(struct section *section); + +/* get the index of a entry in bytes */ +size_t sec_index(struct section *section, size_t index); + +/* add a section entry to the section */ +int sec_push(struct section *section, struct section_entry entry); + +/* holds eachs section */ +struct section_table { + // length and size of amount of sections + size_t len; + size_t size; + struct section *sections; + + // the current section + struct section *current; +}; + +/* initalize the section table */ +int sectab_init(struct section_table *sec_tbl); + +/* free the section table */ +void sectab_free(struct section_table *sec_tbl); + +/* create a new section in the section table */ +int sectab_alloc(struct section_table *sec_tbl, struct section **sec, + const char name[MAX_LEX_LENGTH]); + +/* get a section by name from the section table */ +int sectab_get(struct section_table *sec_tbl, struct section **sec, + const char name[MAX_LEX_LENGTH]); + +/// +/// assembler +/// + struct assembler { // the token lexer struct lexer lexer; // the expression parser struct parser parser; - // shdr indexes - struct section_meta *meta; - size_t shstrtbl_idx; - size_t strtbl_idx; - size_t symtab_idx; + /// ELF tables + size_t symtab_shidx; + struct symbol_table symtab; + size_t strtab_shidx; + struct str_table strtab; + size_t shstrtab_shidx; + struct str_table shstrtab; - // symbols and strings - struct symbol_table sym_tbl; - struct str_table shstr_tbl; - struct str_table str_tbl; + /// Segments + struct section_table sectab; - // elf data - void *phdr; // void* since could be Elf32 or Elf64 - void *shdr; - void *symtab; + /// program header + Elf32_Phdr *phdr; uint32_t phdr_len; + + /// section header + Elf32_Shdr *shdr; uint32_t shdr_len; - uint32_t symtab_len; }; +/* defines arguments to the assembler */ struct assembler_arguments { char *in_file; char *out_file; - enum mips_isa isa; }; +/* initalize the assembler */ int assembler_init(struct assembler *assembler, const char *path); + +/* free the assembler */ void assembler_free(struct assembler *assembler); +/* assemble a file */ int assemble_file(struct assembler_arguments args); -/* assemble a mips32 file*/ -int assemble_file_mips32(struct assembler_arguments args); - #endif /* __ASM_H__ */ diff --git a/masm/asm_mips32.c b/masm/asm_mips32.c deleted file mode 100644 index 7716f4d..0000000 --- a/masm/asm_mips32.c +++ /dev/null @@ -1,746 +0,0 @@ -#include <merror.h> -#include <mips.h> -#include <mips32.h> -#include <stdio.h> -#include <stdlib.h> -#include <elf.h> -#include <string.h> -#include <stddef.h> - -#include "asm.h" -#include "mlimits.h" -#include "parse.h" -#include "parse_mips32.h" - -extern char *current_file; - -static int handle_directive(struct assembler *assembler, - struct mips32_directive *directive) -{ - switch (directive->type) { - case MIPS32_DIRECTIVE_SECTION: { - struct section_table *sec_tbl = &assembler->parser.sec_tbl; - struct section *sec; - if (sectbl_get(sec_tbl, &sec, directive->name) - == M_SUCCESS) { - sec_tbl->current = sec; - break; - } - - if (sectbl_alloc(sec_tbl, &sec, directive->name)) - return M_ERROR; - - sec_tbl->current = sec; - break; - } - - case MIPS32_DIRECTIVE_ALIGN: { - assembler->parser.sec_tbl.current->alignment = - 1 << directive->align; - break; - } - - case MIPS32_DIRECTIVE_SPACE: { - struct section_entry entry; - entry.type = ENT_NO_DATA; - entry.size = directive->space; - if (sec_push(assembler->parser.sec_tbl.current, entry)) - return M_ERROR; - break; - } - - case MIPS32_DIRECTIVE_WORD: { - for (uint32_t i = 0; i < directive->len; i++) { - struct section_entry entry; - entry.type = ENT_WORD; - entry.word = directive->words[i]; - if (sec_push(assembler->parser.sec_tbl.current, - entry)) - return M_ERROR; - } - break; - } - - case MIPS32_DIRECTIVE_HALF: { - for (uint32_t i = 0; i < directive->len; i++) { - struct section_entry entry; - entry.type = ENT_HALF; - entry.half = directive->halfs[i]; - if (sec_push(assembler->parser.sec_tbl.current, - entry)) - return M_ERROR; - } - break; - } - - case MIPS32_DIRECTIVE_BYTE: { - for (uint32_t i = 0; i < directive->len; i++) { - struct section_entry entry; - entry.type = ENT_BYTE; - entry.byte = directive->bytes[i]; - if (sec_push(assembler->parser.sec_tbl.current, - entry)) - return M_ERROR; - } - break; - } - - case MIPS32_DIRECTIVE_EXTERN: { - struct symbol symbol; - if (symtbl_find(&assembler->sym_tbl, NULL, directive->name) - == M_SUCCESS) { - ERROR("cannot extern local symbol '%s'", - directive->name); - return M_ERROR; - } - - symbol = (struct symbol) { - .name = "", - .sec = NULL, - .index = 0, - .flag = SYM_EXTERNAL, - }; - strcpy(symbol.name, directive->name); - - if (symtbl_push(&assembler->sym_tbl, symbol)) - return M_ERROR; - - break; - } - - case MIPS32_DIRECTIVE_GLOBL: { - struct symbol symbol; - if (symtbl_find(&assembler->sym_tbl, NULL, directive->name) - == M_SUCCESS) { - symbol.flag = SYM_GLOBAL; - break; - } - - symbol = (struct symbol) { - .name = "", - .sec = NULL, - .index = 0, - .flag = SYM_GLOBAL, - }; - strcpy(symbol.name, directive->name); - - if (symtbl_push(&assembler->sym_tbl, symbol)) - return M_ERROR; - - break; - } - } - - return M_SUCCESS; -} - -static int handle_label(struct assembler *assembler, - const char name[MAX_LEX_LENGTH]) -{ - struct symbol *ref; - if (symtbl_find(&assembler->sym_tbl, &ref, name) == M_SUCCESS) { - if (ref->flag == SYM_GLOBAL && ref->sec == NULL) { - ref->sec = assembler->parser.sec_tbl.current; - ref->index = assembler->parser.sec_tbl.current->count; - return M_SUCCESS; - } - ERROR("redefined symbol '%s'", name); - return M_ERROR; - } - - struct symbol symbol; - symbol = (struct symbol) { - .name = "", - .sec = assembler->parser.sec_tbl.current, - .index = assembler->parser.sec_tbl.current->count, - .flag = SYM_LOCAL, - }; - strcpy(symbol.name, name); - - if (symtbl_push(&assembler->sym_tbl, symbol)) - return M_ERROR; - - return M_SUCCESS; -} - -static int parse_file(struct assembler *assembler) -{ - struct parser *parser = &assembler->parser; - - while (1) { - struct expr expr; - if (parser_next(parser, &expr)) - return M_ERROR; - - switch (expr.type) { - case EXPR_INS: - struct section_entry entry; - entry.type = ENT_INS; - entry.size = sizeof(struct mips32_instruction); - entry.ins = expr.ins; - if (sec_push(parser->sec_tbl.current, entry)) - return M_ERROR; - break; - - case EXPR_DIRECTIVE: - if (handle_directive(assembler, - &expr.directive.mips32)) - return M_ERROR; - break; - - case EXPR_LABEL: - if (handle_label(assembler, expr.text)) - return M_ERROR; - break; - - case EXPR_CONSTANT: - break; - } - } - - struct section_meta *meta = malloc(sizeof(struct section_meta) * - parser->sec_tbl.count); - if (meta == NULL) { - ERROR("cannot alloc"); - return M_ERROR; - } - - assembler->meta = meta; - - size_t ptr = 0; - for (uint32_t i = 0; i < parser->sec_tbl.count; i++) { - struct section *sec = &parser->sec_tbl.sections[i]; - meta[i].v_addr = ptr; - ptr += sec_size(sec); - } - - return M_SUCCESS; -} - -static int assemble_phdr(struct assembler *assembler, Elf32_Phdr **res, - uint32_t *res2) -{ - struct parser *parser = &assembler->parser; - Elf32_Phdr *phdr = malloc(sizeof(Elf32_Phdr) * - parser->sec_tbl.count); - if (phdr == NULL) { - ERROR("cannot alloc"); - return M_ERROR;; - } - - for (uint32_t i = 0; i < parser->sec_tbl.count; i++) { - Elf32_Phdr *hdr = &phdr[i]; - struct section *sec = &parser->sec_tbl.sections[i]; - size_t size = sec_size(sec); - hdr->p_type = PT_LOAD; - hdr->p_flags = (sec->execute << 0) | - (sec->write << 1) | - (sec->read << 2); - hdr->p_offset = 0; - hdr->p_vaddr = 0; - hdr->p_paddr = 0; - hdr->p_filesz = size; - hdr->p_memsz = size; - hdr->p_align = sec->alignment; - } - - *res = phdr; - *res2 = parser->sec_tbl.count; - return M_SUCCESS; -} - -static int assemble_symtab(struct assembler *assembler, Elf32_Sym **res, - uint32_t *res2) -{ - Elf32_Sym *stbl = malloc(sizeof(Elf32_Sym) * assembler->sym_tbl - .count); - size_t size = 0; - - if (stbl == NULL) - return M_ERROR; - - for (uint32_t i = 0; i < assembler->sym_tbl.count; i++) { - struct symbol *sym = &assembler->sym_tbl.symbols[i]; - size_t str_off; - unsigned char bind; - unsigned char type = STT_NOTYPE; - - if (strtbl_write_str(&assembler->str_tbl, - sym->name, &str_off)) { - free(stbl); - return M_ERROR; - } - - if (sym->flag == SYM_GLOBAL && sym->sec == NULL) { - ERROR("never defined global symbol '%s'", sym->name); - return M_ERROR; - } - - if (sym->flag == SYM_LOCAL) - bind = STB_LOCAL; - else - bind = STB_GLOBAL; - - stbl[i] = (Elf32_Sym) { - .st_name = str_off, - .st_value = sym->index, - .st_size = 0, - .st_info = ELF32_ST_INFO(bind, type), - .st_other = ELF32_ST_VISIBILITY(STV_DEFAULT), - .st_shndx = 0, - }; - size = i + 1; - }; - - *res = stbl; - *res2 = size; - - return M_SUCCESS; -} - -static void assemble_symtab_shndx(struct assembler *assembler, Elf32_Sym *tbl) -{ - for (uint32_t i = 0; i < assembler->sym_tbl.count; i++) { - struct symbol *sym = &assembler->sym_tbl.symbols[i]; - if (sym->sec != NULL) - tbl[i].st_shndx = - assembler->meta[sym->sec->index].shdr_idx; - } -} - -static int assemble_reltbl_sec(struct assembler *assembler, Elf32_Sym *symtab, - uint32_t symtab_len, struct section *sec) -{ - uint32_t len = 0; - - for (uint32_t i = 0; i < assembler->parser.ref_tbl.count; i++) { - struct reference *ref = - &assembler->parser.ref_tbl.references[i]; - if (ref->section->index == sec->index) { - len++; - } - } - - if (len == 0) { - assembler->meta[sec->index].reltbl = NULL; - return M_SUCCESS; - } - - Elf32_Rela *reltbl = malloc(sizeof(Elf32_Rela) * len); - - if (reltbl == NULL) { - ERROR("cannot alloc"); - return M_ERROR; - } - - for (uint32_t i = 0; i < assembler->parser.ref_tbl.count; i++) { - struct reference *ref = - &assembler->parser.ref_tbl.references[i]; - struct mips32_instruction *ins = &ref->section-> - entries[ref->index].ins.mips32; - - if (ref->section->index != sec->index) { - continue; - } - - int32_t addend = 0; - unsigned char type = 0; - switch (ref->type) { - case REF_OFFESET: - addend = ins->B_data.offset; - type = R_MIPS_PC16; - break; - case REF_TARGET: - addend = ins->J_data.target; - type = R_MIPS_26; - break; - } - - int32_t symidx = -1; - - for (uint32_t i = 0; i < symtab_len; i++) { - Elf32_Sym *sym = &symtab[i]; - const char *str = - &assembler->str_tbl.ptr[sym->st_name]; - if (strcmp(ref->name, str) == 0) { - symidx = i; - break; - } - } - - if (symidx == -1) { - ERROR("undefined symbol '%s'", ref->name); - free(reltbl); - return M_ERROR; - } - - reltbl[i] = (Elf32_Rela) { - .r_info = ELF32_R_INFO(symidx, type), - .r_addend = addend, - .r_offset = sec_index(ref->section, ref->index), - }; - }; - - assembler->meta[sec->index].reltbl_len = len; - assembler->meta[sec->index].reltbl = reltbl; - - return M_SUCCESS; -} - -static int assemble_reltbl(struct assembler *assembler, Elf32_Sym *symtab, - uint32_t symtab_len) -{ - for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { - struct section *sec = &assembler->parser.sec_tbl.sections[i]; - if (assemble_reltbl_sec(assembler, symtab, symtab_len, sec)) - return M_ERROR; - } - - return M_SUCCESS; -} - -static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res, - uint32_t *res2) -{ - uint32_t max_entries = 4; // symtab, strtab, shstrtab - max_entries += assembler->parser.sec_tbl.count; // sections - max_entries += assembler->parser.sec_tbl.count; // reltabs per section - - Elf32_Shdr *shdr = malloc(sizeof(Elf32_Shdr) * max_entries); - - size_t str_off; - uint32_t count = 0; - - // eeltables - for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { - - if (assembler->meta[i].reltbl == NULL) - continue; - - struct section *sec = &assembler->parser.sec_tbl.sections[i]; - const char *prefix = ".reltab."; - char reltab_name[MAX_LEX_LENGTH + 8]; - - strcpy(reltab_name, prefix); - strcat(reltab_name, sec->name); - - if (strtbl_write_str(&assembler->shstr_tbl, - reltab_name, &str_off)) { - free(shdr); - return M_ERROR; - } - - assembler->meta[i].reltbl_idx = count; - shdr[count++] = (Elf32_Shdr) { - .sh_name = str_off, - .sh_type = SHT_RELA, - .sh_flags = 0, - .sh_addr = 0, - .sh_offset = 0, - .sh_size = 0, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = 1, - .sh_entsize = sizeof(Elf32_Rela), - }; - } - - // for each section - for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { - struct section *sec = &assembler->parser.sec_tbl.sections[i]; - char name[MAX_LEX_LENGTH+1] = "."; - strcat(name, sec->name); - if (strtbl_write_str(&assembler->shstr_tbl, name, &str_off)) { - free(shdr); - return M_ERROR; - } - assembler->meta[i].shdr_idx = count; - if (assembler->meta[i].reltbl != NULL) - shdr[assembler->meta[i].reltbl_idx].sh_info = count; - shdr[count++] = (Elf32_Shdr){ - .sh_name = str_off, - .sh_type = SHT_PROGBITS, - .sh_flags = (sec->write << 0) | (sec->execute << 2) | - SHF_ALLOC, - .sh_addr = 0, - .sh_offset = 0, - .sh_size = 0, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = sec->alignment, - .sh_entsize = sizeof(struct mips32_instruction), - }; - } - - // symbol table - if (strtbl_write_str(&assembler->shstr_tbl, ".symtab", &str_off)) { - free(shdr); - return M_ERROR; - } - - assembler->symtab_idx = count; - shdr[count++] = (Elf32_Shdr) { - .sh_name = str_off, - .sh_type = SHT_SYMTAB, - .sh_flags = 0, - .sh_addr = 0, - .sh_offset = 0, - .sh_size = 0, - .sh_link = 1, - .sh_info = 0, - .sh_addralign = 1, - .sh_entsize = sizeof(Elf32_Sym), - }; - - // string table - if (strtbl_write_str(&assembler->shstr_tbl, ".strtab", &str_off)) { - free(shdr); - return M_ERROR; - } - - assembler->strtbl_idx = count; - shdr[count++] = (Elf32_Shdr) { - .sh_name = str_off, - .sh_type = SHT_STRTAB, - .sh_flags = SHF_STRINGS, - .sh_addr = 0, - .sh_offset = 0, - .sh_size = 0, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = 1, - .sh_entsize = 0, - }; - - // sh string table - if (strtbl_write_str(&assembler->shstr_tbl, ".shstrtab", &str_off)) { - free(shdr); - return M_ERROR; - } - - assembler->shstrtbl_idx = count; - shdr[count++] = (Elf32_Shdr) { - .sh_name = str_off, - .sh_type = SHT_STRTAB, - .sh_flags = SHF_STRINGS, - .sh_addr = 0, - .sh_offset = 0, - .sh_size = 0, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = 1, - .sh_entsize = 0, - }; - - for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { - if (assembler->meta[i].reltbl == NULL) - continue; - shdr[assembler->meta[i].reltbl_idx].sh_link = - assembler->symtab_idx; - } - - *res = shdr; - *res2 = count; - - return M_SUCCESS; -} - -static void update_offsets(struct assembler *assembler, Elf32_Ehdr *ehdr) -{ - Elf32_Shdr *shdr = (Elf32_Shdr *) assembler->shdr; - Elf32_Phdr *phdr = (Elf32_Phdr *) assembler->phdr; - uint32_t ptr = 0; - - // we must now correct offets and sizes inside the ehdr, phdr, - // and shdr - ptr += sizeof(Elf32_Ehdr); - - // phdr - ehdr->e_phoff = ptr; - ptr += assembler->phdr_len * sizeof(Elf32_Phdr); - - // reltbls - for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { - if (assembler->meta[i].reltbl == NULL) - continue; - int idx = assembler->meta[i].reltbl_idx; - int len = assembler->meta[i].reltbl_len; - shdr[idx].sh_offset = ptr; - shdr[idx].sh_size = len * sizeof(Elf32_Rela); - ptr += len * sizeof(Elf32_Rela); - } - - // sections - for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { - int idx = assembler->meta[i].shdr_idx; - phdr[i].p_offset = ptr; - phdr[i].p_vaddr = ptr; - phdr[i].p_paddr = ptr; - shdr[idx].sh_offset = ptr; - shdr[idx].sh_size = phdr[i].p_filesz; - shdr[idx].sh_addr = phdr[i].p_vaddr; - shdr[idx].sh_addralign = phdr[i].p_align; - ptr += phdr[i].p_filesz; - } - - // symtab - shdr[assembler->symtab_idx].sh_offset = ptr; - shdr[assembler->symtab_idx].sh_link = assembler->strtbl_idx; - shdr[assembler->symtab_idx].sh_size = - assembler->symtab_len * sizeof(Elf32_Sym); - ptr += assembler->symtab_len * sizeof(Elf32_Sym); - - // strtbl - shdr[assembler->strtbl_idx].sh_offset = ptr; - shdr[assembler->strtbl_idx].sh_size = assembler->str_tbl.size; - ptr += assembler->str_tbl.size; - - // shstrtbl - shdr[assembler->shstrtbl_idx].sh_offset = ptr; - shdr[assembler->shstrtbl_idx].sh_size = assembler->shstr_tbl.size; - ptr += assembler->shstr_tbl.size; - - // shdr - ehdr->e_shoff = ptr; -} - -static int write_file(struct assembler *assembler, Elf32_Ehdr *ehdr, - const char *path) -{ - FILE *out = fopen(path, "w"); - - if (out == NULL) { - ERROR("cannot write '%s'", path); - return M_ERROR; - } - - // ehdr - fwrite(ehdr, sizeof(Elf32_Ehdr), 1, out); - - // phdr - fwrite(assembler->phdr, sizeof(Elf32_Phdr), assembler->phdr_len, out); - - // reltbls - for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { - if (assembler->meta[i].reltbl == NULL) - continue; - void *ptr = assembler->meta[i].reltbl; - int len = assembler->meta[i].reltbl_len; - fwrite(ptr, sizeof(Elf32_Rela), len, out); - } - - // sections - for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { - struct section *sec = &assembler->parser.sec_tbl.sections[i]; - for (uint32_t j = 0; j < sec->count; j++) { - struct section_entry *entry = &sec->entries[j]; - size_t size = entry->size; - fwrite(&entry->data, size, 1, out); - while(size % sec->alignment) { - uint8_t zero = 0; - fwrite(&zero, 1, 1, out); - size++; - } - } - } - - // sym tbl - fwrite(assembler->symtab, sizeof(Elf32_Sym), - assembler->symtab_len, out); - - // str tbl - fwrite(assembler->str_tbl.ptr, assembler->str_tbl.size, 1, out); - - // shstr tbl - fwrite(assembler->shstr_tbl.ptr, assembler->shstr_tbl.size, 1, out); - - // shdr - fwrite(assembler->shdr, sizeof(Elf32_Shdr), assembler->shdr_len, out); - - fclose(out); - - return M_SUCCESS; -} - -static int assemble_elf(struct assembler *assembler, const char *out) -{ - if (assemble_symtab(assembler, (Elf32_Sym **) &assembler->symtab, - &assembler->symtab_len)) - return M_ERROR; - - if (assemble_reltbl(assembler, assembler->symtab, - assembler->symtab_len)) { - return M_ERROR; - }; - - if (assemble_phdr(assembler, (Elf32_Phdr **) &assembler->phdr, - &assembler->phdr_len)) { - return M_ERROR; - } - - if (assemble_shdr(assembler, (Elf32_Shdr **) &assembler->shdr, - &assembler->shdr_len)) { - return M_ERROR; - }; - - // update the symbol tables with their given section - assemble_symtab_shndx(assembler, assembler->symtab); - - Elf32_Ehdr ehdr = { - .e_ident = { - [EI_MAG0] = ELFMAG0, - [EI_MAG1] = ELFMAG1, - [EI_MAG2] = ELFMAG2, - [EI_MAG3] = ELFMAG3, - [EI_CLASS] = ELFCLASS32, - [EI_DATA] = ELFDATA2LSB, - [EI_VERSION] = EV_CURRENT, - [EI_OSABI] = ELFOSABI_NONE, - [EI_ABIVERSION] = 0x00, - [EI_PAD] = 0x00, - }, - .e_type = ET_REL, - .e_machine = EM_MIPS, - .e_version = EV_CURRENT, - .e_entry = 0x00, - .e_phoff = 0x00, - .e_shoff = 0x00, - .e_flags = EF_MIPS_ARCH_32R6, - .e_ehsize = sizeof(Elf32_Ehdr), - .e_phentsize = sizeof(Elf32_Phdr), - .e_phnum = assembler->phdr_len, - .e_shentsize = sizeof(Elf32_Shdr), - .e_shnum = assembler->shdr_len, - .e_shstrndx = assembler->shstrtbl_idx, - }; - - update_offsets(assembler, &ehdr); - - if (write_file(assembler, &ehdr, out)) - return M_ERROR; - - return M_SUCCESS; -} - -int assemble_file_mips32(struct assembler_arguments args) -{ - struct assembler assembler; - int res = M_SUCCESS; - - current_file = args.in_file; - - if (assembler_init(&assembler, args.in_file)) - return M_ERROR; - - mips32_parser_init(&assembler.parser); - - if (res == M_SUCCESS) - res = parse_file(&assembler); - - if (res == M_SUCCESS) - res = assemble_elf(&assembler, args.out_file); - - assembler_free(&assembler); - - return res; -} diff --git a/masm/main.c b/masm/main.c index be156d8..760e4fa 100644 --- a/masm/main.c +++ b/masm/main.c @@ -3,27 +3,24 @@ #include <string.h> #include "asm.h" -#include "mips.h" void help(void) { printf("usage: masm [options] source.asm\n\n"); printf("options:\n"); printf("\t-h\t\tprints this help message\n"); - printf("\t-i isa\t\tselect a ISA to assemble to (mips32)\n"); printf("\t-o output\tselect a output file destination\n"); } int main(int argc, char **argv) { struct assembler_arguments args = { - .isa = ISA_MIPS32, .in_file = NULL, .out_file = NULL, }; int c; - while ((c = getopt(argc, argv, "ho:i:")) != 1) { + while ((c = getopt(argc, argv, "ho:")) != 1) { switch(c) { case 'h': help(); @@ -31,14 +28,6 @@ int main(int argc, char **argv) { case 'o': args.out_file = optarg; break; - case 'i': - if (strcmp(optarg, "mips32") == 0) { - args.isa = ISA_MIPS32; - } else { - ERROR("invalid isa '%s'", optarg); - return M_ERROR; - } - break; case '?': return M_ERROR; default: diff --git a/masm/parse.c b/masm/parse.c index 452045b..ccabf41 100644 --- a/masm/parse.c +++ b/masm/parse.c @@ -6,7 +6,7 @@ #include "parse.h" #include "lex.h" -int next_token(struct parser *parser, struct token *tok) +static int next_token(struct parser *parser, struct token *tok) { if (parser->peek.type != TOK_EOF) { if (tok != NULL) @@ -23,7 +23,7 @@ int next_token(struct parser *parser, struct token *tok) } -int peek_token(struct parser *parser, struct token *tok) +static int peek_token(struct parser *parser, struct token *tok) { if (parser->peek.type == TOK_EOF) { if (next_token(parser, &parser->peek)) @@ -35,7 +35,7 @@ int peek_token(struct parser *parser, struct token *tok) } -int assert_token(struct parser *parser, enum token_type type, +static int assert_token(struct parser *parser, enum token_type type, struct token *tok) { struct token token; @@ -51,7 +51,7 @@ int assert_token(struct parser *parser, enum token_type type, return M_SUCCESS; } -int assert_eol(struct parser *parser) +static int assert_eol(struct parser *parser) { struct token token; if (next_token(parser, &token)) @@ -63,6 +63,856 @@ int assert_eol(struct parser *parser) return M_SUCCESS; } +/* each instruction has a given parse format + * internal to the parser */ +enum mips_parse_format { + // register type: rs, rt, td + MIPS_PARSE_R, + // register type: rs, rt + MIPS_PARSE_R2, + // register type: rd + MIPS_PARSE_RD, + // register type: rs + MIPS_PARSE_RS, + // imeediate type: rs, rt, immd + MIPS_PARSE_I, + // jump type: offset + MIPS_PARSE_J, + // jump type: register + MIPS_PARSE_JR, + // offset 16b type: offset + MIPS_PARSE_O16, + // offset 26b type: offset + MIPS_PARSE_O26, + // breanch equal type: rs, rt, offset + MIPS_PARSE_BE, + // branch zero type: rs, offset + MIPS_PARSE_BZ, + // store and load: rt, offset(base) + MIPS_PARSE_SL, + // store and load immediate: rt, immediate + MIPS_PARSE_SLI, + // shift: rd, rt, sa + MIPS_PARSE_S, + // shift variable: rd, rt, rs + MIPS_PARSE_SV, + // none: + MIPS_PARSE_NONE, +}; + +#define FORMAT(ins, format) \ + [MIPS_INS_##ins] = MIPS_PARSE_##format, \ + +const enum mips_parse_format mips_parse_formats[] = { + FORMAT(ADD, R) + FORMAT(ADDI, I) + FORMAT(ADDIU, I) + FORMAT(ADDU, R) + FORMAT(AND, R) + FORMAT(ANDI, I) + FORMAT(BAL, O16) + FORMAT(BALC, O26) + FORMAT(BC, O26) + FORMAT(BEQ, BE) + FORMAT(BEQL, BE) + FORMAT(BGEZ, BZ) + FORMAT(BGEZAL, BZ) + FORMAT(BGEZALL, BZ) + FORMAT(BGEZL, BZ) + FORMAT(BGTZ, BZ) + FORMAT(BGTZL, BZ) + FORMAT(BLEZ, BZ) + FORMAT(BLEZL, BZ) + FORMAT(BLTZ, BZ) + FORMAT(BLTZAL, BZ) + FORMAT(BLTZALL, BZ) + FORMAT(BLTZL, BZ) + FORMAT(BNE, BE) + FORMAT(BNEL, BE) + FORMAT(DDIV, R2) + FORMAT(DDIVU, R2) + FORMAT(DIV, R2) + FORMAT(DIVU, R2) + FORMAT(J, J) + FORMAT(JAL, J) + FORMAT(JALR, JR) // TODO: handle rd + FORMAT(JALX, J) + FORMAT(JR, JR) + FORMAT(LB, SL) + FORMAT(LBU, SL) + FORMAT(LH, SL) + FORMAT(LHU, SL) + FORMAT(LUI, SLI) + FORMAT(LW, SL) + FORMAT(LWL, SL) + FORMAT(LWR, SL) + FORMAT(MFHI, RD) + FORMAT(MFLO, RD) + FORMAT(MTHI, RS) + FORMAT(MTLO, RS) + FORMAT(MULT, R2) + FORMAT(MULTU, R2) + FORMAT(SB, SL) + FORMAT(SH, SL) + FORMAT(SW, SL) + FORMAT(SWL, SL) + FORMAT(SLL, S) + FORMAT(SLLV, SV) + FORMAT(SLT, R) + FORMAT(SLTI, I) + FORMAT(SLTIU, I) + FORMAT(SLTU, R) + FORMAT(SRA, S) + FORMAT(SRAV, SV) + FORMAT(SRL, S) + FORMAT(SRLV, SV) + FORMAT(SYSCALL, NONE) + FORMAT(OR, R) + FORMAT(ORI, I) + FORMAT(NOR, R) + FORMAT(SUB, R) + FORMAT(SUBU, R) + FORMAT(XOR, R) + FORMAT(XORI, I) +}; + +#undef FORMAT + +#define MAX5 32 +#define MAX16 65536 +#define MAX26 67108864 + +static int get_reference(struct parser *parser, uint32_t *offset, + struct reference *ref, enum reference_type type) +{ + struct token token; + + if (next_token(parser, &token)) + return M_ERROR; + + if (token.type == TOK_NUMBER) { + *offset = token.number; + return M_SUCCESS; + } + + if (token.type != TOK_IDENT) { + ERROR_POS(token, "unexpected token of type '%s'", + token_str(token.type)); + return M_ERROR; + } + + strcpy(ref->name, token.text); + ref->type = type; + ref->addend = 0; + + // return zero for now + *offset = 0; + return M_SUCCESS; +} + +static int get_offset(struct parser *parser, uint32_t *offset, + struct reference *ref) +{ + return get_reference(parser, offset, ref, REF_OFFESET); +} + +static int get_target(struct parser *parser, uint32_t *offset, + struct reference *ref) +{ + return get_reference(parser, offset, ref, REF_TARGET); +} + +static int get_instruction(const char *ident, struct mips_instruction *res) +{ + for (int i = 0; i < __MIPS_INS_LEN; i++) { + struct mips_instruction ins = + mips_instructions[i]; + if (strcasecmp(ident, ins.name) == 0) { + if (res != NULL) + *res = ins; + return M_SUCCESS; + } + } + return M_ERROR; +} + +static int parse_register(struct parser *parser, enum mips_register *reg) +{ + struct token token; + if (assert_token(parser, TOK_REG, &token)) + return M_ERROR; + + int len = strlen(token.text); + int c0 = len > 0 ? token.text[0] : '\0', + c1 = len > 1 ? token.text[1] : '\0', + c2 = len > 2 ? token.text[2] : '\0', + c3 = len > 3 ? token.text[3] : '\0'; + + // $zero + if (c0 == 'z') { + if (c1 == 'e' && c2 == 'r' && c3 == 'o') { + *reg = MIPS_REG_ZERO; + return M_SUCCESS; + } + } + + // $a0-a3 $at + else if (c0 == 'a') { + if (c1 == 't') { + *reg = MIPS_REG_AT; + return M_SUCCESS; + } + if (c1 >= '0' && c1 <= '3') { + *reg = MIPS_REG_A0; + *reg += c1 - '0'; + return M_SUCCESS; + } + } + + // $v0-v1 + else if (c0 == 'v') { + if (c1 >= '0' && c1 <= '1') { + *reg = MIPS_REG_V0; + *reg += c1 - '0'; + return M_SUCCESS; + } + } + + // $t0-t9 + else if (c0 == 't') { + if (c1 >= '0' && c1 <= '7') { + *reg = MIPS_REG_T0; + *reg += c1 - '0'; + return M_SUCCESS; + } + // reg T8-T9 are not in order with T0-T7 + if (c1 >= '8' && c1 <= '9') { + *reg = MIPS_REG_T8; + *reg += c1 - '8'; + return M_SUCCESS; + } + } + + // $s0-s7 $sp + else if (c0 == 's') { + if (c1 >= '0' && c1 <= '7') { + *reg = MIPS_REG_S0; + *reg += c1 - '0'; + return M_SUCCESS; + } + if (c1 == 'p') { + *reg = MIPS_REG_SP; + return M_SUCCESS; + } + } + + // $k0-k1 + else if (c0 == 'k') { + if (c1 >= '0' && c1 <= '1') { + *reg = MIPS_REG_K0; + *reg += c1 - '0'; + return M_SUCCESS; + } + } + + // $gp + else if (c0 == 'g') { + if (c1 == 'p') { + *reg = MIPS_REG_GP; + return M_SUCCESS; + } + } + + // $fp + else if (c0 == 'f') { + if (c1 == 'p') { + *reg = MIPS_REG_FP; + return M_SUCCESS; + } + } + + // $rp + else if (c0 == 'r') { + if (c1 == 'p') { + *reg = MIPS_REG_RA; + return M_SUCCESS; + } + } + + // $0-31 (non aliased register names) + else if (c0 >= '0' && c0 <= '9') { + int i = c0 - '0'; + if (c1 >= '0' && c1 <= '9') { + i *= 10; + i += c1 - '0'; + } + if (i <= 31) { + *reg = i; + return M_SUCCESS; + } + } + + ERROR_POS(token, "unknown register $%s", token.text); + return M_ERROR; +} + +static int parse_instruction_r(struct parser *parser, + struct mips_instruction *ins) +{ + // format: rs, rt, rd + enum mips_register reg; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rd = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rs = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rt = reg; + + return M_SUCCESS; +} + +static int parse_instruction_r2(struct parser *parser, + struct mips_instruction *ins) +{ + // format: rs, rt + enum mips_register reg; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rs = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rt = reg; + + return M_SUCCESS; +} + +static int parse_instruction_rs(struct parser *parser, + struct mips_instruction *ins) +{ + // format: rs + enum mips_register reg; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rs = reg; + + return M_SUCCESS; +} + +static int parse_instruction_rd(struct parser *parser, + struct mips_instruction *ins) +{ + // format: rd + enum mips_register reg; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rd = reg; + + return M_SUCCESS; +} + +static int parse_instruction_i(struct parser *parser, + struct mips_instruction *ins) +{ + // format: rs, rt, immd + enum mips_register reg; + struct token token; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->I_data.rt = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->I_data.rs = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (assert_token(parser, TOK_NUMBER, &token)) + return M_ERROR; + + if (token.number >= MAX16) + return M_ERROR; + ins->I_data.immd = token.number; + + return M_SUCCESS; +} + +static int parse_instruction_offset(struct parser *parser, + uint32_t max, + struct mips_instruction *ins, + struct reference *ref) +{ + uint32_t n; + if (get_offset(parser, &n, ref) || n > max) + return M_ERROR; + + switch (max) { + case MAX26: + ins->J_data.target = n; + break; + case MAX16: + ins->B_data.offset = n; + break; + } + + return M_SUCCESS; +} + +static int parse_instruction_j(struct parser *parser, + struct mips_instruction *ins, + struct reference *ref) +{ + uint32_t n; + if (get_target(parser, &n, ref) || n > MAX26) + return M_ERROR; + ins->J_data.target = n; + + return M_SUCCESS; +} + +static int parse_instruction_jr(struct parser *parser, + struct mips_instruction *ins, + struct reference *ref) +{ + uint32_t n; + if (get_target(parser, &n, ref) || n > MAX26) + return M_ERROR; + ins->J_data.target = n; + + return M_SUCCESS; +} + +static int parse_instruction_branch_equal(struct parser *parser, + struct mips_instruction *ins) +{ + enum mips_register reg; + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rs = reg; + + return M_SUCCESS; +} + +static int parse_instruction_branch(struct parser *parser, + struct mips_instruction *ins, + struct reference *ref) +{ + enum mips_register reg; + uint32_t n; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->B_data.rs = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (get_offset(parser, &n, ref) || n > MAX16) + return M_ERROR; + ins->B_data.offset = n; + + return M_SUCCESS; +} + +static int parse_instruction_sl(struct parser *parser, + struct mips_instruction *ins, + struct reference *ref) +{ + enum mips_register reg; + uint32_t offset = 0; + struct token token; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->I_data.rt = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (peek_token(parser, &token)) + return M_ERROR; + + if (token.type != TOK_LPAREN) + if (get_offset(parser, &offset, ref)) + return M_ERROR; + ins->I_data.immd = offset; + + if (peek_token(parser, &token)) + return M_ERROR; + + if (token.type == TOK_NL) { + ins->I_data.rs = MIPS_REG_ZERO; + return M_SUCCESS; + } + + if (assert_token(parser, TOK_LPAREN, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->I_data.rs = reg; + + if (assert_token(parser, TOK_RPAREN, NULL)) + return M_ERROR; + + return M_SUCCESS; +} + +static int parse_instruction_sli(struct parser *parser, + struct mips_instruction *ins) +{ + enum mips_register reg; + struct token token; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->I_data.rt = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX16) + return M_ERROR; + ins->I_data.immd = token.number; + + return M_SUCCESS; +} + +static int parse_instruction_s(struct parser *parser, + struct mips_instruction *ins) +{ + enum mips_register reg; + struct token token; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rd = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rt = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX5) + return M_ERROR; + ins->R_data.shamt = token.number; + + return M_SUCCESS; +} + +static int parse_instruction_sv(struct parser *parser, + struct mips_instruction *ins) +{ + enum mips_register reg; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rd = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rt = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rs = reg; + + return M_SUCCESS; +} + +static int parse_instruction(struct parser *parser, + struct ins_expr *expr, + struct token ident) +{ + struct mips_instruction instruction; + enum mips_parse_format format; + int res = M_SUCCESS; + + if (get_instruction(ident.text, &instruction)) { + ERROR_POS(ident, "unknown instruction '%s'", ident.text); + return M_ERROR; + } + + struct mips_instruction *ins = &expr->ins[0]; + struct reference *ref = &expr->ref[0]; + + // this will only ever generate one instruction + expr->ins_len = 1; + *ins = instruction; + ref->type = REF_NONE; + + format = mips_parse_formats[instruction.type]; + switch (format) { + case MIPS_PARSE_R: + res = parse_instruction_r(parser, ins); + break; + case MIPS_PARSE_R2: + res = parse_instruction_r2(parser, ins); + break; + case MIPS_PARSE_RS: + res = parse_instruction_rs(parser, ins); + break; + case MIPS_PARSE_RD: + res = parse_instruction_rd(parser, ins); + break; + case MIPS_PARSE_I: + res = parse_instruction_i(parser, ins); + break; + case MIPS_PARSE_J: + res = parse_instruction_j(parser, ins, ref); + break; + case MIPS_PARSE_JR: + res = parse_instruction_jr(parser, ins, ref); + break; + case MIPS_PARSE_O16: + res = parse_instruction_offset(parser, MAX16, ins, ref); + break; + case MIPS_PARSE_O26: + res = parse_instruction_offset(parser, MAX26, ins, ref); + break; + case MIPS_PARSE_BE: + res = parse_instruction_branch_equal(parser, ins); + break; + case MIPS_PARSE_BZ: + res = parse_instruction_branch(parser, ins, ref); + break; + case MIPS_PARSE_SL: + res = parse_instruction_sl(parser, ins, ref); + break; + case MIPS_PARSE_SLI: + res = parse_instruction_sli(parser, ins); + break; + case MIPS_PARSE_S: + res = parse_instruction_s(parser, ins); + break; + case MIPS_PARSE_SV: + res = parse_instruction_sv(parser, ins); + break; + case MIPS_PARSE_NONE: + res = M_SUCCESS; + break; + } + + if (res == M_SUCCESS && assert_eol(parser)) + return M_ERROR; + + return res; +} + + +static int parse_directive_align(struct parser *parser, + struct mips_directive *directive) +{ + struct token token; + if (assert_token(parser, TOK_NUMBER, &token)) + return M_ERROR; + + if (token.number < 0) { + ERROR_POS(token, "cannot align negative"); + return M_ERROR; + } + + if (token.number > MAX16) { + ERROR_POS(token, "cannot align more than 65kb"); + return M_ERROR; + } + + directive->type = MIPS_DIRECTIVE_ALIGN; + directive->align = token.number; + + return M_SUCCESS; +} + +static int parse_directive_space(struct parser *parser, + struct mips_directive *directive) +{ + struct token token; + if (assert_token(parser, TOK_NUMBER, &token)) + return M_ERROR; + + if (token.number < 0) { + ERROR_POS(token, "cannot reserve negative"); + return M_ERROR; + } + + if (token.number > MAX16) { + ERROR_POS(token, "cannot reserve more than 65kb"); + return M_ERROR; + } + + directive->type = MIPS_DIRECTIVE_SPACE; + directive->space = token.number; + + return M_SUCCESS; +} + +static int parse_directive_whb(struct parser *parser, + struct mips_directive *directive, + enum mips_directive_type type) +{ + struct token token; + uint32_t size = 0; + uint32_t len = 0; + + switch (type) { + case MIPS_DIRECTIVE_WORD: + size = UINT32_MAX; + break; + case MIPS_DIRECTIVE_HALF: + size = UINT16_MAX; + break; + case MIPS_DIRECTIVE_BYTE: + size = UINT8_MAX; + break; + default: + } + + directive->type = type; + + while (1) { + if (assert_token(parser, TOK_NUMBER, &token)) + return M_ERROR; + + if (len >= MAX_ARG_LENGTH) { + ERROR_POS(token, "directives cannot be longer than " + "%d arguments", MAX_ARG_LENGTH); + return M_ERROR; + } + + if (token.number > size) { + ERROR_POS(token, "number cannot execede max size of: " + "%d", size); + return M_ERROR; + } + + switch (type) { + case MIPS_DIRECTIVE_WORD: + directive->words[len++] = token.number; + + break; + case MIPS_DIRECTIVE_HALF: + directive->halfs[len++] = token.number; + break; + case MIPS_DIRECTIVE_BYTE: + directive->bytes[len++] = token.number; + break; + default: + } + + if (peek_token(parser, &token)) + return M_ERROR; + + if (token.type == TOK_COMMA) { + next_token(parser, NULL); + continue; + } + + break; + } + + directive->len = len; + + return M_SUCCESS; +} + +static int parse_directive_extern(struct parser *parser, + struct mips_directive *directive) +{ + struct token token; + if (assert_token(parser, TOK_IDENT, &token)) + return M_ERROR; + + directive->type = MIPS_DIRECTIVE_EXTERN; + strcpy(directive->name, token.text); + + return M_SUCCESS; +} + +static int parse_directive_globl(struct parser *parser, + struct mips_directive *directive) +{ + struct token token; + if (assert_token(parser, TOK_IDENT, &token)) + return M_ERROR; + + directive->type = MIPS_DIRECTIVE_GLOBL; + strcpy(directive->name, token.text); + + return M_SUCCESS; +} + +static int parse_section(struct mips_directive *directive, + char name[MAX_LEX_LENGTH]) +{ + directive->type = MIPS_DIRECTIVE_SECTION; + strcpy(directive->name, name); + + return M_SUCCESS; +} + +static int parse_directive(struct parser *parser, + struct mips_directive *directive) +{ + struct token token; + if (assert_token(parser, TOK_DIRECTIVE, &token)) + return M_ERROR; + + // .align n + if (strcmp(token.text, "align") == 0) + return parse_directive_align(parser, directive); + else if (strcmp(token.text, "space") == 0) + return parse_directive_space(parser, directive); + else if (strcmp(token.text, "word") == 0) + return parse_directive_whb(parser, directive, + MIPS_DIRECTIVE_WORD); + else if (strcmp(token.text, "half") == 0) + return parse_directive_whb(parser, directive, + MIPS_DIRECTIVE_HALF); + else if (strcmp(token.text, "byte") == 0) + return parse_directive_whb(parser, directive, + MIPS_DIRECTIVE_BYTE); + else if (strcmp(token.text, "extern") == 0) + return parse_directive_extern(parser, directive); + else if (strcmp(token.text, "globl") == 0) + return parse_directive_globl(parser, directive); + else + return parse_section(directive, token.text); +} + static int parse_constant(struct parser *parser, struct const_expr *expr, struct token ident) { @@ -96,7 +946,7 @@ static int parser_handle_ident(struct parser *parser, struct expr *expr) return parse_constant(parser, &expr->constant, ident); } else { expr->type = EXPR_INS; - return parser->parse_instruction(parser, &expr->ins, ident); + return parse_instruction(parser, &expr->ins, ident); } } @@ -108,7 +958,7 @@ static int parse_label(struct parser *parser, if (assert_token(parser, TOK_LABEL, &token)) return M_ERROR; - strcpy(expr->text, token.text); + strcpy(expr->label, token.text); return M_SUCCESS; } @@ -139,8 +989,7 @@ again: case TOK_DIRECTIVE: expr->type = EXPR_DIRECTIVE; - res = parser->parse_directive(parser, - &expr->directive); + res = parse_directive(parser, &expr->directive); break; case TOK_IDENT: @@ -161,16 +1010,11 @@ int parser_init(struct lexer *lexer, struct parser *parser) { parser->lexer = lexer; parser->peek.type = TOK_EOF; - if (sectbl_init(&parser->sec_tbl)) - return M_ERROR; - if (reftbl_init(&parser->ref_tbl)) - return M_ERROR; return M_SUCCESS; } void parser_free(struct parser *parser) { - sectbl_free(&parser->sec_tbl); - reftbl_free(&parser->ref_tbl); + (void) parser; } diff --git a/masm/parse.h b/masm/parse.h index ea8f929..9181899 100644 --- a/masm/parse.h +++ b/masm/parse.h @@ -9,135 +9,68 @@ #include <mips.h> #include <stdint.h> +/// +/// reference +/// + +enum reference_type { + REF_NONE, + REF_OFFESET, + REF_TARGET, +}; + +struct reference { + enum reference_type type; + + /// symbol name + char name[MAX_LEX_LENGTH]; + + /// integer addend + int64_t addend; +}; + struct const_expr { char name[MAX_LEX_LENGTH]; uint32_t value; }; +struct ins_expr { + /// pesudo instructions can return + /// more than one instruction + size_t ins_len; + struct mips_instruction ins[2]; + + /// instructions can reference symbols. + /// instruction `n` will be paried with reference `n` + struct reference ref[2]; +}; + enum expr_type { - EXPR_INS, EXPR_DIRECTIVE, EXPR_CONSTANT, + EXPR_INS, EXPR_LABEL, }; struct expr { enum expr_type type; union { - // instruction - union mips_instruction ins; // directive - union mips_directive directive; + struct mips_directive directive; // constant struct const_expr constant; - // segment or label - char text[MAX_LEX_LENGTH]; - }; -}; - -enum section_entry_type { - ENT_INS, - ENT_WORD, - ENT_HALF, - ENT_BYTE, - ENT_NO_DATA, -}; - -struct section_entry { - enum section_entry_type type; - size_t size; - - union { - char data; // to get memory address - union mips_instruction ins; - int32_t word; - int16_t half; - int8_t byte; + // instruction + struct ins_expr ins; + // label + char label[MAX_LEX_LENGTH]; }; }; -struct section { - uint32_t count; - uint32_t len; - uint32_t alignment; - uint32_t index; // what index is my section - char name[MAX_LEX_LENGTH]; - bool read; - bool write; - bool execute; - struct section_entry *entries; -}; - -struct section_table { - uint32_t count; - uint32_t len; - struct section *sections; - struct section *current; - char name[MAX_LEX_LENGTH]; -}; - -int sectbl_init(struct section_table *sec_tbl); -void sectbl_free(struct section_table *sec_tbl); - -int sectbl_alloc(struct section_table *sec_tbl, struct section **sec, - const char name[MAX_LEX_LENGTH]); -int sectbl_get(struct section_table *sec_tbl, struct section **sec, - const char name[MAX_LEX_LENGTH]); -int sec_push(struct section *section, struct section_entry entry); -size_t sec_size(struct section *section); -size_t sec_index(struct section *section, uint32_t index); - -enum reference_type { - REF_OFFESET, - REF_TARGET, -}; - -struct reference { - enum reference_type type; - struct section *section; - uint32_t index; - char name[MAX_LEX_LENGTH]; -}; - -struct reference_table { - uint32_t count; - uint32_t len; - struct reference *references; -}; - -int reftbl_init(struct reference_table *ref_tbl); -void reftbl_free(struct reference_table *ref_tbl); -int reftbl_push(struct reference_table *ref_tbl, struct reference reference); - struct parser { struct lexer *lexer; struct token peek; - - // sections - struct section_table sec_tbl; - - // references - struct reference_table ref_tbl; - - int (*parse_instruction)(struct parser *, union mips_instruction *, - struct token); - int (*parse_directive)(struct parser *, union mips_directive *); - int (*is_instruction)(const char *ident); }; -/* get the next token in the parser */ -int next_token(struct parser *parser, struct token *tok); - -/* peek the next token in the parser */ -int peek_token(struct parser *parser, struct token *tok); - -/* assert the next token is a specific type */ -int assert_token(struct parser *parser, enum token_type type, - struct token *tok); - -/* assert the next token is EOF or NL */ -int assert_eol(struct parser *parser); - /* get the next expression in the parser */ int parser_next(struct parser *parser, struct expr *expr); diff --git a/masm/parse_mips32.c b/masm/parse_mips32.c deleted file mode 100644 index db7f346..0000000 --- a/masm/parse_mips32.c +++ /dev/null @@ -1,872 +0,0 @@ -#include <mips.h> -#include <mips32.h> -#include <merror.h> -#include <stdint.h> -#include <string.h> -#include <strings.h> - -#include "parse_mips32.h" -#include "parse.h" -#include "mlimits.h" -#include "parse.h" -#include "lex.h" - -/* each instruction has a given parse format - * internal to the parser */ -enum mips32_parse_format { - // register type: rs, rt, td - MIPS32_PARSE_R, - // register type: rs, rt - MIPS32_PARSE_R2, - // register type: rd - MIPS32_PARSE_RD, - // register type: rs - MIPS32_PARSE_RS, - // imeediate type: rs, rt, immd - MIPS32_PARSE_I, - // jump type: offset - MIPS32_PARSE_J, - // jump type: register - MIPS32_PARSE_JR, - // offset 16b type: offset - MIPS32_PARSE_O16, - // offset 26b type: offset - MIPS32_PARSE_O26, - // breanch equal type: rs, rt, offset - MIPS32_PARSE_BE, - // branch zero type: rs, offset - MIPS32_PARSE_BZ, - // store and load: rt, offset(base) - MIPS32_PARSE_SL, - // store and load immediate: rt, immediate - MIPS32_PARSE_SLI, - // shift: rd, rt, sa - MIPS32_PARSE_S, - // shift variable: rd, rt, rs - MIPS32_PARSE_SV, - // none: - MIPS32_PARSE_NONE, -}; - -#define FORMAT(ins, format) \ - [MIPS32_INS_##ins] = MIPS32_PARSE_##format, \ - -const enum mips32_parse_format mips32_parse_formats[] = { - FORMAT(ADD, R) - FORMAT(ADDI, I) - FORMAT(ADDIU, I) - FORMAT(ADDU, R) - FORMAT(AND, R) - FORMAT(ANDI, I) - FORMAT(BAL, O16) - FORMAT(BALC, O26) - FORMAT(BC, O26) - FORMAT(BEQ, BE) - FORMAT(BEQL, BE) - FORMAT(BGEZ, BZ) - FORMAT(BGEZAL, BZ) - FORMAT(BGEZALL, BZ) - FORMAT(BGEZL, BZ) - FORMAT(BGTZ, BZ) - FORMAT(BGTZL, BZ) - FORMAT(BLEZ, BZ) - FORMAT(BLEZL, BZ) - FORMAT(BLTZ, BZ) - FORMAT(BLTZAL, BZ) - FORMAT(BLTZALL, BZ) - FORMAT(BLTZL, BZ) - FORMAT(BNE, BE) - FORMAT(BNEL, BE) - FORMAT(DDIV, R2) - FORMAT(DDIVU, R2) - FORMAT(DIV, R2) - FORMAT(DIVU, R2) - FORMAT(J, J) - FORMAT(JAL, J) - FORMAT(JALR, JR) // TODO: handle rd - FORMAT(JALX, J) - FORMAT(JR, JR) - FORMAT(LB, SL) - FORMAT(LBU, SL) - FORMAT(LH, SL) - FORMAT(LHU, SL) - FORMAT(LUI, SLI) - FORMAT(LW, SL) - FORMAT(LWL, SL) - FORMAT(LWR, SL) - FORMAT(MFHI, RD) - FORMAT(MFLO, RD) - FORMAT(MTHI, RS) - FORMAT(MTLO, RS) - FORMAT(MULT, R2) - FORMAT(MULTU, R2) - FORMAT(SB, SL) - FORMAT(SH, SL) - FORMAT(SW, SL) - FORMAT(SWL, SL) - FORMAT(SLL, S) - FORMAT(SLLV, SV) - FORMAT(SLT, R) - FORMAT(SLTI, I) - FORMAT(SLTIU, I) - FORMAT(SLTU, R) - FORMAT(SRA, S) - FORMAT(SRAV, SV) - FORMAT(SRL, S) - FORMAT(SRLV, SV) - FORMAT(SYSCALL, NONE) - FORMAT(OR, R) - FORMAT(ORI, I) - FORMAT(NOR, R) - FORMAT(SUB, R) - FORMAT(SUBU, R) - FORMAT(XOR, R) - FORMAT(XORI, I) -}; - -#undef FORMAT - -#define MAX5 32 -#define MAX16 65536 -#define MAX26 67108864 - -static int get_reference(struct parser *parser, uint32_t *offset, - enum reference_type type) -{ - struct token token; - - if (next_token(parser, &token)) - return M_ERROR; - - if (token.type == TOK_NUMBER) { - *offset = token.number; - return M_SUCCESS; - } - - if (token.type != TOK_IDENT) { - ERROR_POS(token, "unexpected token of type '%s'", - token_str(token.type)); - return M_ERROR; - } - - struct reference reference = { - .section = parser->sec_tbl.current, - .index = parser->sec_tbl.current->count, - .type = type, - }; - strcpy(reference.name, token.text); - - if (reftbl_push(&parser->ref_tbl, reference)) - return M_ERROR; - - *offset = 0; - - return M_SUCCESS; -} - -static int get_offset(struct parser *parser, uint32_t *offset) -{ - return get_reference(parser, offset, REF_OFFESET); -} - -static int get_target(struct parser *parser, uint32_t *offset) -{ - return get_reference(parser, offset, REF_TARGET); -} - -static int get_instruction(const char *ident, struct mips32_instruction *res) -{ - for (int i = 0; i < __MIPS32_INS_LEN; i++) { - struct mips32_instruction ins = - mips32_instructions[i]; - if (strcasecmp(ident, ins.name) == 0) { - if (res != NULL) - *res = ins; - return M_SUCCESS; - } - } - return M_ERROR; -} - -static int is_instruction(const char *ident) -{ - return get_instruction(ident, NULL); -} - -static int parse_register(struct parser *parser, enum mips32_register *reg) -{ - struct token token; - if (assert_token(parser, TOK_REG, &token)) - return M_ERROR; - - int len = strlen(token.text); - int c0 = len > 0 ? token.text[0] : '\0', - c1 = len > 1 ? token.text[1] : '\0', - c2 = len > 2 ? token.text[2] : '\0', - c3 = len > 3 ? token.text[3] : '\0'; - - // $zero - if (c0 == 'z') { - if (c1 == 'e' && c2 == 'r' && c3 == 'o') { - *reg = MIPS32_REG_ZERO; - return M_SUCCESS; - } - } - - // $a0-a3 $at - else if (c0 == 'a') { - if (c1 == 't') { - *reg = MIPS32_REG_AT; - return M_SUCCESS; - } - if (c1 >= '0' && c1 <= '3') { - *reg = MIPS32_REG_A0; - *reg += c1 - '0'; - return M_SUCCESS; - } - } - - // $v0-v1 - else if (c0 == 'v') { - if (c1 >= '0' && c1 <= '1') { - *reg = MIPS32_REG_V0; - *reg += c1 - '0'; - return M_SUCCESS; - } - } - - // $t0-t9 - else if (c0 == 't') { - if (c1 >= '0' && c1 <= '7') { - *reg = MIPS32_REG_T0; - *reg += c1 - '0'; - return M_SUCCESS; - } - // reg T8-T9 are not in order with T0-T7 - if (c1 >= '8' && c1 <= '9') { - *reg = MIPS32_REG_T8; - *reg += c1 - '8'; - return M_SUCCESS; - } - } - - // $s0-s7 $sp - else if (c0 == 's') { - if (c1 >= '0' && c1 <= '7') { - *reg = MIPS32_REG_S0; - *reg += c1 - '0'; - return M_SUCCESS; - } - if (c1 == 'p') { - *reg = MIPS32_REG_SP; - return M_SUCCESS; - } - } - - // $k0-k1 - else if (c0 == 'k') { - if (c1 >= '0' && c1 <= '1') { - *reg = MIPS32_REG_K0; - *reg += c1 - '0'; - return M_SUCCESS; - } - } - - // $gp - else if (c0 == 'g') { - if (c1 == 'p') { - *reg = MIPS32_REG_GP; - return M_SUCCESS; - } - } - - // $fp - else if (c0 == 'f') { - if (c1 == 'p') { - *reg = MIPS32_REG_FP; - return M_SUCCESS; - } - } - - // $rp - else if (c0 == 'r') { - if (c1 == 'p') { - *reg = MIPS32_REG_RA; - return M_SUCCESS; - } - } - - // $0-31 (non aliased register names) - else if (c0 >= '0' && c0 <= '9') { - int i = c0 - '0'; - if (c1 >= '0' && c1 <= '9') { - i *= 10; - i += c1 - '0'; - } - if (i <= 31) { - *reg = i; - return M_SUCCESS; - } - } - - ERROR_POS(token, "unknown register $%s", token.text); - return M_ERROR; -} - -static int parse_instruction_r(struct parser *parser, - struct mips32_instruction *ins) -{ - // format: rs, rt, rd - enum mips32_register reg; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->R_data.rd = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->R_data.rs = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->R_data.rt = reg; - - return M_SUCCESS; -} - -static int parse_instruction_r2(struct parser *parser, - struct mips32_instruction *ins) -{ - // format: rs, rt - enum mips32_register reg; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->R_data.rs = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->R_data.rt = reg; - - return M_SUCCESS; -} - -static int parse_instruction_rs(struct parser *parser, - struct mips32_instruction *ins) -{ - // format: rs - enum mips32_register reg; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->R_data.rs = reg; - - return M_SUCCESS; -} - -static int parse_instruction_rd(struct parser *parser, - struct mips32_instruction *ins) -{ - // format: rd - enum mips32_register reg; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->R_data.rd = reg; - - return M_SUCCESS; -} - -static int parse_instruction_i(struct parser *parser, - struct mips32_instruction *ins) -{ - // format: rs, rt, immd - enum mips32_register reg; - struct token token; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->I_data.rt = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->I_data.rs = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (assert_token(parser, TOK_NUMBER, &token)) - return M_ERROR; - - if (token.number >= MAX16) - return M_ERROR; - ins->I_data.immd = token.number; - - return M_SUCCESS; -} - -static int parse_instruction_offset(struct parser *parser, - uint32_t max, - struct mips32_instruction *ins) -{ - uint32_t n; - if (get_offset(parser, &n) || n > max) - return M_ERROR; - - switch (max) { - case MAX26: - ins->J_data.target = n; - break; - case MAX16: - ins->B_data.offset = n; - break; - } - - return M_SUCCESS; -} - -static int parse_instruction_j(struct parser *parser, - struct mips32_instruction *ins) -{ - uint32_t n; - if (get_target(parser, &n) || n > MAX26) - return M_ERROR; - ins->J_data.target = n; - - return M_SUCCESS; -} - -static int parse_instruction_jr(struct parser *parser, - struct mips32_instruction *ins) -{ - uint32_t n; - if (get_target(parser, &n) || n > MAX26) - return M_ERROR; - ins->J_data.target = n; - - return M_SUCCESS; -} - -static int parse_instruction_branch_equal(struct parser *parser, - struct mips32_instruction *ins) -{ - enum mips32_register reg; - if (parse_register(parser, ®)) - return M_ERROR; - ins->R_data.rs = reg; - - return M_SUCCESS; -} - -static int parse_instruction_branch(struct parser *parser, - struct mips32_instruction *ins) -{ - enum mips32_register reg; - uint32_t n; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->B_data.rs = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (get_offset(parser, &n) || n > MAX16) - return M_ERROR; - ins->B_data.offset = n; - - return M_SUCCESS; -} - -static int parse_instruction_sl(struct parser *parser, - struct mips32_instruction *ins) -{ - enum mips32_register reg; - uint32_t offset = 0; - struct token token; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->I_data.rt = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (peek_token(parser, &token)) - return M_ERROR; - - if (token.type != TOK_LPAREN) - if (get_offset(parser, &offset)) - return M_ERROR; - ins->I_data.immd = offset; - - if (peek_token(parser, &token)) - return M_ERROR; - - if (token.type == TOK_NL) { - ins->I_data.rs = MIPS32_REG_ZERO; - return M_SUCCESS; - } - - if (assert_token(parser, TOK_LPAREN, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->I_data.rs = reg; - - if (assert_token(parser, TOK_RPAREN, NULL)) - return M_ERROR; - - return M_SUCCESS; -} - -static int parse_instruction_sli(struct parser *parser, - struct mips32_instruction *ins) -{ - enum mips32_register reg; - struct token token; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->I_data.rt = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX16) - return M_ERROR; - ins->I_data.immd = token.number; - - return M_SUCCESS; -} - -static int parse_instruction_s(struct parser *parser, - struct mips32_instruction *ins) -{ - enum mips32_register reg; - struct token token; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->R_data.rd = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->R_data.rt = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX5) - return M_ERROR; - ins->R_data.shamt = token.number; - - return M_SUCCESS; -} - -static int parse_instruction_sv(struct parser *parser, - struct mips32_instruction *ins) -{ - enum mips32_register reg; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->R_data.rd = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->R_data.rt = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->R_data.rs = reg; - - return M_SUCCESS; -} - -static int parse_instruction(struct parser *parser, - union mips_instruction *ins, - struct token ident) -{ - struct mips32_instruction instruction; - enum mips32_parse_format format; - int res = M_SUCCESS; - - if (get_instruction(ident.text, &instruction)) { - ERROR_POS(ident, "unknown instruction '%s'", ident.text); - return M_ERROR; - } - - ins->mips32 = instruction; - format = mips32_parse_formats[instruction.type]; - - switch (format) { - case MIPS32_PARSE_R: - res = parse_instruction_r(parser, &ins->mips32); - break; - case MIPS32_PARSE_R2: - res = parse_instruction_r2(parser, &ins->mips32); - break; - case MIPS32_PARSE_RS: - res = parse_instruction_rs(parser, &ins->mips32); - break; - case MIPS32_PARSE_RD: - res = parse_instruction_rd(parser, &ins->mips32); - break; - case MIPS32_PARSE_I: - res = parse_instruction_i(parser, &ins->mips32); - break; - case MIPS32_PARSE_J: - res = parse_instruction_j(parser, &ins->mips32); - break; - case MIPS32_PARSE_JR: - res = parse_instruction_jr(parser, &ins->mips32); - break; - case MIPS32_PARSE_O16: - res = parse_instruction_offset(parser, MAX16, &ins->mips32); - break; - case MIPS32_PARSE_O26: - res = parse_instruction_offset(parser, MAX26, &ins->mips32); - break; - case MIPS32_PARSE_BE: - res = parse_instruction_branch_equal(parser, &ins->mips32); - break; - case MIPS32_PARSE_BZ: - res = parse_instruction_branch(parser, &ins->mips32); - break; - case MIPS32_PARSE_SL: - res = parse_instruction_sl(parser, &ins->mips32); - break; - case MIPS32_PARSE_SLI: - res = parse_instruction_sli(parser, &ins->mips32); - break; - case MIPS32_PARSE_S: - res = parse_instruction_s(parser, &ins->mips32); - break; - case MIPS32_PARSE_SV: - res = parse_instruction_sv(parser, &ins->mips32); - break; - case MIPS32_PARSE_NONE: - res = M_SUCCESS; - break; - } - - if (res == M_SUCCESS && assert_eol(parser)) - return M_ERROR; - - return res; -} - - -static int parse_directive_align(struct parser *parser, - struct mips32_directive *directive) -{ - struct token token; - if (assert_token(parser, TOK_NUMBER, &token)) - return M_ERROR; - - if (token.number < 0) { - ERROR_POS(token, "cannot align negative"); - return M_ERROR; - } - - if (token.number > MAX16) { - ERROR_POS(token, "cannot align more than 65kb"); - return M_ERROR; - } - - directive->type = MIPS32_DIRECTIVE_ALIGN; - directive->align = token.number; - - return M_SUCCESS; -} - -static int parse_directive_space(struct parser *parser, - struct mips32_directive *directive) -{ - struct token token; - if (assert_token(parser, TOK_NUMBER, &token)) - return M_ERROR; - - if (token.number < 0) { - ERROR_POS(token, "cannot reserve negative"); - return M_ERROR; - } - - if (token.number > MAX16) { - ERROR_POS(token, "cannot reserve more than 65kb"); - return M_ERROR; - } - - directive->type = MIPS32_DIRECTIVE_SPACE; - directive->space = token.number; - - return M_SUCCESS; -} - -static int parse_directive_whb(struct parser *parser, - struct mips32_directive *directive, - enum mips32_directive_type type) -{ - struct token token; - uint32_t size = 0; - uint32_t len = 0; - - switch (type) { - case MIPS32_DIRECTIVE_WORD: - size = UINT32_MAX; - break; - case MIPS32_DIRECTIVE_HALF: - size = UINT16_MAX; - break; - case MIPS32_DIRECTIVE_BYTE: - size = UINT8_MAX; - break; - default: - } - - directive->type = type; - - while (1) { - if (assert_token(parser, TOK_NUMBER, &token)) - return M_ERROR; - - if (len >= MAX_ARG_LENGTH) { - ERROR_POS(token, "directives cannot be longer than " - "%d arguments", MAX_ARG_LENGTH); - return M_ERROR; - } - - if (token.number > size) { - ERROR_POS(token, "number cannot execede max size of: " - "%d", size); - return M_ERROR; - } - - switch (type) { - case MIPS32_DIRECTIVE_WORD: - directive->words[len++] = token.number; - - break; - case MIPS32_DIRECTIVE_HALF: - directive->halfs[len++] = token.number; - break; - case MIPS32_DIRECTIVE_BYTE: - directive->bytes[len++] = token.number; - break; - default: - } - - if (peek_token(parser, &token)) - return M_ERROR; - - if (token.type == TOK_COMMA) { - next_token(parser, NULL); - continue; - } - - break; - } - - directive->len = len; - - return M_SUCCESS; -} - -static int parse_directive_extern(struct parser *parser, - struct mips32_directive *directive) -{ - struct token token; - if (assert_token(parser, TOK_IDENT, &token)) - return M_ERROR; - - directive->type = MIPS32_DIRECTIVE_EXTERN; - strcpy(directive->name, token.text); - - return M_SUCCESS; -} - -static int parse_directive_globl(struct parser *parser, - struct mips32_directive *directive) -{ - struct token token; - if (assert_token(parser, TOK_IDENT, &token)) - return M_ERROR; - - directive->type = MIPS32_DIRECTIVE_GLOBL; - strcpy(directive->name, token.text); - - return M_SUCCESS; -} - -static int parse_section(struct mips32_directive *directive, - char name[MAX_LEX_LENGTH]) -{ - directive->type = MIPS32_DIRECTIVE_SECTION; - strcpy(directive->name, name); - - return M_SUCCESS; -} - -static int parse_directive(struct parser *parser, - union mips_directive *directive) -{ - struct token token; - if (assert_token(parser, TOK_DIRECTIVE, &token)) - return M_ERROR; - - // .align n - if (strcmp(token.text, "align") == 0) - return parse_directive_align(parser, &directive->mips32); - else if (strcmp(token.text, "space") == 0) - return parse_directive_space(parser, &directive->mips32); - else if (strcmp(token.text, "word") == 0) - return parse_directive_whb(parser, &directive->mips32, - MIPS32_DIRECTIVE_WORD); - else if (strcmp(token.text, "half") == 0) - return parse_directive_whb(parser, &directive->mips32, - MIPS32_DIRECTIVE_HALF); - else if (strcmp(token.text, "byte") == 0) - return parse_directive_whb(parser, &directive->mips32, - MIPS32_DIRECTIVE_BYTE); - else if (strcmp(token.text, "extern") == 0) - return parse_directive_extern(parser, &directive->mips32); - else if (strcmp(token.text, "globl") == 0) - return parse_directive_globl(parser, &directive->mips32); - else - return parse_section(&directive->mips32, token.text); -} - -void mips32_parser_init(struct parser *parser) -{ - parser->parse_instruction = parse_instruction; - parser->is_instruction = is_instruction; - parser->parse_directive = parse_directive; -} - -void mips32_parser_free(struct parser *parser) -{ - parser_free(parser); -} diff --git a/masm/parse_mips32.h b/masm/parse_mips32.h deleted file mode 100644 index 5262d68..0000000 --- a/masm/parse_mips32.h +++ /dev/null @@ -1,14 +0,0 @@ -/* Copyright (c) 2024 Freya Murphy */ - -#ifndef __PARSE_MIPS32_H__ -#define __PARSE_MIPS32_H__ - -#include "parse.h" - -/* initzlize a mips32 parser vtable */ -void mips32_parser_init(struct parser *parser); - -/* free the mips32 parser */ -void mips32_parser_free(struct parser *parser); - -#endif /* __PARSE_MIPS32_H__ */ diff --git a/masm/reftbl.c b/masm/reftbl.c deleted file mode 100644 index 198af83..0000000 --- a/masm/reftbl.c +++ /dev/null @@ -1,47 +0,0 @@ -#include <string.h> -#include <stdlib.h> -#include <mips.h> -#include <merror.h> -#include <mlimits.h> - -#include "parse.h" - -#define RELTBL_INIT_LEN 8 - -int reftbl_init(struct reference_table *ref_tbl) -{ - ref_tbl->len = RELTBL_INIT_LEN; - ref_tbl->count = 0; - ref_tbl->references = malloc(sizeof(struct reference) * - RELTBL_INIT_LEN); - - if (ref_tbl->references == NULL) { - ERROR("cannot alloc"); - return M_ERROR; - } - - return M_SUCCESS; -} - -void reftbl_free(struct reference_table *ref_tbl) -{ - free(ref_tbl->references); -} - -int reftbl_push(struct reference_table *ref_tbl, struct reference reference) -{ - if (ref_tbl->count >= ref_tbl->len) { - ref_tbl->len *= 2; - ref_tbl->references = realloc(ref_tbl->references, - sizeof(struct reference) * ref_tbl->len); - - if (ref_tbl->references == NULL) { - ERROR("cannot realloc"); - return M_ERROR; - } - } - - ref_tbl->references[ref_tbl->count++] = reference; - - return M_SUCCESS; -} diff --git a/masm/reltab.c b/masm/reltab.c new file mode 100644 index 0000000..482ed44 --- /dev/null +++ b/masm/reltab.c @@ -0,0 +1,43 @@ +#include <elf.h> +#include <stdlib.h> +#include <merror.h> + +#include "asm.h" + +#define RELTAB_INIT_LEN 8 + +int reltab_init(struct relocation_table *reltab) +{ + reltab->size = RELTAB_INIT_LEN; + reltab->len = 0; + reltab->data = malloc(sizeof(Elf32_Rela) * RELTAB_INIT_LEN); + + if (reltab->data == NULL) { + ERROR("cannot alloc"); + return M_ERROR; + } + + return M_SUCCESS; +} + +void reltab_free(struct relocation_table *reltab) +{ + free(reltab->data); +} + +int reltab_push(struct relocation_table *reltab, const Elf32_Rela rel) +{ + if (reltab->len >= reltab->size) { + reltab->size *= 2; + reltab->data = realloc(reltab->data, sizeof(Elf32_Rela) + * reltab->size); + + if (reltab->data == NULL) { + ERROR("cannot realloc"); + return M_ERROR; + } + } + + reltab->data[reltab->len++] = rel; + return M_SUCCESS; +} diff --git a/masm/sectab.c b/masm/sectab.c new file mode 100644 index 0000000..d07399f --- /dev/null +++ b/masm/sectab.c @@ -0,0 +1,166 @@ +#include <string.h> +#include <stdlib.h> +#include <mips.h> +#include <merror.h> +#include <mlimits.h> + +#include "asm.h" + +#define SECTBL_INIT_LEN 8 +static const char inital_section[MAX_LEX_LENGTH] = "data"; + +int sectab_init(struct section_table *sectab) +{ + sectab->size = SECTBL_INIT_LEN; + sectab->len = 0; + sectab->sections = malloc(sizeof(struct section) * SECTBL_INIT_LEN); + + if (sectab->sections == NULL) { + ERROR("cannot alloc"); + return M_ERROR; + } + + if (sectab_alloc(sectab, §ab->current, inital_section)) + return M_ERROR; + + return M_SUCCESS; +} + +void sectab_free(struct section_table *sectab) +{ + for (size_t i = 0; i < sectab->len; i++) { + reltab_free(§ab->sections[i].reltab); + free(sectab->sections[i].entries); + } + free(sectab->sections); +} + +struct section_settings { + const char *name; + bool read; + bool write; + bool execute; + size_t align; +}; + +static struct section_settings default_section_settings[] = { + {"data", true, true, false, 1}, + {"bss", true, true, false, 1}, + {"rodata", true, false, false, 1}, + {"text", true, false, true, 4}, +}; + +int sectab_alloc(struct section_table *sectab, struct section **res, + const char name[MAX_LEX_LENGTH]) +{ + if (sectab->len >= sectab->size) { + sectab->size *= 2; + sectab->sections = realloc(sectab->sections, + sizeof(struct section) * sectab->size); + + if (sectab->sections == NULL) { + ERROR("cannot realloc"); + return M_ERROR; + } + } + + /* set the sectio defaults */ + struct section *sec; + sec = §ab->sections[sectab->len]; + strcpy(sec->name,name); + sec->len = 0; + sec->size = SECTBL_INIT_LEN; + sec->alignment = 1; + sec->read = true; + sec->write = true; + sec->execute = false; + sec->index = sectab->len; + sec->entries = malloc(sizeof(struct section_entry) * SECTBL_INIT_LEN); + + if (reltab_init(&sec->reltab)) + return M_ERROR; + + /* overwrite the default if the given name has their own + * defaults */ + for (int i = 0; i < 4; i++) { + struct section_settings *set = &default_section_settings[i]; + if (strcmp(set->name, name) == 0) { + sec->read = set->read; + sec->write = set->write; + sec->execute = set->execute; + sec->alignment = set->align; + break; + } + } + + if (sec->entries == NULL) { + ERROR("cannot alloc"); + return M_ERROR; + } + + sectab->len++; + + *res = sec; + return M_SUCCESS; +} + +int sectab_get(struct section_table *sectab, struct section **sec, + const char name[MAX_LEX_LENGTH]) +{ + for (size_t i = 0; i < sectab->len; i++) { + struct section *temp = §ab->sections[i]; + if (strcmp(name, temp->name) == 0) { + if (sec != NULL) + *sec = temp; + return M_SUCCESS; + } + } + + return M_ERROR; +} + +int sec_push(struct section *section, struct section_entry entry) +{ + if (section->len >= section->size) { + section->size *= 2; + void *new = realloc(section->entries, + sizeof(struct section_entry) * section->size); + + if (new == NULL) { + ERROR("cannot realloc"); + return M_ERROR; + } + + section->entries = new; + } + + section->entries[section->len++] = entry; + + return M_SUCCESS; +} + +size_t sec_size(struct section *sec) +{ + size_t n = 0; + for (size_t i = 0; i < sec->len; i++) { + size_t t = sec->entries[i].size; + size_t m = t % sec->alignment; + if (m) + t += sec->alignment - m; + n += t; + } + return n; +} + +size_t sec_index(struct section *sec, size_t idx) +{ + size_t n = 0; + for (size_t i = 0; i < idx; i++) { + size_t t = sec->entries[i].size; + size_t m = t % sec->alignment; + if (m) + t += sec->alignment - m; + n += t; + } + return n; +} diff --git a/masm/sectbl.c b/masm/sectbl.c deleted file mode 100644 index 6eafc60..0000000 --- a/masm/sectbl.c +++ /dev/null @@ -1,159 +0,0 @@ -#include <string.h> -#include <stdlib.h> -#include <mips.h> -#include <merror.h> -#include <mlimits.h> - -#include "parse.h" - -#define SECTBL_INIT_LEN 8 -static const char inital_section[MAX_LEX_LENGTH] = "data"; - -int sectbl_init(struct section_table *sec_tbl) -{ - sec_tbl->len = SECTBL_INIT_LEN; - sec_tbl->count = 0; - sec_tbl->sections = malloc(sizeof(struct section) * SECTBL_INIT_LEN); - - if (sec_tbl->sections == NULL) { - ERROR("cannot alloc"); - return M_ERROR; - } - - if (sectbl_alloc(sec_tbl, &sec_tbl->current, inital_section)) - return M_ERROR; - - return M_SUCCESS; -} - -void sectbl_free(struct section_table *sec_tbl) -{ - for (uint32_t i = 0; i < sec_tbl->count; i++) { - free(sec_tbl->sections[i].entries); - } - free(sec_tbl->sections); -} - -struct section_settings { - const char *name; - bool read; - bool write; - bool execute; - uint32_t align; -}; - -static struct section_settings default_section_settings[] = { - {"data", true, true, false, 1}, - {"bss", true, true, false, 1}, - {"rodata", true, false, false, 1}, - {"text", true, false, true, 4}, -}; - -int sectbl_alloc(struct section_table *sec_tbl, struct section **sec, - const char name[MAX_LEX_LENGTH]) -{ - if (sec_tbl->count >= sec_tbl->len) { - sec_tbl->len *= 2; - sec_tbl->sections = realloc(sec_tbl->sections, - sizeof(struct section) * sec_tbl->len); - - if (sec_tbl->sections == NULL) { - ERROR("cannot realloc"); - return M_ERROR; - } - } - - struct section *temp; - temp = &sec_tbl->sections[sec_tbl->count]; - strcpy(temp->name,name); - temp->count = 0; - temp->len = SECTBL_INIT_LEN; - temp->alignment = 1; - temp->read = true; - temp->write = true; - temp->execute = false; - temp->index = sec_tbl->count; - temp->entries = malloc(sizeof(struct section_entry) * SECTBL_INIT_LEN); - - for (int i = 0; i < 4; i++) { - struct section_settings *set = &default_section_settings[i]; - if (strcmp(set->name, name) == 0) { - temp->read = set->read; - temp->write = set->write; - temp->execute = set->execute; - temp->alignment = set->align; - break; - } - } - - if (temp->entries == NULL) { - ERROR("cannot alloc"); - return M_ERROR; - } - - sec_tbl->count++; - - *sec = temp; - return M_SUCCESS; -} - -int sectbl_get(struct section_table *sec_tbl, struct section **sec, - const char name[MAX_LEX_LENGTH]) -{ - for (uint32_t i = 0; i < sec_tbl->count; i++) { - struct section *temp = &sec_tbl->sections[i]; - if (strcmp(name, temp->name) == 0) { - if (sec != NULL) - *sec = temp; - return M_SUCCESS; - } - } - - return M_ERROR; -} - -int sec_push(struct section *section, struct section_entry entry) -{ - if (section->count >= section->len) { - section->len *= 2; - void *new = realloc(section->entries, - sizeof(struct section_entry) * section->len); - - if (new == NULL) { - ERROR("cannot realloc"); - return M_ERROR; - } - - section->entries = new; - } - - section->entries[section->count++] = entry; - - return M_SUCCESS; -} - -size_t sec_size(struct section *sec) -{ - size_t n = 0; - for (uint32_t i = 0; i < sec->count; i++) { - size_t t = sec->entries[i].size; - size_t m = t % sec->alignment; - if (m) - t += sec->alignment - m; - n += t; - } - return n; -} - -size_t sec_index(struct section *sec, uint32_t idx) -{ - size_t n = 0; - for (uint32_t i = 0; i < idx; i++) { - size_t t = sec->entries[i].size; - size_t m = t % sec->alignment; - if (m) - t += sec->alignment - m; - n += t; - } - return n; -} diff --git a/masm/strtab.c b/masm/strtab.c new file mode 100644 index 0000000..57d3d0e --- /dev/null +++ b/masm/strtab.c @@ -0,0 +1,54 @@ +#include <merror.h> +#include <string.h> +#include <stdlib.h> + +#include "asm.h" + +int strtab_get_str(struct str_table *strtab, const char *str, size_t *res) +{ + for (size_t i = 0; i < strtab->size; i ++) { + if (strcmp(strtab->ptr + i, str) == 0) { + if (res != NULL) + *res = i; + return M_SUCCESS; + } + } + + return M_ERROR; +} + +int strtab_write_str(struct str_table *strtab, const char *str, size_t *res) +{ + if (strtab_get_str(strtab, str, res) == M_SUCCESS) + return M_SUCCESS; + + size_t len = strlen(str); + char *new = realloc(strtab->ptr, strtab->size + len + 1); + if (new == NULL) + return M_ERROR; + strtab->ptr = new; + memcpy(strtab->ptr + strtab->size, str, len + 1); + + if (res != NULL) + *res = strtab->size; + + strtab->size += len + 1; + return M_SUCCESS; +} + +int strtab_init(struct str_table *strtab) +{ + strtab->size = 1; + strtab->ptr = malloc(1); + if (strtab->ptr == NULL) { + ERROR("cannot alloc"); + return M_ERROR; + } + *strtab->ptr = '\0'; + return M_SUCCESS; +} + +void strtab_free(struct str_table *strtab) +{ + free(strtab->ptr); +} diff --git a/masm/strtbl.c b/masm/strtbl.c deleted file mode 100644 index 7bdbbea..0000000 --- a/masm/strtbl.c +++ /dev/null @@ -1,54 +0,0 @@ -#include <merror.h> -#include <string.h> -#include <stdlib.h> - -#include "asm.h" - -int strtbl_get_str(struct str_table *str_tbl, const char *str, size_t *res) -{ - for (size_t i = 0; i < str_tbl->size; i ++) { - if (strcmp(str_tbl->ptr + i, str) == 0) { - if (res != NULL) - *res = i; - return M_SUCCESS; - } - } - - return M_ERROR; -} - -int strtbl_write_str(struct str_table *str_tbl, const char *str, size_t *res) -{ - if (strtbl_get_str(str_tbl, str, res) == M_SUCCESS) - return M_SUCCESS; - - size_t len = strlen(str); - char *new = realloc(str_tbl->ptr, str_tbl->size + len + 1); - if (new == NULL) - return M_ERROR; - str_tbl->ptr = new; - memcpy(str_tbl->ptr + str_tbl->size, str, len + 1); - - if (res != NULL) - *res = str_tbl->size; - - str_tbl->size += len + 1; - return M_SUCCESS; -} - -int strtbl_init(struct str_table *str_tbl) -{ - str_tbl->size = 1; - str_tbl->ptr = malloc(1); - if (str_tbl->ptr == NULL) { - ERROR("cannot alloc"); - return M_ERROR; - } - *str_tbl->ptr = '\0'; - return M_SUCCESS; -} - -void strtbl_free(struct str_table *str_tbl) -{ - free(str_tbl->ptr); -} diff --git a/masm/symtab.c b/masm/symtab.c new file mode 100644 index 0000000..7d40609 --- /dev/null +++ b/masm/symtab.c @@ -0,0 +1,70 @@ +#include <elf.h> +#include <merror.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "asm.h" + +#define SYMTBL_INIT_LEN 24 + +int symtab_init(struct symbol_table *symtab) +{ + symtab->size = SYMTBL_INIT_LEN; + symtab->len = 0; + symtab->symbols = malloc(sizeof(Elf32_Sym) * SYMTBL_INIT_LEN); + symtab->sections = malloc(sizeof(ssize_t) * SYMTBL_INIT_LEN); + + if (symtab->symbols == NULL || symtab->sections == NULL) { + ERROR("cannot alloc"); + return M_ERROR; + } + + return M_SUCCESS; +} + +void symtab_free(struct symbol_table *symtab) +{ + free(symtab->symbols); + free(symtab->sections); +} + +int symtab_push(struct symbol_table *symtab, Elf32_Sym sym, ssize_t sec_idx) +{ + if (symtab->len >= symtab->size) { + symtab->size *= 2; + symtab->symbols = realloc(symtab->symbols, + sizeof(Elf32_Sym) * symtab->size); + symtab->sections = realloc(symtab->sections, + sizeof(ssize_t) * symtab->size); + if (symtab->symbols == NULL || symtab->sections == NULL) { + ERROR("cannot realloc"); + return M_ERROR; + } + } + + symtab->symbols[symtab->len] = sym; + symtab->sections[symtab->len++] = sec_idx; + return M_SUCCESS; +} + +int symtab_find(struct symbol_table *symtab, Elf32_Sym **ptr, + size_t *idx, const char name[MAX_LEX_LENGTH]) +{ + for (uint32_t i = 0; i < symtab->len; i++) { + Elf32_Sym *sym = &symtab->symbols[i]; + const char *str = &symtab->strtab->ptr[sym->st_name]; + if (strcmp(str, name) == 0) { + if (ptr != NULL) + *ptr = sym; + + ptrdiff_t diff = sym - symtab->symbols; + if (idx != NULL) + *idx = diff / sizeof(Elf32_Sym); + + return M_SUCCESS; + } + } + return M_ERROR; +} diff --git a/masm/symtbl.c b/masm/symtbl.c deleted file mode 100644 index 8aa7bcf..0000000 --- a/masm/symtbl.c +++ /dev/null @@ -1,57 +0,0 @@ -#include <merror.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "asm.h" - -#define SYMTBL_INIT_LEN 24 - -int symtbl_init(struct symbol_table *sym_tbl) -{ - sym_tbl->len = SYMTBL_INIT_LEN; - sym_tbl->count = 0; - sym_tbl->symbols = malloc(sizeof(struct symbol) * SYMTBL_INIT_LEN); - - if (sym_tbl->symbols == NULL) { - ERROR("cannot alloc"); - return M_ERROR; - } - - return M_SUCCESS; -} - -void symtbl_free(struct symbol_table *sym_tbl) -{ - free(sym_tbl->symbols); -} - -int symtbl_push(struct symbol_table *sym_tbl, struct symbol sym) -{ - if (sym_tbl->count >= sym_tbl->len) { - sym_tbl->len *= 2; - sym_tbl->symbols = realloc(sym_tbl->symbols, - sizeof(struct symbol) * sym_tbl->len); - if (sym_tbl->symbols == NULL) { - ERROR("cannot relloc"); - return M_ERROR; - } - } - - sym_tbl->symbols[sym_tbl->count++] = sym; - return M_SUCCESS; -} - -int symtbl_find(struct symbol_table *sym_tbl, struct symbol **ptr, - const char name[MAX_LEX_LENGTH]) -{ - for (uint32_t i = 0; i < sym_tbl->count; i++) { - struct symbol *sym = &sym_tbl->symbols[i]; - if (strcmp(sym->name, name) == 0) { - if (ptr != NULL) - *ptr = sym; - return M_SUCCESS; - } - } - return M_ERROR; -} |