From b663f827057fc9fb199293bc1920cf27315d1846 Mon Sep 17 00:00:00 2001 From: Freya Murphy Date: Wed, 9 Oct 2024 12:07:59 -0400 Subject: refactor elf32 assembler, add support for multiple isa's in cmdline --- masm/asm.c | 549 ---------------------------------------------------- masm/asm.h | 99 ---------- masm/asm/elf32.c | 567 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ masm/asm/elf32.h | 90 +++++++++ masm/asm/strtab.c | 54 ++++++ masm/main.c | 134 ++++++++++++- masm/masm.h | 45 +++++ masm/strtab.c | 54 ------ 8 files changed, 882 insertions(+), 710 deletions(-) delete mode 100644 masm/asm.c create mode 100644 masm/asm/elf32.c create mode 100644 masm/asm/elf32.h create mode 100644 masm/asm/strtab.c create mode 100644 masm/masm.h delete mode 100644 masm/strtab.c diff --git a/masm/asm.c b/masm/asm.c deleted file mode 100644 index 8cbc439..0000000 --- a/masm/asm.c +++ /dev/null @@ -1,549 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "asm.h" -#include "gen.h" -#include "tab.h" - -extern char *current_file; - -#define SYMSEC_STUB -1 -#define SYMSEC_EXTERN -1 - -#define SEC_ALIGN 0x1000 - -static int elf_rel_type(enum reference_type ty) { - switch (ty) { - case REF_NONE: - return R_MIPS_NONE; - case REF_MIPS_16: - return R_MIPS_16; - case REF_MIPS_26: - return R_MIPS_26; - case REF_MIPS_PC16: - return R_MIPS_PC16; - case REF_MIPS_LO16: - return R_MIPS_LO16; - case REF_MIPS_HI16: - return R_MIPS_HI16; - } - - return R_MIPS_NONE; -} - -static int elf_section_init_reltab(struct section *sec, - struct elf_section *elf_sec) -{ - Elf32_Rel *reltab = malloc(sizeof(Elf32_Rel) * - sec->reftab.len); - - if (reltab == NULL) { - PERROR("cannot alloc"); - return M_ERROR; - } - for (uint32_t i = 0; i < sec->reftab.len; i++) { - Elf32_Rel *rel = &reltab[i]; - struct reference *ref = &sec->reftab.references[i]; - rel->r_offset = B32(ref->offset); - int sym = ref->symbol->tabidx + 1; - int type = elf_rel_type(ref->type); - rel->r_info = B32(ELF32_R_INFO(sym, type)); - } - - elf_sec->reltab_len = sec->reftab.len; - elf_sec->reltab = reltab; - - return M_SUCCESS; -} - -static int elf_section_init(struct section *sec, struct elf_section *elf_sec) -{ - elf_sec->data = sec; - elf_sec->shdr_idx = 0; // dont know yet - elf_sec->reltab_shidx = 0; // dont know yet - elf_sec->reltab_len = sec->reftab.len; - elf_sec->reltab = NULL; - - if (sec->reftab.len && elf_section_init_reltab(sec, elf_sec)) - return M_ERROR; - - return M_SUCCESS; -} - -/* free an elf section */ -static void elf_section_free(struct elf_section *sec) -{ - if (sec->reltab != NULL) - free(sec->reltab); -} - -static int asm_init_sections(struct assembler *assembler) -{ - struct section *sections = assembler->gen.sections; - uint32_t len = assembler->gen.sections_len; - - struct elf_section *elftab = malloc(sizeof(struct elf_section) * len); - if (elftab == NULL) { - PERROR("cannot alloc"); - return M_ERROR; - } - - for (uint32_t i = 0; i < len; i++) { - struct elf_section *elfsec = &elftab[i]; - elfsec->data = §ions[i]; - if (elf_section_init(§ions[i], elfsec)) { - free(elftab); - return M_ERROR; - } - } - - assembler->sections = elftab; - assembler->section_len = len; - return M_SUCCESS; -} - -static int elf_sym_bind(enum symbol_type ty) { - switch (ty) { - case SYM_LOCAL: - return STB_LOCAL; - case SYM_GLOBAL: - return STB_GLOBAL; - case SYM_EXTERN: - return STB_GLOBAL; - } - - return STB_GLOBAL; -} - -static int asm_init_symtab(struct assembler *assembler) { - struct symbol_table *symtab = &assembler->gen.symtab; - size_t len = symtab->len + 1; - Elf32_Sym *elftab = malloc(sizeof(Elf32_Sym) * len); - if (elftab == NULL) { - PERROR("cannot alloc"); - } - - // add null entry - elftab[0] = (Elf32_Sym) {0}; - - // add rest of the entries - for (uint32_t i = 0; i < symtab->len; i++) { - struct symbol *sym = &symtab->symbols[i]; - int bind = elf_sym_bind(sym->type); - int type = STT_NOTYPE; - - // get name - size_t str_off; - if (strtab_write_str(&assembler->strtab, sym->name.str, - &str_off)) { - free(elftab); - return M_ERROR; - } - - elftab[i+1] = (Elf32_Sym) { - .st_name = B32(str_off), - .st_info = ELF32_ST_INFO(bind, type), - .st_size = 0, - .st_other = 0, - .st_value = B32(sym->offset), - .st_shndx = 0, - }; - } - - assembler->symbols = elftab; - assembler->symtab_len = len; - - return M_SUCCESS; -} - -static int parse_file(struct assembler *assembler) -{ - if (generate_mips32r6(&assembler->gen)) - return M_ERROR; - if (asm_init_sections(assembler)) - return M_ERROR; - if (asm_init_symtab(assembler)) - return M_ERROR; - return M_SUCCESS; -} - -static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res, - uint32_t *res2) -{ - uint32_t max_entries = 0; - max_entries += 1; // null - max_entries += 1; // symtab - max_entries += 1; // strtab - max_entries += 1; // shtrtab - max_entries += assembler->section_len; // sections - max_entries += assembler->section_len; // reltabs per section - - Elf32_Shdr *shdr = malloc(sizeof(Elf32_Shdr) * max_entries); - - if (shdr == NULL) { - PERROR("cannot alloc"); - return M_ERROR; - } - - size_t str_off; - uint32_t count = 0; - - // null - shdr[count++] = (Elf32_Shdr) {0}; - - // reltables - for (uint32_t i = 0; i < assembler->section_len; i++) { - struct elf_section *sec = &assembler->sections[i]; - const char *prefix = ".reltab"; - char reltab_name[MAX_EXT_LENGTH + strlen(prefix)]; - - if (sec->reltab_len == 0) - continue; - - strcpy(reltab_name, prefix); - strncat(reltab_name, sec->data->name.str, - MAX_EXT_LENGTH - strlen(prefix)); - - if (strtab_write_str(&assembler->shstrtab, - reltab_name, &str_off)) { - free(shdr); - return M_ERROR; - } - - sec->reltab_shidx = count; - shdr[count++] = (Elf32_Shdr) { - .sh_name = B32(str_off), - .sh_type = B32(SHT_REL), - .sh_flags = 0, - .sh_addr = 0, - .sh_offset = 0, - .sh_size = 0, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = B32(1), - .sh_entsize = B32(sizeof(Elf32_Rel)), - }; - } - - // for each section - for (uint32_t i = 0; i < assembler->section_len; i++) { - struct elf_section *sec = &assembler->sections[i]; - const char *name = sec->data->name.str; - - if (strtab_write_str(&assembler->shstrtab, name, &str_off)) { - free(shdr); - return M_ERROR; - } - - sec->shdr_idx = count; - if (sec->reltab_len != 0) - shdr[sec->reltab_shidx].sh_info = B32(count); - - shdr[count++] = (Elf32_Shdr){ - .sh_name = B32(str_off), - .sh_type = B32(sec->data->execute ? - SHT_PROGBITS : SHT_NOBITS), - .sh_flags = B32( - (sec->data->write << 0) | - (sec->data->execute << 2) | - SHF_ALLOC), - .sh_addr = 0, - .sh_offset = 0, - .sh_size = 0, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = B32(SEC_ALIGN), - .sh_entsize = 0, - }; - } - - // symbol table - if (strtab_write_str(&assembler->shstrtab, ".symtab", &str_off)) { - free(shdr); - return M_ERROR; - } - - assembler->symtab_shidx = count; - shdr[count++] = (Elf32_Shdr) { - .sh_name = B32(str_off), - .sh_type = B32(SHT_SYMTAB), - .sh_flags = 0, - .sh_addr = 0, - .sh_offset = 0, - .sh_size = 0, - .sh_link = 1, - .sh_info = 0, - .sh_addralign = B32(1), - .sh_entsize = B32(sizeof(Elf32_Sym)), - }; - - // string table - if (strtab_write_str(&assembler->shstrtab, ".strtab", &str_off)) { - free(shdr); - return M_ERROR; - } - - assembler->strtab_shidx = count; - shdr[count++] = (Elf32_Shdr) { - .sh_name = B32(str_off), - .sh_type = B32(SHT_STRTAB), - .sh_flags = B32(SHF_STRINGS), - .sh_addr = 0, - .sh_offset = 0, - .sh_size = 0, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = B32(1), - .sh_entsize = 0, - }; - - // sh string table - if (strtab_write_str(&assembler->shstrtab, ".shstrtab", &str_off)) { - free(shdr); - return M_ERROR; - } - - assembler->shstrtab_shidx = count; - shdr[count++] = (Elf32_Shdr) { - .sh_name = B32(str_off), - .sh_type = B32(SHT_STRTAB), - .sh_flags = B32(SHF_STRINGS), - .sh_addr = 0, - .sh_offset = 0, - .sh_size = 0, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = B32(1), - .sh_entsize = 0, - }; - - for (uint32_t i = 0; i < assembler->section_len; i++) { - struct elf_section *sec = &assembler->sections[i]; - if (sec->reltab_len == 0) - continue; - shdr[sec->reltab_shidx].sh_link = - B32(assembler->symtab_shidx); - } - - *res = shdr; - *res2 = count; - - return M_SUCCESS; -} - -static void update_offsets(struct assembler *assembler, Elf32_Ehdr *ehdr) -{ - Elf32_Shdr *shdr = (Elf32_Shdr *) assembler->shdr; - uint32_t ptr = 0; - - // we must now correct offets and sizes inside the ehdr, phdr, - // and shdr - ptr += sizeof(Elf32_Ehdr); - - // reltbls - for (uint32_t i = 0; i < assembler->section_len; i++) { - struct elf_section *sec = &assembler->sections[i]; - if (sec->reltab_len == 0) - continue; - int idx = sec->reltab_shidx; - int len = sec->reltab_len; - shdr[idx].sh_offset = B32(ptr); - shdr[idx].sh_size = B32(len * sizeof(Elf32_Rel)); - ptr += len * sizeof(Elf32_Rel); - } - - // sections - size_t v_addr = 0; - for (uint32_t i = 0; i < assembler->section_len; i++) { - - size_t pad = v_addr % SEC_ALIGN; - if (pad) - pad = SEC_ALIGN - pad; - v_addr += pad; - - struct elf_section *sec = &assembler->sections[i]; - uint32_t idx = sec->shdr_idx; - uint32_t size = sec->data->len; - shdr[idx].sh_offset = B32(ptr); - shdr[idx].sh_size = B32(size); - shdr[idx].sh_addr = B32(v_addr); - v_addr += size; - ptr += size; - } - - // symtab - { - uint32_t len = assembler->symtab_len; - uint32_t size = len * sizeof(Elf32_Sym); - shdr[assembler->symtab_shidx].sh_offset = B32(ptr); - shdr[assembler->symtab_shidx].sh_link = - B32(assembler->strtab_shidx); - shdr[assembler->symtab_shidx].sh_size = B32(size); - ptr += size; - } - - // strtab - shdr[assembler->strtab_shidx].sh_offset = B32(ptr); - shdr[assembler->strtab_shidx].sh_size = B32(assembler->strtab.size); - ptr += assembler->strtab.size; - - // shstrtab - shdr[assembler->shstrtab_shidx].sh_offset = B32(ptr); - shdr[assembler->shstrtab_shidx].sh_size = - B32(assembler->shstrtab.size); - ptr += assembler->shstrtab.size; - // shdr - ehdr->e_shoff = B32(ptr); -} - -static int write_file(struct assembler *assembler, Elf32_Ehdr *ehdr, - const char *path) -{ - FILE *out = fopen(path, "w"); - - if (out == NULL) - { - PERROR("cannot write '%s'", path); - return M_ERROR; - } - - // ehdr - fwrite(ehdr, sizeof(Elf32_Ehdr), 1, out); - - // reltbls - for (uint32_t i = 0; i < assembler->section_len; i++) { - struct elf_section *sec = &assembler->sections[i]; - void *ptr = sec->reltab; - int len = sec->reltab_len; - if (len < 1) - continue; - fwrite(ptr, sizeof(Elf32_Rel), len, out); - } - - // sections - for (uint32_t i = 0; i < assembler->section_len; i++) { - struct elf_section *sec = &assembler->sections[i]; - void *ptr = sec->data->data; - size_t size = sec->data->len; - fwrite(ptr, 1, size, out); - } - - // sym tbl - fwrite(assembler->symbols, sizeof(Elf32_Sym), assembler->symtab_len, - out); - - // str tbl - fwrite(assembler->strtab.ptr, 1, assembler->strtab.size, out); - - // shstr tbl - fwrite(assembler->shstrtab.ptr, 1, assembler->shstrtab.size, out); - - // shdr - fwrite(assembler->shdr, sizeof(Elf32_Shdr), assembler->shdr_len, out); - - // close - fclose(out); - - return M_SUCCESS; -} - -static void update_sym_shndx(struct assembler *assembler) -{ - for (uint32_t i = 1; i < assembler->symtab_len; i++) { - Elf32_Sym *esym = &assembler->symbols[i]; - struct symbol *sym = &assembler->gen.symtab.symbols[i - 1]; - - // get shindx - int shindx = 0; - if (sym->secidx != SYM_SEC_STUB) - shindx = assembler->sections[sym->secidx].shdr_idx; - else if (sym->type == SYM_EXTERN) - shindx = 0; - - esym->st_shndx = B16(shindx); - } -} - -static int assemble_elf(struct assembler *assembler, const char *out) -{ - if (assemble_shdr(assembler, &assembler->shdr, &assembler->shdr_len)) - return M_ERROR; - - Elf32_Ehdr ehdr = MIPS_ELF_EHDR; - ehdr.e_shnum = B16(assembler->shdr_len); - ehdr.e_shstrndx = B16(assembler->shstrtab_shidx); - update_offsets(assembler, &ehdr); - update_sym_shndx(assembler); - - if (write_file(assembler, &ehdr, out)) - return M_ERROR; - - return M_SUCCESS; -} - -int assemble_file(struct assembler_arguments args) -{ - struct assembler assembler; - int res = M_SUCCESS; - - current_file = args.in_file; - - if (assembler_init(&assembler, args.in_file)) - return M_ERROR; - - if (res == M_SUCCESS) - res = parse_file(&assembler); - - if (res == M_SUCCESS) - res = assemble_elf(&assembler, args.out_file); - - assembler_free(&assembler); - - return res; -} - -int assembler_init(struct assembler *assembler, const char *path) -{ - assembler->shdr = NULL; - assembler->symbols = NULL; - assembler->sections = NULL; - assembler->strtab.ptr = NULL; - assembler->shstrtab.ptr = NULL; - assembler->gen.sections = NULL; - assembler->gen.symtab.symbols = NULL; - assembler->section_len = 0; - - if (generator_init(path, &assembler->gen)) - return M_ERROR; - - if (strtab_init(&assembler->shstrtab)) - return M_ERROR; - - if (strtab_init(&assembler->strtab)) - return M_ERROR; - - return M_SUCCESS; -} - -void assembler_free(struct assembler *assembler) -{ - if (assembler->shdr) - free(assembler->shdr); - if (assembler->symbols) - free(assembler->symbols); - if (assembler->sections) { - for (uint32_t i = 0; i < assembler->section_len; i++) - elf_section_free(&assembler->sections[i]); - free(assembler->sections); - } - - strtab_free(&assembler->strtab); - strtab_free(&assembler->shstrtab); - generator_free(&assembler->gen); -} diff --git a/masm/asm.h b/masm/asm.h index fecd335..e69de29 100644 --- a/masm/asm.h +++ b/masm/asm.h @@ -1,99 +0,0 @@ -/* Copyright (c) 2024 Freya Murphy */ - -#ifndef __ASM_H__ -#define __ASM_H__ - -#include - -#include "gen.h" - -/// -/// ELF string table -/// - -struct elf_str_table { - // size of the ptr in bytes - size_t size; - - // pointer that contains - // the strings - char *ptr; -}; - -/* initalize a string table */ -int strtab_init(struct elf_str_table *strtab); - -/* free a string table */ -void strtab_free(struct elf_str_table *strtab); - -/* get a string form the string table */ -int strtab_get_str(struct elf_str_table *strtab, const char *str, size_t *res); - -/* get or append a string into the string table */ -int strtab_write_str(struct elf_str_table *strtab, const char *str, size_t *res); - -/// -/// elf section -/// - -/* holds a section of the asm file (i.e. .text, .bss, .data) */ -struct elf_section { - // section data *weak* pointer - struct section *data; - - // index of the section in - // the ELF shdr - size_t shdr_idx; - - // relocation table - size_t reltab_shidx; - uint32_t reltab_len; - Elf32_Rel *reltab; -}; - -/// -/// assembler -/// - -struct assembler { - // the code generator - struct generator gen; - - /// symbol table - size_t symtab_shidx; - size_t symtab_len; - Elf32_Sym *symbols; - - // sh string table - size_t strtab_shidx; - struct elf_str_table strtab; - - // string table - size_t shstrtab_shidx; - struct elf_str_table shstrtab; - - /// sections - uint32_t section_len; - struct elf_section *sections; - - /// section header - Elf32_Shdr *shdr; - uint32_t shdr_len; -}; - -/* defines arguments to the assembler */ -struct assembler_arguments { - char *in_file; - char *out_file; -}; - -/* initalize the assembler */ -int assembler_init(struct assembler *assembler, const char *path); - -/* free the assembler */ -void assembler_free(struct assembler *assembler); - -/* assemble a file */ -int assemble_file(struct assembler_arguments args); - -#endif /* __ASM_H__ */ diff --git a/masm/asm/elf32.c b/masm/asm/elf32.c new file mode 100644 index 0000000..e454273 --- /dev/null +++ b/masm/asm/elf32.c @@ -0,0 +1,567 @@ +#include +#include +#include +#include + +#include "../tab.h" +#include "../masm.h" +#include "elf32.h" + +extern char *current_file; + +#define SYMSEC_STUB -1 +#define SYMSEC_EXTERN -1 + +#define SEC_ALIGN 0x1000 + +static int elf_rel_type(enum reference_type ty) { + switch (ty) { + case REF_NONE: + return R_MIPS_NONE; + case REF_MIPS_16: + return R_MIPS_16; + case REF_MIPS_26: + return R_MIPS_26; + case REF_MIPS_PC16: + return R_MIPS_PC16; + case REF_MIPS_LO16: + return R_MIPS_LO16; + case REF_MIPS_HI16: + return R_MIPS_HI16; + } + + return R_MIPS_NONE; +} + +static int elf_section_init_reltab(struct section *sec, + struct elf_section *elf_sec) +{ + Elf32_Rel *reltab = malloc(sizeof(Elf32_Rel) * + sec->reftab.len); + + if (reltab == NULL) { + PERROR("cannot alloc"); + return M_ERROR; + } + for (uint32_t i = 0; i < sec->reftab.len; i++) { + Elf32_Rel *rel = &reltab[i]; + struct reference *ref = &sec->reftab.references[i]; + rel->r_offset = B32(ref->offset); + int sym = ref->symbol->tabidx + 1; + int type = elf_rel_type(ref->type); + rel->r_info = B32(ELF32_R_INFO(sym, type)); + } + + elf_sec->reltab_len = sec->reftab.len; + elf_sec->reltab = reltab; + + return M_SUCCESS; +} + +static int elf_section_init(struct section *sec, struct elf_section *elf_sec) +{ + elf_sec->data = sec; + elf_sec->shdr_idx = 0; // dont know yet + elf_sec->reltab_shidx = 0; // dont know yet + elf_sec->reltab_len = sec->reftab.len; + elf_sec->reltab = NULL; + + if (sec->reftab.len && elf_section_init_reltab(sec, elf_sec)) + return M_ERROR; + + return M_SUCCESS; +} + +/* free an elf section */ +static void elf_section_free(struct elf_section *sec) +{ + if (sec->reltab != NULL) + free(sec->reltab); +} + +static int asm_init_sections(struct elf_assembler *assembler) +{ + struct section *sections = assembler->gen->sections; + uint32_t len = assembler->gen->sections_len; + + struct elf_section *elftab = malloc(sizeof(struct elf_section) * len); + if (elftab == NULL) { + PERROR("cannot alloc"); + return M_ERROR; + } + + for (uint32_t i = 0; i < len; i++) { + struct elf_section *elfsec = &elftab[i]; + elfsec->data = §ions[i]; + if (elf_section_init(§ions[i], elfsec)) { + free(elftab); + return M_ERROR; + } + } + + assembler->sections = elftab; + assembler->section_len = len; + return M_SUCCESS; +} + +static int elf_sym_bind(enum symbol_type ty) { + switch (ty) { + case SYM_LOCAL: + return STB_LOCAL; + case SYM_GLOBAL: + return STB_GLOBAL; + case SYM_EXTERN: + return STB_GLOBAL; + } + + return STB_GLOBAL; +} + +static int asm_init_symtab(struct elf_assembler *assembler) { + struct symbol_table *symtab = &assembler->gen->symtab; + size_t len = symtab->len + 1; + Elf32_Sym *elftab = malloc(sizeof(Elf32_Sym) * len); + if (elftab == NULL) { + PERROR("cannot alloc"); + } + + // add null entry + elftab[0] = (Elf32_Sym) {0}; + + // add rest of the entries + for (uint32_t i = 0; i < symtab->len; i++) { + struct symbol *sym = &symtab->symbols[i]; + int bind = elf_sym_bind(sym->type); + int type = STT_NOTYPE; + + // get name + size_t str_off; + if (strtab_write_str(&assembler->strtab, sym->name.str, + &str_off)) { + free(elftab); + return M_ERROR; + } + + // check if symbol is undefined + if (sym->secidx == SYM_SEC_STUB) { + if (sym->type == SYM_LOCAL && + assembler->args->extern_undefined == false) { + ERROR("undefined symbol %s", sym->name.str); + return M_ERROR; + } + sym->secidx = 0; + bind = STB_GLOBAL; + } + + elftab[i+1] = (Elf32_Sym) { + .st_name = B32(str_off), + .st_info = ELF32_ST_INFO(bind, type), + .st_size = 0, + .st_other = 0, + .st_value = B32(sym->offset), + .st_shndx = 0, + }; + } + + assembler->symbols = elftab; + assembler->symtab_len = len; + + return M_SUCCESS; +} + +static int assemble_shdr(struct elf_assembler *assembler, Elf32_Shdr **res, + uint32_t *res2) +{ + uint32_t max_entries = 0; + max_entries += 1; // null + max_entries += 1; // symtab + max_entries += 1; // strtab + max_entries += 1; // shtrtab + max_entries += assembler->section_len; // sections + max_entries += assembler->section_len; // reltabs per section + + Elf32_Shdr *shdr = malloc(sizeof(Elf32_Shdr) * max_entries); + + if (shdr == NULL) { + PERROR("cannot alloc"); + return M_ERROR; + } + + size_t str_off; + uint32_t count = 0; + + // null + shdr[count++] = (Elf32_Shdr) {0}; + + // reltables + for (uint32_t i = 0; i < assembler->section_len; i++) { + struct elf_section *sec = &assembler->sections[i]; + const char *prefix = ".reltab"; + char reltab_name[MAX_EXT_LENGTH + strlen(prefix)]; + + if (sec->reltab_len == 0) + continue; + + strcpy(reltab_name, prefix); + strncat(reltab_name, sec->data->name.str, + MAX_EXT_LENGTH - strlen(prefix)); + + if (strtab_write_str(&assembler->shstrtab, + reltab_name, &str_off)) { + free(shdr); + return M_ERROR; + } + + sec->reltab_shidx = count; + shdr[count++] = (Elf32_Shdr) { + .sh_name = B32(str_off), + .sh_type = B32(SHT_REL), + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = B32(1), + .sh_entsize = B32(sizeof(Elf32_Rel)), + }; + } + + // for each section + for (uint32_t i = 0; i < assembler->section_len; i++) { + struct elf_section *sec = &assembler->sections[i]; + const char *name = sec->data->name.str; + + if (strtab_write_str(&assembler->shstrtab, name, &str_off)) { + free(shdr); + return M_ERROR; + } + + sec->shdr_idx = count; + if (sec->reltab_len != 0) + shdr[sec->reltab_shidx].sh_info = B32(count); + + shdr[count++] = (Elf32_Shdr){ + .sh_name = B32(str_off), + .sh_type = B32(sec->data->execute ? + SHT_PROGBITS : SHT_NOBITS), + .sh_flags = B32( + (sec->data->write << 0) | + (sec->data->execute << 2) | + SHF_ALLOC), + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = B32(SEC_ALIGN), + .sh_entsize = 0, + }; + } + + // symbol table + if (strtab_write_str(&assembler->shstrtab, ".symtab", &str_off)) { + free(shdr); + return M_ERROR; + } + + assembler->symtab_shidx = count; + shdr[count++] = (Elf32_Shdr) { + .sh_name = B32(str_off), + .sh_type = B32(SHT_SYMTAB), + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 1, + .sh_info = 0, + .sh_addralign = B32(1), + .sh_entsize = B32(sizeof(Elf32_Sym)), + }; + + // string table + if (strtab_write_str(&assembler->shstrtab, ".strtab", &str_off)) { + free(shdr); + return M_ERROR; + } + + assembler->strtab_shidx = count; + shdr[count++] = (Elf32_Shdr) { + .sh_name = B32(str_off), + .sh_type = B32(SHT_STRTAB), + .sh_flags = B32(SHF_STRINGS), + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = B32(1), + .sh_entsize = 0, + }; + + // sh string table + if (strtab_write_str(&assembler->shstrtab, ".shstrtab", &str_off)) { + free(shdr); + return M_ERROR; + } + + assembler->shstrtab_shidx = count; + shdr[count++] = (Elf32_Shdr) { + .sh_name = B32(str_off), + .sh_type = B32(SHT_STRTAB), + .sh_flags = B32(SHF_STRINGS), + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = B32(1), + .sh_entsize = 0, + }; + + for (uint32_t i = 0; i < assembler->section_len; i++) { + struct elf_section *sec = &assembler->sections[i]; + if (sec->reltab_len == 0) + continue; + shdr[sec->reltab_shidx].sh_link = + B32(assembler->symtab_shidx); + } + + *res = shdr; + *res2 = count; + + return M_SUCCESS; +} + +static void update_offsets(struct elf_assembler *assembler, Elf32_Ehdr *ehdr) +{ + Elf32_Shdr *shdr = (Elf32_Shdr *) assembler->shdr; + uint32_t ptr = 0; + + // we must now correct offets and sizes inside the ehdr, phdr, + // and shdr + ptr += sizeof(Elf32_Ehdr); + + // reltbls + for (uint32_t i = 0; i < assembler->section_len; i++) { + struct elf_section *sec = &assembler->sections[i]; + if (sec->reltab_len == 0) + continue; + int idx = sec->reltab_shidx; + int len = sec->reltab_len; + shdr[idx].sh_offset = B32(ptr); + shdr[idx].sh_size = B32(len * sizeof(Elf32_Rel)); + ptr += len * sizeof(Elf32_Rel); + } + + // sections + size_t v_addr = 0; + for (uint32_t i = 0; i < assembler->section_len; i++) { + + size_t pad = v_addr % SEC_ALIGN; + if (pad) + pad = SEC_ALIGN - pad; + v_addr += pad; + + struct elf_section *sec = &assembler->sections[i]; + uint32_t idx = sec->shdr_idx; + uint32_t size = sec->data->len; + shdr[idx].sh_offset = B32(ptr); + shdr[idx].sh_size = B32(size); + shdr[idx].sh_addr = B32(v_addr); + v_addr += size; + ptr += size; + } + + // symtab + { + uint32_t len = assembler->symtab_len; + uint32_t size = len * sizeof(Elf32_Sym); + shdr[assembler->symtab_shidx].sh_offset = B32(ptr); + shdr[assembler->symtab_shidx].sh_link = + B32(assembler->strtab_shidx); + shdr[assembler->symtab_shidx].sh_size = B32(size); + ptr += size; + } + + // strtab + shdr[assembler->strtab_shidx].sh_offset = B32(ptr); + shdr[assembler->strtab_shidx].sh_size = B32(assembler->strtab.size); + ptr += assembler->strtab.size; + + // shstrtab + shdr[assembler->shstrtab_shidx].sh_offset = B32(ptr); + shdr[assembler->shstrtab_shidx].sh_size = + B32(assembler->shstrtab.size); + ptr += assembler->shstrtab.size; + // shdr + ehdr->e_shoff = B32(ptr); +} + +static int write_file(struct elf_assembler *assembler, Elf32_Ehdr *ehdr, + const char *path) +{ + FILE *out = fopen(path, "w"); + + if (out == NULL) + { + PERROR("cannot write '%s'", path); + return M_ERROR; + } + + // ehdr + fwrite(ehdr, sizeof(Elf32_Ehdr), 1, out); + + // reltbls + for (uint32_t i = 0; i < assembler->section_len; i++) { + struct elf_section *sec = &assembler->sections[i]; + void *ptr = sec->reltab; + int len = sec->reltab_len; + if (len < 1) + continue; + fwrite(ptr, sizeof(Elf32_Rel), len, out); + } + + // sections + for (uint32_t i = 0; i < assembler->section_len; i++) { + struct elf_section *sec = &assembler->sections[i]; + void *ptr = sec->data->data; + size_t size = sec->data->len; + fwrite(ptr, 1, size, out); + } + + // sym tbl + fwrite(assembler->symbols, sizeof(Elf32_Sym), assembler->symtab_len, + out); + + // str tbl + fwrite(assembler->strtab.ptr, 1, assembler->strtab.size, out); + + // shstr tbl + fwrite(assembler->shstrtab.ptr, 1, assembler->shstrtab.size, out); + + // shdr + fwrite(assembler->shdr, sizeof(Elf32_Shdr), assembler->shdr_len, out); + + // close + fclose(out); + + return M_SUCCESS; +} + +static void update_sym_shndx(struct elf_assembler *assembler) +{ + for (uint32_t i = 1; i < assembler->symtab_len; i++) { + Elf32_Sym *esym = &assembler->symbols[i]; + struct symbol *sym = &assembler->gen->symtab.symbols[i - 1]; + + // get shindx + int shindx = 0; + if (sym->secidx != SYM_SEC_STUB) + shindx = assembler->sections[sym->secidx].shdr_idx; + else if (sym->type == SYM_EXTERN) + shindx = 0; + + esym->st_shndx = B16(shindx); + } +} + +static int assemble_elf(struct elf_assembler *assembler, const char *out) +{ + if (asm_init_sections(assembler)) + return M_ERROR; + + if (asm_init_symtab(assembler)) + return M_ERROR; + + if (assemble_shdr(assembler, &assembler->shdr, &assembler->shdr_len)) + return M_ERROR; + + // get ehdr flags + uint32_t flags = EF_MIPS_NAN2008; + switch (assembler->args->isa) { + case ISA_MIPS1: + flags |= EF_MIPS_ARCH_1; + break; + case ISA_MIPS32R2: + flags |= EF_MIPS_ARCH_32R2; + break; + case ISA_MIPS32R6: + flags |= EF_MIPS_ARCH_32R6; + break; + } + switch (assembler->args->abi) { + case ABI_O32: + flags |= EF_MIPS_ABI_O32; + break; + case ABI_NONE: + break; + } + + Elf32_Ehdr ehdr = MIPS_ELF_EHDR; + ehdr.e_shnum = B16(assembler->shdr_len); + ehdr.e_shstrndx = B16(assembler->shstrtab_shidx); + ehdr.e_flags = B32(flags); + update_offsets(assembler, &ehdr); + update_sym_shndx(assembler); + + if (write_file(assembler, &ehdr, out)) + return M_ERROR; + + return M_SUCCESS; +} + +static int assembler_init(struct elf_assembler *assembler, + struct generator *gen, + struct arguments *args) +{ + assembler->args = args; + assembler->gen = gen; + + assembler->shdr = NULL; + assembler->symbols = NULL; + assembler->sections = NULL; + assembler->strtab.ptr = NULL; + assembler->shstrtab.ptr = NULL; + assembler->section_len = 0; + + if (strtab_init(&assembler->shstrtab)) + return M_ERROR; + + if (strtab_init(&assembler->strtab)) + return M_ERROR; + + return M_SUCCESS; +} + +static void assembler_free(struct elf_assembler *assembler) +{ + if (assembler->shdr) + free(assembler->shdr); + if (assembler->symbols) + free(assembler->symbols); + if (assembler->sections) { + for (uint32_t i = 0; i < assembler->section_len; i++) + elf_section_free(&assembler->sections[i]); + free(assembler->sections); + } + + strtab_free(&assembler->strtab); + strtab_free(&assembler->shstrtab); +} + +int assemble_elf32(struct generator *gen, struct arguments *args) +{ + struct elf_assembler assembler; + int res = M_SUCCESS; + current_file = args->in_file; + + if (assembler_init(&assembler, gen, args)) + return M_ERROR; + + res = assemble_elf(&assembler, args->out_file); + assembler_free(&assembler); + + return res; +} + diff --git a/masm/asm/elf32.h b/masm/asm/elf32.h new file mode 100644 index 0000000..76aeb19 --- /dev/null +++ b/masm/asm/elf32.h @@ -0,0 +1,90 @@ +/* Copyright (c) 2024 Freya Murphy */ + +#ifndef __ELF32_H__ +#define __ELF32_H__ + +#include + +#include "../gen.h" +#include "../masm.h" + +/// +/// ELF string table +/// + +struct elf_str_table { + // size of the ptr in bytes + size_t size; + + // pointer that contains + // the strings + char *ptr; +}; + +/* initalize a string table */ +int strtab_init(struct elf_str_table *strtab); + +/* free a string table */ +void strtab_free(struct elf_str_table *strtab); + +/* get a string form the string table */ +int strtab_get_str(struct elf_str_table *strtab, const char *str, size_t *res); + +/* get or append a string into the string table */ +int strtab_write_str(struct elf_str_table *strtab, const char *str, size_t *res); + +/// +/// elf section +/// + +/* holds a section of the asm file (i.e. .text, .bss, .data) */ +struct elf_section { + // section data *weak* pointer + struct section *data; + + // index of the section in + // the ELF shdr + size_t shdr_idx; + + // relocation table + size_t reltab_shidx; + uint32_t reltab_len; + Elf32_Rel *reltab; +}; + +/// +/// assembler +/// + +struct elf_assembler { + // arguments passed in + struct arguments *args; + + // the code generator + struct generator *gen; + + /// symbol table + size_t symtab_shidx; + size_t symtab_len; + Elf32_Sym *symbols; + + // sh string table + size_t strtab_shidx; + struct elf_str_table strtab; + + // string table + size_t shstrtab_shidx; + struct elf_str_table shstrtab; + + /// sections + uint32_t section_len; + struct elf_section *sections; + + /// section header + Elf32_Shdr *shdr; + uint32_t shdr_len; +}; + +int assemble_elf32(struct generator *gen, struct arguments *args); + +#endif /* __ELF32_H__ */ diff --git a/masm/asm/strtab.c b/masm/asm/strtab.c new file mode 100644 index 0000000..799f0dc --- /dev/null +++ b/masm/asm/strtab.c @@ -0,0 +1,54 @@ +#include +#include +#include + +#include "elf32.h" + +int strtab_get_str(struct elf_str_table *strtab, const char *str, size_t *res) +{ + for (size_t i = 0; i < strtab->size; i ++) { + if (strcmp(strtab->ptr + i, str) == 0) { + if (res != NULL) + *res = i; + return M_SUCCESS; + } + } + + return M_ERROR; +} + +int strtab_write_str(struct elf_str_table *strtab, const char *str, size_t *res) +{ + if (strtab_get_str(strtab, str, res) == M_SUCCESS) + return M_SUCCESS; + + size_t len = strlen(str); + char *new = realloc(strtab->ptr, strtab->size + len + 1); + if (new == NULL) + return M_ERROR; + strtab->ptr = new; + memcpy(strtab->ptr + strtab->size, str, len + 1); + + if (res != NULL) + *res = strtab->size; + + strtab->size += len + 1; + return M_SUCCESS; +} + +int strtab_init(struct elf_str_table *strtab) +{ + strtab->size = 1; + strtab->ptr = malloc(1); + if (strtab->ptr == NULL) { + PERROR("cannot alloc"); + return M_ERROR; + } + *strtab->ptr = '\0'; + return M_SUCCESS; +} + +void strtab_free(struct elf_str_table *strtab) +{ + free(strtab->ptr); +} diff --git a/masm/main.c b/masm/main.c index caa8420..a97d949 100644 --- a/masm/main.c +++ b/masm/main.c @@ -1,33 +1,151 @@ #include #include #include +#include -#include "asm.h" +#include "gen.h" +#include "masm.h" +#include "asm/elf32.h" void help(void) { - printf("usage: masm [options] source.asm\n\n"); - printf("options:\n"); - printf("\t-h\t\tprints this help message\n"); - printf("\t-o \tselect a output file destination\n"); + printf( +"usage: masm [options] source.asm\n" +"\n" +"options: \n" +" -h print the help message \n" +" -g assume undefined symbols are external\n" +" -o specify the object file output name \n" +" -a specify mips abi used [none, o32] \n" +" default: o32 \n" +" -i mips machine isa to assemble for [mips1, mips32r2, mips32r6] \n" +" default: mips32r6\n" +" -f specify the object file format [elf32] \n" +" defualt: elf32\n" + ); +} + +static int read_isa(enum isa *isa, const char *str) +{ + #define __ISA_CHK(name) \ + if (strcasecmp(#name, str) == 0) { \ + *isa = ISA_ ##name; \ + return M_SUCCESS; \ + } \ + + __ISA_CHK(MIPS1); + __ISA_CHK(MIPS32R2); + __ISA_CHK(MIPS32R6); + + ERROR("invalid isa '%s'", str); + return M_ERROR; +} + +static int read_abi(enum abi *abi, const char *str) +{ + #define __ABI_CHK(name) \ + if (strcasecmp(#name, str) == 0) { \ + *abi = ABI_ ##name; \ + return M_SUCCESS; \ + } \ + + + __ABI_CHK(O32); + __ABI_CHK(NONE); + + ERROR("invalid abi '%s'", str); + return M_ERROR; +} + +static int read_format(enum format *format, const char *str) +{ + #define __FORMAT_CHK(name) \ + if (strcasecmp(#name, str) == 0) { \ + *format = FORMAT_ ##name; \ + return M_SUCCESS; \ + } \ + + + __FORMAT_CHK(ELF32); + + ERROR("invalid format '%s'", str); + return M_ERROR; +} + +static int generate(struct generator *gen, struct arguments *args) +{ + if (generator_init(args->in_file, gen)) + return M_ERROR; + + switch (args->isa) { + case ISA_MIPS1: + return generate_mips1(gen); + case ISA_MIPS32R2: + return generate_mips32r2(gen); + case ISA_MIPS32R6: + return generate_mips32r6(gen); + } + + return M_ERROR; +} + +static int assemble(struct arguments *args) +{ + struct generator gen; + int res = M_SUCCESS; + + if (generate(&gen, args)) + return M_ERROR; + + switch (args->format) { + case FORMAT_ELF32: + res = assemble_elf32(&gen, args); + break; + default: + res = M_ERROR; + break; + } + + generator_free(&gen); + + return res; } int main(int argc, char **argv) { - struct assembler_arguments args = { + struct arguments args = { .in_file = NULL, .out_file = "out.o", + .extern_undefined = false, + .isa = ISA_MIPS32R6, + .abi = ABI_O32, + .format = FORMAT_ELF32 }; int c; - while ((c = getopt(argc, argv, "ho:")) != 1) { + while ((c = getopt(argc, argv, "hgo:a:i:f:")) != 1) { switch(c) { case 'h': help(); return M_SUCCESS; + case 'g': + args.extern_undefined = true; + break; case 'o': args.out_file = optarg; break; + case 'a': + if (read_abi(&args.abi, optarg)) + return M_ERROR; + break; + case 'i': + if (read_isa(&args.isa, optarg)) + return M_ERROR; + break; + case 'f': + if (read_format(&args.format, optarg)) + return M_ERROR; + break; case '?': return M_ERROR; default: @@ -48,5 +166,5 @@ next: args.in_file = argv[optind]; - return assemble_file(args); + return assemble(&args); } diff --git a/masm/masm.h b/masm/masm.h new file mode 100644 index 0000000..75a63f8 --- /dev/null +++ b/masm/masm.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2024 Freya Murphy */ + +#ifndef __MASM_H__ +#define __MASM_H__ + +// isa to asemble for +enum isa { + ISA_MIPS1, // a.k.a mipsR2000 + ISA_MIPS32R2, + ISA_MIPS32R6, +}; + +// abi to mark output object +enum abi { + ABI_O32, // mips o32 abi + ABI_NONE, // no flag output +}; + +// format for the object file +enum format { + FORMAT_ELF32, +}; + +// defines arguments +struct arguments { + // files to read from and + // write to + char *in_file; + char *out_file; + + // if undefined symbols should + // be treated as extern + bool extern_undefined; + + // isa to assemble for + enum isa isa; + + // abi to mark object + enum abi abi; + + // format to output + enum format format; +}; + +#endif /* __ASM_H__ */ diff --git a/masm/strtab.c b/masm/strtab.c deleted file mode 100644 index bd914b0..0000000 --- a/masm/strtab.c +++ /dev/null @@ -1,54 +0,0 @@ -#include -#include -#include - -#include "asm.h" - -int strtab_get_str(struct elf_str_table *strtab, const char *str, size_t *res) -{ - for (size_t i = 0; i < strtab->size; i ++) { - if (strcmp(strtab->ptr + i, str) == 0) { - if (res != NULL) - *res = i; - return M_SUCCESS; - } - } - - return M_ERROR; -} - -int strtab_write_str(struct elf_str_table *strtab, const char *str, size_t *res) -{ - if (strtab_get_str(strtab, str, res) == M_SUCCESS) - return M_SUCCESS; - - size_t len = strlen(str); - char *new = realloc(strtab->ptr, strtab->size + len + 1); - if (new == NULL) - return M_ERROR; - strtab->ptr = new; - memcpy(strtab->ptr + strtab->size, str, len + 1); - - if (res != NULL) - *res = strtab->size; - - strtab->size += len + 1; - return M_SUCCESS; -} - -int strtab_init(struct elf_str_table *strtab) -{ - strtab->size = 1; - strtab->ptr = malloc(1); - if (strtab->ptr == NULL) { - PERROR("cannot alloc"); - return M_ERROR; - } - *strtab->ptr = '\0'; - return M_SUCCESS; -} - -void strtab_free(struct elf_str_table *strtab) -{ - free(strtab->ptr); -} -- cgit v1.2.3-freya