From b663f827057fc9fb199293bc1920cf27315d1846 Mon Sep 17 00:00:00 2001 From: Freya Murphy Date: Wed, 9 Oct 2024 12:07:59 -0400 Subject: [PATCH] refactor elf32 assembler, add support for multiple isa's in cmdline --- masm/asm.h | 99 -------------------------- masm/{asm.c => asm/elf32.c} | 132 ++++++++++++++++++++--------------- masm/asm/elf32.h | 90 ++++++++++++++++++++++++ masm/{ => asm}/strtab.c | 2 +- masm/main.c | 134 +++++++++++++++++++++++++++++++++--- masm/masm.h | 45 ++++++++++++ 6 files changed, 337 insertions(+), 165 deletions(-) rename masm/{asm.c => asm/elf32.c} (85%) create mode 100644 masm/asm/elf32.h rename masm/{ => asm}/strtab.c (98%) create mode 100644 masm/masm.h diff --git a/masm/asm.h b/masm/asm.h index fecd335..e69de29 100644 --- a/masm/asm.h +++ b/masm/asm.h @@ -1,99 +0,0 @@ -/* Copyright (c) 2024 Freya Murphy */ - -#ifndef __ASM_H__ -#define __ASM_H__ - -#include - -#include "gen.h" - -/// -/// ELF string table -/// - -struct elf_str_table { - // size of the ptr in bytes - size_t size; - - // pointer that contains - // the strings - char *ptr; -}; - -/* initalize a string table */ -int strtab_init(struct elf_str_table *strtab); - -/* free a string table */ -void strtab_free(struct elf_str_table *strtab); - -/* get a string form the string table */ -int strtab_get_str(struct elf_str_table *strtab, const char *str, size_t *res); - -/* get or append a string into the string table */ -int strtab_write_str(struct elf_str_table *strtab, const char *str, size_t *res); - -/// -/// elf section -/// - -/* holds a section of the asm file (i.e. 
.text, .bss, .data) */ -struct elf_section { - // section data *weak* pointer - struct section *data; - - // index of the section in - // the ELF shdr - size_t shdr_idx; - - // relocation table - size_t reltab_shidx; - uint32_t reltab_len; - Elf32_Rel *reltab; -}; - -/// -/// assembler -/// - -struct assembler { - // the code generator - struct generator gen; - - /// symbol table - size_t symtab_shidx; - size_t symtab_len; - Elf32_Sym *symbols; - - // sh string table - size_t strtab_shidx; - struct elf_str_table strtab; - - // string table - size_t shstrtab_shidx; - struct elf_str_table shstrtab; - - /// sections - uint32_t section_len; - struct elf_section *sections; - - /// section header - Elf32_Shdr *shdr; - uint32_t shdr_len; -}; - -/* defines arguments to the assembler */ -struct assembler_arguments { - char *in_file; - char *out_file; -}; - -/* initalize the assembler */ -int assembler_init(struct assembler *assembler, const char *path); - -/* free the assembler */ -void assembler_free(struct assembler *assembler); - -/* assemble a file */ -int assemble_file(struct assembler_arguments args); - -#endif /* __ASM_H__ */ diff --git a/masm/asm.c b/masm/asm/elf32.c similarity index 85% rename from masm/asm.c rename to masm/asm/elf32.c index 8cbc439..e454273 100644 --- a/masm/asm.c +++ b/masm/asm/elf32.c @@ -1,15 +1,11 @@ #include -#include -#include #include #include -#include -#include #include -#include "asm.h" -#include "gen.h" -#include "tab.h" +#include "../tab.h" +#include "../masm.h" +#include "elf32.h" extern char *current_file; @@ -83,10 +79,10 @@ static void elf_section_free(struct elf_section *sec) free(sec->reltab); } -static int asm_init_sections(struct assembler *assembler) +static int asm_init_sections(struct elf_assembler *assembler) { - struct section *sections = assembler->gen.sections; - uint32_t len = assembler->gen.sections_len; + struct section *sections = assembler->gen->sections; + uint32_t len = assembler->gen->sections_len; struct 
elf_section *elftab = malloc(sizeof(struct elf_section) * len); if (elftab == NULL) { @@ -121,8 +117,8 @@ static int elf_sym_bind(enum symbol_type ty) { return STB_GLOBAL; } -static int asm_init_symtab(struct assembler *assembler) { - struct symbol_table *symtab = &assembler->gen.symtab; +static int asm_init_symtab(struct elf_assembler *assembler) { + struct symbol_table *symtab = &assembler->gen->symtab; size_t len = symtab->len + 1; Elf32_Sym *elftab = malloc(sizeof(Elf32_Sym) * len); if (elftab == NULL) { @@ -146,6 +142,17 @@ static int asm_init_symtab(struct assembler *assembler) { return M_ERROR; } + // check if symbol is undefined + if (sym->secidx == SYM_SEC_STUB) { + if (sym->type == SYM_LOCAL && + assembler->args->extern_undefined == false) { + ERROR("undefined symbol %s", sym->name.str); + return M_ERROR; + } + sym->secidx = 0; + bind = STB_GLOBAL; + } + elftab[i+1] = (Elf32_Sym) { .st_name = B32(str_off), .st_info = ELF32_ST_INFO(bind, type), @@ -162,18 +169,7 @@ static int asm_init_symtab(struct assembler *assembler) { return M_SUCCESS; } -static int parse_file(struct assembler *assembler) -{ - if (generate_mips32r6(&assembler->gen)) - return M_ERROR; - if (asm_init_sections(assembler)) - return M_ERROR; - if (asm_init_symtab(assembler)) - return M_ERROR; - return M_SUCCESS; -} - -static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res, +static int assemble_shdr(struct elf_assembler *assembler, Elf32_Shdr **res, uint32_t *res2) { uint32_t max_entries = 0; @@ -337,7 +333,7 @@ static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res, return M_SUCCESS; } -static void update_offsets(struct assembler *assembler, Elf32_Ehdr *ehdr) +static void update_offsets(struct elf_assembler *assembler, Elf32_Ehdr *ehdr) { Elf32_Shdr *shdr = (Elf32_Shdr *) assembler->shdr; uint32_t ptr = 0; @@ -402,7 +398,7 @@ static void update_offsets(struct assembler *assembler, Elf32_Ehdr *ehdr) ehdr->e_shoff = B32(ptr); } -static int write_file(struct 
assembler *assembler, Elf32_Ehdr *ehdr, +static int write_file(struct elf_assembler *assembler, Elf32_Ehdr *ehdr, const char *path) { FILE *out = fopen(path, "w"); @@ -453,11 +449,11 @@ static int write_file(struct assembler *assembler, Elf32_Ehdr *ehdr, return M_SUCCESS; } -static void update_sym_shndx(struct assembler *assembler) +static void update_sym_shndx(struct elf_assembler *assembler) { for (uint32_t i = 1; i < assembler->symtab_len; i++) { Elf32_Sym *esym = &assembler->symbols[i]; - struct symbol *sym = &assembler->gen.symtab.symbols[i - 1]; + struct symbol *sym = &assembler->gen->symtab.symbols[i - 1]; // get shindx int shindx = 0; @@ -470,14 +466,42 @@ static void update_sym_shndx(struct assembler *assembler) } } -static int assemble_elf(struct assembler *assembler, const char *out) +static int assemble_elf(struct elf_assembler *assembler, const char *out) { + if (asm_init_sections(assembler)) + return M_ERROR; + + if (asm_init_symtab(assembler)) + return M_ERROR; + if (assemble_shdr(assembler, &assembler->shdr, &assembler->shdr_len)) return M_ERROR; + // get ehdr flags + uint32_t flags = EF_MIPS_NAN2008; + switch (assembler->args->isa) { + case ISA_MIPS1: + flags |= EF_MIPS_ARCH_1; + break; + case ISA_MIPS32R2: + flags |= EF_MIPS_ARCH_32R2; + break; + case ISA_MIPS32R6: + flags |= EF_MIPS_ARCH_32R6; + break; + } + switch (assembler->args->abi) { + case ABI_O32: + flags |= EF_MIPS_ABI_O32; + break; + case ABI_NONE: + break; + } + Elf32_Ehdr ehdr = MIPS_ELF_EHDR; ehdr.e_shnum = B16(assembler->shdr_len); ehdr.e_shstrndx = B16(assembler->shstrtab_shidx); + ehdr.e_flags = B32(flags); update_offsets(assembler, &ehdr); update_sym_shndx(assembler); @@ -487,41 +511,20 @@ static int assemble_elf(struct assembler *assembler, const char *out) return M_SUCCESS; } -int assemble_file(struct assembler_arguments args) +static int assembler_init(struct elf_assembler *assembler, + struct generator *gen, + struct arguments *args) { - struct assembler assembler; - int res 
= M_SUCCESS; + assembler->args = args; + assembler->gen = gen; - current_file = args.in_file; - - if (assembler_init(&assembler, args.in_file)) - return M_ERROR; - - if (res == M_SUCCESS) - res = parse_file(&assembler); - - if (res == M_SUCCESS) - res = assemble_elf(&assembler, args.out_file); - - assembler_free(&assembler); - - return res; -} - -int assembler_init(struct assembler *assembler, const char *path) -{ assembler->shdr = NULL; assembler->symbols = NULL; assembler->sections = NULL; assembler->strtab.ptr = NULL; assembler->shstrtab.ptr = NULL; - assembler->gen.sections = NULL; - assembler->gen.symtab.symbols = NULL; assembler->section_len = 0; - if (generator_init(path, &assembler->gen)) - return M_ERROR; - if (strtab_init(&assembler->shstrtab)) return M_ERROR; @@ -531,7 +534,7 @@ int assembler_init(struct assembler *assembler, const char *path) return M_SUCCESS; } -void assembler_free(struct assembler *assembler) +static void assembler_free(struct elf_assembler *assembler) { if (assembler->shdr) free(assembler->shdr); @@ -545,5 +548,20 @@ void assembler_free(struct assembler *assembler) strtab_free(&assembler->strtab); strtab_free(&assembler->shstrtab); - generator_free(&assembler->gen); } + +int assemble_elf32(struct generator *gen, struct arguments *args) +{ + struct elf_assembler assembler; + int res = M_SUCCESS; + current_file = args->in_file; + + if (assembler_init(&assembler, gen, args)) + return M_ERROR; + + res = assemble_elf(&assembler, args->out_file); + assembler_free(&assembler); + + return res; +} + diff --git a/masm/asm/elf32.h b/masm/asm/elf32.h new file mode 100644 index 0000000..76aeb19 --- /dev/null +++ b/masm/asm/elf32.h @@ -0,0 +1,90 @@ +/* Copyright (c) 2024 Freya Murphy */ + +#ifndef __ELF32_H__ +#define __ELF32_H__ + +#include + +#include "../gen.h" +#include "../masm.h" + +/// +/// ELF string table +/// + +struct elf_str_table { + // size of the ptr in bytes + size_t size; + + // pointer that contains + // the strings + char *ptr; 
+}; + +/* initialize a string table */ +int strtab_init(struct elf_str_table *strtab); + +/* free a string table */ +void strtab_free(struct elf_str_table *strtab); + +/* get a string from the string table */ +int strtab_get_str(struct elf_str_table *strtab, const char *str, size_t *res); + +/* get or append a string into the string table */ +int strtab_write_str(struct elf_str_table *strtab, const char *str, size_t *res); + +/// +/// elf section +/// + +/* holds a section of the asm file (i.e. .text, .bss, .data) */ +struct elf_section { + // section data *weak* pointer + struct section *data; + + // index of the section in + // the ELF shdr + size_t shdr_idx; + + // relocation table + size_t reltab_shidx; + uint32_t reltab_len; + Elf32_Rel *reltab; +}; + +/// +/// assembler +/// + +struct elf_assembler { + // arguments passed in + struct arguments *args; + + // the code generator + struct generator *gen; + + /// symbol table + size_t symtab_shidx; + size_t symtab_len; + Elf32_Sym *symbols; + + // string table + size_t strtab_shidx; + struct elf_str_table strtab; + + // sh string table + size_t shstrtab_shidx; + struct elf_str_table shstrtab; + + /// sections + uint32_t section_len; + struct elf_section *sections; + + /// section header + Elf32_Shdr *shdr; + uint32_t shdr_len; +}; + +int assemble_elf32(struct generator *gen, struct arguments *args); + +#endif /* __ELF32_H__ */ diff --git a/masm/strtab.c b/masm/asm/strtab.c similarity index 98% rename from masm/strtab.c rename to masm/asm/strtab.c index bd914b0..799f0dc 100644 --- a/masm/strtab.c +++ b/masm/asm/strtab.c @@ -2,7 +2,7 @@ #include #include -#include "asm.h" +#include "elf32.h" int strtab_get_str(struct elf_str_table *strtab, const char *str, size_t *res) { diff --git a/masm/main.c b/masm/main.c index caa8420..a97d949 100644 --- a/masm/main.c +++ b/masm/main.c @@ -1,33 +1,151 @@ #include #include #include +#include -#include "asm.h" +#include "gen.h" +#include "masm.h" +#include "asm/elf32.h" void 
help(void) { - printf("usage: masm [options] source.asm\n\n"); - printf("options:\n"); - printf("\t-h\t\tprints this help message\n"); - printf("\t-o \tselect a output file destination\n"); + printf( +"usage: masm [options] source.asm\n" +"\n" +"options: \n" +" -h print the help message \n" +" -g assume undefined symbols are external\n" +" -o specify the object file output name \n" +" -a specify mips abi used [none, o32] \n" +" default: o32 \n" +" -i mips machine isa to assemble for [mips1, mips32r2, mips32r6] \n" +" default: mips32r6\n" +" -f specify the object file format [elf32] \n" +" defualt: elf32\n" + ); +} + +static int read_isa(enum isa *isa, const char *str) +{ + #define __ISA_CHK(name) \ + if (strcasecmp(#name, str) == 0) { \ + *isa = ISA_ ##name; \ + return M_SUCCESS; \ + } \ + + __ISA_CHK(MIPS1); + __ISA_CHK(MIPS32R2); + __ISA_CHK(MIPS32R6); + + ERROR("invalid isa '%s'", str); + return M_ERROR; +} + +static int read_abi(enum abi *abi, const char *str) +{ + #define __ABI_CHK(name) \ + if (strcasecmp(#name, str) == 0) { \ + *abi = ABI_ ##name; \ + return M_SUCCESS; \ + } \ + + + __ABI_CHK(O32); + __ABI_CHK(NONE); + + ERROR("invalid abi '%s'", str); + return M_ERROR; +} + +static int read_format(enum format *format, const char *str) +{ + #define __FORMAT_CHK(name) \ + if (strcasecmp(#name, str) == 0) { \ + *format = FORMAT_ ##name; \ + return M_SUCCESS; \ + } \ + + + __FORMAT_CHK(ELF32); + + ERROR("invalid format '%s'", str); + return M_ERROR; +} + +static int generate(struct generator *gen, struct arguments *args) +{ + if (generator_init(args->in_file, gen)) + return M_ERROR; + + switch (args->isa) { + case ISA_MIPS1: + return generate_mips1(gen); + case ISA_MIPS32R2: + return generate_mips32r2(gen); + case ISA_MIPS32R6: + return generate_mips32r6(gen); + } + + return M_ERROR; +} + +static int assemble(struct arguments *args) +{ + struct generator gen; + int res = M_SUCCESS; + + if (generate(&gen, args)) + return M_ERROR; + + switch (args->format) { + 
case FORMAT_ELF32: + res = assemble_elf32(&gen, args); + break; + default: + res = M_ERROR; + break; + } + + generator_free(&gen); + + return res; } int main(int argc, char **argv) { - struct assembler_arguments args = { + struct arguments args = { .in_file = NULL, .out_file = "out.o", + .extern_undefined = false, + .isa = ISA_MIPS32R6, + .abi = ABI_O32, + .format = FORMAT_ELF32 }; int c; - while ((c = getopt(argc, argv, "ho:")) != 1) { + while ((c = getopt(argc, argv, "hgo:a:i:f:")) != 1) { switch(c) { case 'h': help(); return M_SUCCESS; + case 'g': + args.extern_undefined = true; + break; case 'o': args.out_file = optarg; break; + case 'a': + if (read_abi(&args.abi, optarg)) + return M_ERROR; + break; + case 'i': + if (read_isa(&args.isa, optarg)) + return M_ERROR; + break; + case 'f': + if (read_format(&args.format, optarg)) + return M_ERROR; + break; case '?': return M_ERROR; default: @@ -48,5 +166,5 @@ next: args.in_file = argv[optind]; - return assemble_file(args); + return assemble(&args); } diff --git a/masm/masm.h b/masm/masm.h new file mode 100644 index 0000000..75a63f8 --- /dev/null +++ b/masm/masm.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2024 Freya Murphy */ + +#ifndef __MASM_H__ +#define __MASM_H__ + +// isa to assemble for +enum isa { + ISA_MIPS1, // a.k.a mipsR2000 + ISA_MIPS32R2, + ISA_MIPS32R6, +}; + +// abi to mark output object +enum abi { + ABI_O32, // mips o32 abi + ABI_NONE, // no flag output +}; + +// format for the object file +enum format { + FORMAT_ELF32, +}; + +// defines arguments +struct arguments { + // files to read from and + // write to + char *in_file; + char *out_file; + + // if undefined symbols should + // be treated as extern + bool extern_undefined; + + // isa to assemble for + enum isa isa; + + // abi to mark object + enum abi abi; + + // format to output + enum format format; +}; + +#endif /* __MASM_H__ */