From 92a7e5853c6caeec09122c05ddbc19ae1498a0d8 Mon Sep 17 00:00:00 2001 From: Freya Murphy Date: Tue, 10 Sep 2024 18:23:46 -0400 Subject: [PATCH] joe --- compile_flags.txt | 2 +- include/mips.h | 4 + makefile.mk | 2 +- masm/asm.c | 72 +++++-- masm/asm.h | 62 +++++- masm/asm_mips32.c | 482 ++++++++++++++++++++++++++-------------------- masm/lex.c | 8 +- masm/main.c | 64 +++++- masm/parse.c | 26 --- masm/parse.h | 30 --- masm/symtbl.c | 2 +- 11 files changed, 457 insertions(+), 297 deletions(-) diff --git a/compile_flags.txt b/compile_flags.txt index 545b1ad..c36adb4 100644 --- a/compile_flags.txt +++ b/compile_flags.txt @@ -1,5 +1,5 @@ -c --std=c2x +-std=gnu2x -Wall -Wextra -pedantic diff --git a/include/mips.h b/include/mips.h index 79b516b..ba33893 100644 --- a/include/mips.h +++ b/include/mips.h @@ -5,6 +5,10 @@ #include +enum mips_isa { + ISA_MIPS32 +}; + union mips_instruction { struct mips32_instruction mips32; }; diff --git a/makefile.mk b/makefile.mk index afcfda7..a5321b8 100644 --- a/makefile.mk +++ b/makefile.mk @@ -1,6 +1,6 @@ # needed cflags -CFLAGS += -std=c2x +CFLAGS += -std=gnu2x # add include directory CFLAGS += -isystem ../include diff --git a/masm/asm.c b/masm/asm.c index 6e2a56c..099bfe6 100644 --- a/masm/asm.c +++ b/masm/asm.c @@ -3,39 +3,75 @@ #include "asm.h" -int assembler_init(struct assembler *asm, const char *path) +int assembler_init(struct assembler *assembler, const char *path) { - if (lexer_init(path, &asm->lexer)) + if (lexer_init(path, &assembler->lexer)) return M_ERROR; - if (parser_init(&asm->lexer, &asm->parser)) { - lexer_free(&asm->lexer); + if (parser_init(&assembler->lexer, &assembler->parser)) { + lexer_free(&assembler->lexer); return M_ERROR; } - if (strtbl_init(&asm->shstr_tbl)) { - parser_free(&asm->parser); - lexer_free(&asm->lexer); + if (strtbl_init(&assembler->shstr_tbl)) { + parser_free(&assembler->parser); + lexer_free(&assembler->lexer); return M_ERROR; } - if (strtbl_init(&asm->str_tbl)) { - 
strtbl_free(&asm->shstr_tbl); - parser_free(&asm->parser); - lexer_free(&asm->lexer); + if (strtbl_init(&assembler->str_tbl)) { + strtbl_free(&assembler->shstr_tbl); + parser_free(&assembler->parser); + lexer_free(&assembler->lexer); return M_ERROR; } - asm->meta = NULL; + if (symtbl_init(&assembler->sym_tbl)) { + strtbl_free(&assembler->str_tbl); + strtbl_free(&assembler->shstr_tbl); + parser_free(&assembler->parser); + lexer_free(&assembler->lexer); + return M_ERROR; + } + + assembler->meta = NULL; + assembler->phdr = NULL; + assembler->shdr = NULL; + assembler->symtab = NULL; return M_SUCCESS; } -void assembler_free(struct assembler *asm) +void assembler_free(struct assembler *assembler) { - strtbl_free(&asm->str_tbl); - strtbl_free(&asm->shstr_tbl); - parser_free(&asm->parser); - lexer_free(&asm->lexer); - free(asm->meta); + if (assembler->meta) { + for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { + if (assembler->meta[i].reltbl != NULL) { + free(assembler->meta[i].reltbl); + } + } + free(assembler->meta); + } + + if (assembler->phdr) + free(assembler->phdr); + if (assembler->shdr) + free(assembler->shdr); + if (assembler->symtab) + free(assembler->symtab); + + symtbl_free(&assembler->sym_tbl); + strtbl_free(&assembler->str_tbl); + strtbl_free(&assembler->shstr_tbl); + + parser_free(&assembler->parser); + lexer_free(&assembler->lexer); +} + +int assemble_file(struct assembler_arguments args) { + switch (args.isa) { + case ISA_MIPS32: + return assemble_file_mips32(args); + } + return M_ERROR; } diff --git a/masm/asm.h b/masm/asm.h index b8e6214..86f6b9a 100644 --- a/masm/asm.h +++ b/masm/asm.h @@ -8,6 +8,34 @@ #include "lex.h" #include "parse.h" +enum symbol_flag { + SYM_LOCAL, + SYM_GLOBAL, + SYM_EXTERNAL, +}; + +struct symbol { + char name[MAX_LEX_LENGTH]; + uint32_t index; + struct section *sec; + enum symbol_flag flag; + +}; + +struct symbol_table { + uint32_t count; + uint32_t len; + struct symbol *symbols; +}; + +int symtbl_init(struct 
symbol_table *sym_tbl); +void symtbl_free(struct symbol_table *sym_tbl); + +int symtbl_push(struct symbol_table *sym_tbl, struct symbol sym); +int symtbl_find(struct symbol_table *sym_tbl, struct symbol **sym, + const char name[MAX_LEX_LENGTH]); + + struct str_table { char *ptr; size_t size; @@ -34,21 +62,43 @@ struct section_meta { }; struct assembler { + // the token lexer struct lexer lexer; + // the expression parser struct parser parser; - struct str_table shstr_tbl; - struct str_table str_tbl; + // shdr indexes struct section_meta *meta; size_t shstrtbl_idx; size_t strtbl_idx; - size_t symtbl_idx; + size_t symtab_idx; + + // symbols and strings + struct symbol_table sym_tbl; + struct str_table shstr_tbl; + struct str_table str_tbl; + + // elf data + void *phdr; // void* since could be Elf32 or Elf64 + void *shdr; + void *symtab; + uint32_t phdr_len; + uint32_t shdr_len; + uint32_t symtab_len; }; -int assembler_init(struct assembler *asm, const char *path); -void assembler_free(struct assembler *asm); +struct assembler_arguments { + char *in_file; + char *out_file; + enum mips_isa isa; +}; + +int assembler_init(struct assembler *assembler, const char *path); +void assembler_free(struct assembler *assembler); + +int assemble_file(struct assembler_arguments args); /* assemble a mips32 file*/ -int assemble_file_mips32(char *path); +int assemble_file_mips32(struct assembler_arguments args); #endif /* __ASM_H__ */ diff --git a/masm/asm_mips32.c b/masm/asm_mips32.c index deda214..7716f4d 100644 --- a/masm/asm_mips32.c +++ b/masm/asm_mips32.c @@ -14,12 +14,12 @@ extern char *current_file; -static int handle_directive(struct assembler *asm, +static int handle_directive(struct assembler *assembler, struct mips32_directive *directive) { switch (directive->type) { case MIPS32_DIRECTIVE_SECTION: { - struct section_table *sec_tbl = &asm->parser.sec_tbl; + struct section_table *sec_tbl = &assembler->parser.sec_tbl; struct section *sec; if (sectbl_get(sec_tbl, &sec, 
directive->name) == M_SUCCESS) { @@ -35,7 +35,7 @@ static int handle_directive(struct assembler *asm, } case MIPS32_DIRECTIVE_ALIGN: { - asm->parser.sec_tbl.current->alignment = + assembler->parser.sec_tbl.current->alignment = 1 << directive->align; break; } @@ -44,7 +44,7 @@ static int handle_directive(struct assembler *asm, struct section_entry entry; entry.type = ENT_NO_DATA; entry.size = directive->space; - if (sec_push(asm->parser.sec_tbl.current, entry)) + if (sec_push(assembler->parser.sec_tbl.current, entry)) return M_ERROR; break; } @@ -54,7 +54,8 @@ static int handle_directive(struct assembler *asm, struct section_entry entry; entry.type = ENT_WORD; entry.word = directive->words[i]; - if (sec_push(asm->parser.sec_tbl.current, entry)) + if (sec_push(assembler->parser.sec_tbl.current, + entry)) return M_ERROR; } break; @@ -65,7 +66,8 @@ static int handle_directive(struct assembler *asm, struct section_entry entry; entry.type = ENT_HALF; entry.half = directive->halfs[i]; - if (sec_push(asm->parser.sec_tbl.current, entry)) + if (sec_push(assembler->parser.sec_tbl.current, + entry)) return M_ERROR; } break; @@ -76,7 +78,8 @@ static int handle_directive(struct assembler *asm, struct section_entry entry; entry.type = ENT_BYTE; entry.byte = directive->bytes[i]; - if (sec_push(asm->parser.sec_tbl.current, entry)) + if (sec_push(assembler->parser.sec_tbl.current, + entry)) return M_ERROR; } break; @@ -84,7 +87,7 @@ static int handle_directive(struct assembler *asm, case MIPS32_DIRECTIVE_EXTERN: { struct symbol symbol; - if (symtbl_find(&asm->parser.sym_tbl, NULL, directive->name) + if (symtbl_find(&assembler->sym_tbl, NULL, directive->name) == M_SUCCESS) { ERROR("cannot extern local symbol '%s'", directive->name); @@ -93,13 +96,13 @@ static int handle_directive(struct assembler *asm, symbol = (struct symbol) { .name = "", - .sec = asm->parser.sec_tbl.current, - .index = asm->parser.sec_tbl.current->count, + .sec = NULL, + .index = 0, .flag = SYM_EXTERNAL, }; 
strcpy(symbol.name, directive->name); - if (symtbl_push(&asm->parser.sym_tbl, symbol)) + if (symtbl_push(&assembler->sym_tbl, symbol)) return M_ERROR; break; @@ -107,7 +110,7 @@ static int handle_directive(struct assembler *asm, case MIPS32_DIRECTIVE_GLOBL: { struct symbol symbol; - if (symtbl_find(&asm->parser.sym_tbl, NULL, directive->name) + if (symtbl_find(&assembler->sym_tbl, NULL, directive->name) == M_SUCCESS) { symbol.flag = SYM_GLOBAL; break; @@ -121,7 +124,7 @@ static int handle_directive(struct assembler *asm, }; strcpy(symbol.name, directive->name); - if (symtbl_push(&asm->parser.sym_tbl, symbol)) + if (symtbl_push(&assembler->sym_tbl, symbol)) return M_ERROR; break; @@ -131,15 +134,43 @@ static int handle_directive(struct assembler *asm, return M_SUCCESS; } -static int parse_file(struct assembler *asm) +static int handle_label(struct assembler *assembler, + const char name[MAX_LEX_LENGTH]) { - struct parser *parser = &asm->parser; + struct symbol *ref; + if (symtbl_find(&assembler->sym_tbl, &ref, name) == M_SUCCESS) { + if (ref->flag == SYM_GLOBAL && ref->sec == NULL) { + ref->sec = assembler->parser.sec_tbl.current; + ref->index = assembler->parser.sec_tbl.current->count; + return M_SUCCESS; + } + ERROR("redefined symbol '%s'", name); + return M_ERROR; + } + + struct symbol symbol; + symbol = (struct symbol) { + .name = "", + .sec = assembler->parser.sec_tbl.current, + .index = assembler->parser.sec_tbl.current->count, + .flag = SYM_LOCAL, + }; + strcpy(symbol.name, name); + + if (symtbl_push(&assembler->sym_tbl, symbol)) + return M_ERROR; + + return M_SUCCESS; +} + +static int parse_file(struct assembler *assembler) +{ + struct parser *parser = &assembler->parser; while (1) { struct expr expr; - if (parser_next(parser, &expr)) { - break; - } + if (parser_next(parser, &expr)) + return M_ERROR; switch (expr.type) { case EXPR_INS: @@ -152,16 +183,20 @@ static int parse_file(struct assembler *asm) break; case EXPR_DIRECTIVE: - if (handle_directive(asm, 
&expr.directive.mips32)) + if (handle_directive(assembler, + &expr.directive.mips32)) + return M_ERROR; + break; + + case EXPR_LABEL: + if (handle_label(assembler, expr.text)) return M_ERROR; break; case EXPR_CONSTANT: - case EXPR_LABEL: - // nothing needed to be done break; - } - } + } + } struct section_meta *meta = malloc(sizeof(struct section_meta) * parser->sec_tbl.count); @@ -170,7 +205,7 @@ static int parse_file(struct assembler *asm) return M_ERROR; } - asm->meta = meta; + assembler->meta = meta; size_t ptr = 0; for (uint32_t i = 0; i < parser->sec_tbl.count; i++) { @@ -182,10 +217,10 @@ static int parse_file(struct assembler *asm) return M_SUCCESS; } -static int assemble_phdr(struct assembler *asm, Elf32_Phdr **res, +static int assemble_phdr(struct assembler *assembler, Elf32_Phdr **res, uint32_t *res2) { - struct parser *parser = &asm->parser; + struct parser *parser = &assembler->parser; Elf32_Phdr *phdr = malloc(sizeof(Elf32_Phdr) * parser->sec_tbl.count); if (phdr == NULL) { @@ -214,28 +249,34 @@ static int assemble_phdr(struct assembler *asm, Elf32_Phdr **res, return M_SUCCESS; } -static int assemble_symtbl(struct assembler *asm, Elf32_Sym **res, +static int assemble_symtab(struct assembler *assembler, Elf32_Sym **res, uint32_t *res2) { - Elf32_Sym *stbl = malloc(sizeof(Elf32_Sym) * asm->parser.sym_tbl + Elf32_Sym *stbl = malloc(sizeof(Elf32_Sym) * assembler->sym_tbl .count); size_t size = 0; if (stbl == NULL) return M_ERROR; - for (uint32_t i = 0; i < asm->parser.sym_tbl.count; i++) { - struct symbol *sym = &asm->parser.sym_tbl.symbols[i]; + for (uint32_t i = 0; i < assembler->sym_tbl.count; i++) { + struct symbol *sym = &assembler->sym_tbl.symbols[i]; size_t str_off; unsigned char bind; unsigned char type = STT_NOTYPE; - if (strtbl_write_str(&asm->str_tbl, sym->name, &str_off)) { + if (strtbl_write_str(&assembler->str_tbl, + sym->name, &str_off)) { free(stbl); return M_ERROR; } - if (sym->flag != SYM_LOCAL) + if (sym->flag == SYM_GLOBAL && sym->sec 
== NULL) { + ERROR("never defined global symbol '%s'", sym->name); + free(stbl); /* release the table on this error path too, as the strtbl_write_str failure path does */ + return M_ERROR; + } + if (sym->flag == SYM_LOCAL) bind = STB_LOCAL; else bind = STB_GLOBAL; @@ -246,7 +287,7 @@ static int assemble_symtbl(struct assembler *asm, Elf32_Sym **res, .st_size = 0, .st_info = ELF32_ST_INFO(bind, type), .st_other = ELF32_ST_VISIBILITY(STV_DEFAULT), - .st_shndx = asm->meta[sym->sec->index].shdr_idx, + .st_shndx = 0, }; size = i + 1; }; @@ -257,20 +298,31 @@ static int assemble_symtbl(struct assembler *asm, Elf32_Sym **res, return M_SUCCESS; } -static int assemble_reltbl_sec(struct assembler *asm, Elf32_Sym *symtbl, - uint32_t symtbl_len, struct section *sec) +static void assemble_symtab_shndx(struct assembler *assembler, Elf32_Sym *tbl) +{ + for (uint32_t i = 0; i < assembler->sym_tbl.count; i++) { + struct symbol *sym = &assembler->sym_tbl.symbols[i]; + if (sym->sec != NULL) + tbl[i].st_shndx = + assembler->meta[sym->sec->index].shdr_idx; + } +} + +static int assemble_reltbl_sec(struct assembler *assembler, Elf32_Sym *symtab, + uint32_t symtab_len, struct section *sec) { uint32_t len = 0; - for (uint32_t i = 0; i < asm->parser.ref_tbl.count; i++) { - struct reference *ref = &asm->parser.ref_tbl.references[i]; + for (uint32_t i = 0; i < assembler->parser.ref_tbl.count; i++) { + struct reference *ref = + &assembler->parser.ref_tbl.references[i]; if (ref->section->index == sec->index) { len++; } } if (len == 0) { - asm->meta[sec->index].reltbl = NULL; + assembler->meta[sec->index].reltbl = NULL; return M_SUCCESS; } @@ -281,11 +333,11 @@ static int assemble_reltbl_sec(struct assembler *asm, Elf32_Sym *symtbl, return M_ERROR; } - for (uint32_t i = 0; i < asm->parser.ref_tbl.count; i++) { - struct reference *ref = &asm->parser.ref_tbl.references[i]; + for (uint32_t i = 0; i < assembler->parser.ref_tbl.count; i++) { + struct reference *ref = + &assembler->parser.ref_tbl.references[i]; struct mips32_instruction *ins = &ref->section-> entries[ref->index].ins.mips32; - struct 
section_meta *meta = &asm->meta[ref->section->index]; if (ref->section->index != sec->index) { continue; @@ -306,9 +358,10 @@ static int assemble_reltbl_sec(struct assembler *asm, Elf32_Sym *symtbl, int32_t symidx = -1; - for (uint32_t i = 0; i < symtbl_len; i++) { - Elf32_Sym *sym = &symtbl[i]; - const char *str = &asm->str_tbl.ptr[sym->st_name]; + for (uint32_t i = 0; i < symtab_len; i++) { + Elf32_Sym *sym = &symtab[i]; + const char *str = + &assembler->str_tbl.ptr[sym->st_name]; if (strcmp(ref->name, str) == 0) { symidx = i; break; @@ -324,35 +377,34 @@ static int assemble_reltbl_sec(struct assembler *asm, Elf32_Sym *symtbl, reltbl[i] = (Elf32_Rela) { .r_info = ELF32_R_INFO(symidx, type), .r_addend = addend, - .r_offset = meta->v_addr + - sec_index(ref->section, ref->index), + .r_offset = sec_index(ref->section, ref->index), }; }; - asm->meta[sec->index].reltbl_len = len; - asm->meta[sec->index].reltbl = reltbl; + assembler->meta[sec->index].reltbl_len = len; + assembler->meta[sec->index].reltbl = reltbl; return M_SUCCESS; } -static int assemble_reltbl(struct assembler *asm, Elf32_Sym *symtbl, - uint32_t symtbl_len) +static int assemble_reltbl(struct assembler *assembler, Elf32_Sym *symtab, + uint32_t symtab_len) { - for (uint32_t i = 0; i < asm->parser.sec_tbl.count; i++) { - struct section *sec = &asm->parser.sec_tbl.sections[i]; - if (assemble_reltbl_sec(asm, symtbl, symtbl_len, sec)) + for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { + struct section *sec = &assembler->parser.sec_tbl.sections[i]; + if (assemble_reltbl_sec(assembler, symtab, symtab_len, sec)) return M_ERROR; } return M_SUCCESS; } -static int assemble_shdr(struct assembler *asm, Elf32_Shdr **res, +static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res, uint32_t *res2) { uint32_t max_entries = 4; // symtab, strtab, shstrtab - max_entries += asm->parser.sec_tbl.count; // sections - max_entries += asm->parser.sec_tbl.count; // reltabs per section + max_entries += 
assembler->parser.sec_tbl.count; // sections + max_entries += assembler->parser.sec_tbl.count; // reltabs per section Elf32_Shdr *shdr = malloc(sizeof(Elf32_Shdr) * max_entries); @@ -360,24 +412,25 @@ static int assemble_shdr(struct assembler *asm, Elf32_Shdr **res, uint32_t count = 0; // eeltables - for (uint32_t i = 0; i < asm->parser.sec_tbl.count; i++) { + for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { - if (asm->meta[i].reltbl == NULL) + if (assembler->meta[i].reltbl == NULL) continue; - struct section *sec = &asm->parser.sec_tbl.sections[i]; + struct section *sec = &assembler->parser.sec_tbl.sections[i]; const char *prefix = ".reltab."; char reltab_name[MAX_LEX_LENGTH + 8]; strcpy(reltab_name, prefix); strcat(reltab_name, sec->name); - if (strtbl_write_str(&asm->shstr_tbl, reltab_name, &str_off)) { + if (strtbl_write_str(&assembler->shstr_tbl, + reltab_name, &str_off)) { free(shdr); return M_ERROR; } - asm->meta[i].reltbl_idx = count; + assembler->meta[i].reltbl_idx = count; shdr[count++] = (Elf32_Shdr) { .sh_name = str_off, .sh_type = SHT_RELA, @@ -393,17 +446,17 @@ static int assemble_shdr(struct assembler *asm, Elf32_Shdr **res, } // for each section - for (uint32_t i = 0; i < asm->parser.sec_tbl.count; i++) { - struct section *sec = &asm->parser.sec_tbl.sections[i]; + for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { + struct section *sec = &assembler->parser.sec_tbl.sections[i]; char name[MAX_LEX_LENGTH+1] = "."; strcat(name, sec->name); - if (strtbl_write_str(&asm->shstr_tbl, name, &str_off)) { + if (strtbl_write_str(&assembler->shstr_tbl, name, &str_off)) { free(shdr); return M_ERROR; } - asm->meta[i].shdr_idx = count; - if (asm->meta[i].reltbl != NULL) - shdr[asm->meta[i].reltbl_idx].sh_info = count; + assembler->meta[i].shdr_idx = count; + if (assembler->meta[i].reltbl != NULL) + shdr[assembler->meta[i].reltbl_idx].sh_info = count; shdr[count++] = (Elf32_Shdr){ .sh_name = str_off, .sh_type = SHT_PROGBITS, @@ -420,12 
+473,12 @@ static int assemble_shdr(struct assembler *asm, Elf32_Shdr **res, } // symbol table - if (strtbl_write_str(&asm->shstr_tbl, ".symtab", &str_off)) { + if (strtbl_write_str(&assembler->shstr_tbl, ".symtab", &str_off)) { free(shdr); return M_ERROR; } - asm->symtbl_idx = count; + assembler->symtab_idx = count; shdr[count++] = (Elf32_Shdr) { .sh_name = str_off, .sh_type = SHT_SYMTAB, @@ -440,12 +493,12 @@ static int assemble_shdr(struct assembler *asm, Elf32_Shdr **res, }; // string table - if (strtbl_write_str(&asm->shstr_tbl, ".strtab", &str_off)) { + if (strtbl_write_str(&assembler->shstr_tbl, ".strtab", &str_off)) { free(shdr); return M_ERROR; } - asm->strtbl_idx = count; + assembler->strtbl_idx = count; shdr[count++] = (Elf32_Shdr) { .sh_name = str_off, .sh_type = SHT_STRTAB, @@ -460,12 +513,12 @@ static int assemble_shdr(struct assembler *asm, Elf32_Shdr **res, }; // sh string table - if (strtbl_write_str(&asm->shstr_tbl, ".shstrtab", &str_off)) { + if (strtbl_write_str(&assembler->shstr_tbl, ".shstrtab", &str_off)) { free(shdr); return M_ERROR; } - asm->shstrtbl_idx = count; + assembler->shstrtbl_idx = count; shdr[count++] = (Elf32_Shdr) { .sh_name = str_off, .sh_type = SHT_STRTAB, @@ -479,10 +532,11 @@ static int assemble_shdr(struct assembler *asm, Elf32_Shdr **res, .sh_entsize = 0, }; - for (uint32_t i = 0; i < asm->parser.sec_tbl.count; i++) { - if (asm->meta[i].reltbl == NULL) + for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { + if (assembler->meta[i].reltbl == NULL) continue; - shdr[asm->meta[i].reltbl_idx].sh_link = asm->symtbl_idx; + shdr[assembler->meta[i].reltbl_idx].sh_link = + assembler->symtab_idx; } *res = shdr; @@ -491,34 +545,147 @@ static int assemble_shdr(struct assembler *asm, Elf32_Shdr **res, return M_SUCCESS; } -static int assemble_file(struct assembler *asm) +static void update_offsets(struct assembler *assembler, Elf32_Ehdr *ehdr) { - Elf32_Phdr *phdr; - Elf32_Shdr *shdr; - Elf32_Sym *symtbl; - uint32_t phdr_len; 
- uint32_t shdr_len; - uint32_t symtbl_len; + Elf32_Shdr *shdr = (Elf32_Shdr *) assembler->shdr; + Elf32_Phdr *phdr = (Elf32_Phdr *) assembler->phdr; + uint32_t ptr = 0; - if (assemble_symtbl(asm, &symtbl, &symtbl_len)) - return M_ERROR; + // we must now correct offets and sizes inside the ehdr, phdr, + // and shdr + ptr += sizeof(Elf32_Ehdr); - if (assemble_reltbl(asm, symtbl, symtbl_len)) { - free(symtbl); - return M_ERROR; - }; + // phdr + ehdr->e_phoff = ptr; + ptr += assembler->phdr_len * sizeof(Elf32_Phdr); - if (assemble_phdr(asm, &phdr, &phdr_len)) { - free(symtbl); + // reltbls + for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { + if (assembler->meta[i].reltbl == NULL) + continue; + int idx = assembler->meta[i].reltbl_idx; + int len = assembler->meta[i].reltbl_len; + shdr[idx].sh_offset = ptr; + shdr[idx].sh_size = len * sizeof(Elf32_Rela); + ptr += len * sizeof(Elf32_Rela); + } + + // sections + for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { + int idx = assembler->meta[i].shdr_idx; + phdr[i].p_offset = ptr; + phdr[i].p_vaddr = ptr; + phdr[i].p_paddr = ptr; + shdr[idx].sh_offset = ptr; + shdr[idx].sh_size = phdr[i].p_filesz; + shdr[idx].sh_addr = phdr[i].p_vaddr; + shdr[idx].sh_addralign = phdr[i].p_align; + ptr += phdr[i].p_filesz; + } + + // symtab + shdr[assembler->symtab_idx].sh_offset = ptr; + shdr[assembler->symtab_idx].sh_link = assembler->strtbl_idx; + shdr[assembler->symtab_idx].sh_size = + assembler->symtab_len * sizeof(Elf32_Sym); + ptr += assembler->symtab_len * sizeof(Elf32_Sym); + + // strtbl + shdr[assembler->strtbl_idx].sh_offset = ptr; + shdr[assembler->strtbl_idx].sh_size = assembler->str_tbl.size; + ptr += assembler->str_tbl.size; + + // shstrtbl + shdr[assembler->shstrtbl_idx].sh_offset = ptr; + shdr[assembler->shstrtbl_idx].sh_size = assembler->shstr_tbl.size; + ptr += assembler->shstr_tbl.size; + + // shdr + ehdr->e_shoff = ptr; +} + +static int write_file(struct assembler *assembler, Elf32_Ehdr *ehdr, 
+ const char *path) +{ + FILE *out = fopen(path, "w"); + + if (out == NULL) { + ERROR("cannot write '%s'", path); return M_ERROR; } - if (assemble_shdr(asm, &shdr, &shdr_len)) { - free(symtbl); - free(phdr); + // ehdr + fwrite(ehdr, sizeof(Elf32_Ehdr), 1, out); + + // phdr + fwrite(assembler->phdr, sizeof(Elf32_Phdr), assembler->phdr_len, out); + + // reltbls + for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { + if (assembler->meta[i].reltbl == NULL) + continue; + void *ptr = assembler->meta[i].reltbl; + int len = assembler->meta[i].reltbl_len; + fwrite(ptr, sizeof(Elf32_Rela), len, out); + } + + // sections + for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { + struct section *sec = &assembler->parser.sec_tbl.sections[i]; + for (uint32_t j = 0; j < sec->count; j++) { + struct section_entry *entry = &sec->entries[j]; + size_t size = entry->size; + fwrite(&entry->data, size, 1, out); + while(size % sec->alignment) { + uint8_t zero = 0; + fwrite(&zero, 1, 1, out); + size++; + } + } + } + + // sym tbl + fwrite(assembler->symtab, sizeof(Elf32_Sym), + assembler->symtab_len, out); + + // str tbl + fwrite(assembler->str_tbl.ptr, assembler->str_tbl.size, 1, out); + + // shstr tbl + fwrite(assembler->shstr_tbl.ptr, assembler->shstr_tbl.size, 1, out); + + // shdr + fwrite(assembler->shdr, sizeof(Elf32_Shdr), assembler->shdr_len, out); + + fclose(out); + + return M_SUCCESS; +} + +static int assemble_elf(struct assembler *assembler, const char *out) +{ + if (assemble_symtab(assembler, (Elf32_Sym **) &assembler->symtab, + &assembler->symtab_len)) + return M_ERROR; + + if (assemble_reltbl(assembler, assembler->symtab, + assembler->symtab_len)) { return M_ERROR; }; + if (assemble_phdr(assembler, (Elf32_Phdr **) &assembler->phdr, + &assembler->phdr_len)) { + return M_ERROR; + } + + if (assemble_shdr(assembler, (Elf32_Shdr **) &assembler->shdr, + &assembler->shdr_len)) { + return M_ERROR; + }; + + // update the symbol tables with their given section + 
assemble_symtab_shndx(assembler, assembler->symtab); + Elf32_Ehdr ehdr = { .e_ident = { [EI_MAG0] = ELFMAG0, @@ -541,138 +708,39 @@ static int assemble_file(struct assembler *asm) .e_flags = EF_MIPS_ARCH_32R6, .e_ehsize = sizeof(Elf32_Ehdr), .e_phentsize = sizeof(Elf32_Phdr), - .e_phnum = phdr_len, + .e_phnum = assembler->phdr_len, .e_shentsize = sizeof(Elf32_Shdr), - .e_shnum = shdr_len, - .e_shstrndx = asm->shstrtbl_idx, + .e_shnum = assembler->shdr_len, + .e_shstrndx = assembler->shstrtbl_idx, }; - uint32_t ptr = 0; + update_offsets(assembler, &ehdr); - // we must now correct offets and sizes inside the ehdr, phdr, - // and shdr - ptr += sizeof(Elf32_Ehdr); - - // phdr - ehdr.e_phoff = ptr; - ptr += phdr_len * sizeof(Elf32_Phdr); - - // reltbls - for (uint32_t i = 0; i < asm->parser.sec_tbl.count; i++) { - if (asm->meta[i].reltbl == NULL) - continue; - int idx = asm->meta[i].reltbl_idx; - int len = asm->meta[i].reltbl_len; - shdr[idx].sh_offset = ptr; - shdr[idx].sh_size = len * sizeof(Elf32_Rela); - ptr += len * sizeof(Elf32_Rela); - } - - // sections - for (uint32_t i = 0; i < asm->parser.sec_tbl.count; i++) { - int idx = asm->meta[i].shdr_idx; - phdr[i].p_offset = ptr; - phdr[i].p_vaddr = ptr; - phdr[i].p_paddr = ptr; - shdr[idx].sh_offset = ptr; - shdr[idx].sh_size = phdr[i].p_filesz; - shdr[idx].sh_addr = phdr[i].p_vaddr; - shdr[idx].sh_addralign = phdr[i].p_align; - ptr += phdr[i].p_filesz; - } - - // symtbl - shdr[asm->symtbl_idx].sh_offset = ptr; - shdr[asm->symtbl_idx].sh_link = asm->strtbl_idx; - shdr[asm->symtbl_idx].sh_size = symtbl_len * sizeof(Elf32_Sym); - ptr += symtbl_len * sizeof(Elf32_Sym); - - // strtbl - shdr[asm->strtbl_idx].sh_offset = ptr; - shdr[asm->strtbl_idx].sh_size = asm->str_tbl.size; - ptr += asm->str_tbl.size; - - // shstrtbl - shdr[asm->shstrtbl_idx].sh_offset = ptr; - shdr[asm->shstrtbl_idx].sh_size = asm->shstr_tbl.size; - ptr += asm->shstr_tbl.size; - - // shdr - ehdr.e_shoff = ptr; - - FILE *out = fopen("out.o", "w"); - - // 
ehdr - fwrite(&ehdr, sizeof(Elf32_Ehdr), 1, out); - - // phdr - fwrite(phdr, sizeof(Elf32_Phdr), phdr_len, out); - - // reltbls - for (uint32_t i = 0; i < asm->parser.sec_tbl.count; i++) { - if (asm->meta[i].reltbl == NULL) - continue; - void *ptr = asm->meta[i].reltbl; - int len = asm->meta[i].reltbl_len; - asm->meta[i].reltbl = NULL; - fwrite(ptr, sizeof(Elf32_Rela), len, out); - } - - // sections - for (uint32_t i = 0; i < asm->parser.sec_tbl.count; i++) { - struct section *sec = &asm->parser.sec_tbl.sections[i]; - for (uint32_t j = 0; j < sec->count; j++) { - struct section_entry *entry = &sec->entries[j]; - size_t size = entry->size; - fwrite(&entry->data, size, 1, out); - while(size % sec->alignment) { - uint8_t zero = 0; - fwrite(&zero, 1, 1, out); - size++; - } - } - } - - // sym tbl - fwrite(symtbl, sizeof(Elf32_Sym), symtbl_len, out); - - // str tbl - fwrite(asm->str_tbl.ptr, asm->str_tbl.size, 1, out); - - // shstr tbl - fwrite(asm->shstr_tbl.ptr, asm->shstr_tbl.size, 1, out); - - // shdr - fwrite(shdr, sizeof(Elf32_Shdr), shdr_len, out); - - // cleanip - fclose(out); - free(shdr); - free(phdr); - free(symtbl); + if (write_file(assembler, &ehdr, out)) + return M_ERROR; return M_SUCCESS; } -int assemble_file_mips32(char *path) +int assemble_file_mips32(struct assembler_arguments args) { - struct assembler asm; + struct assembler assembler; int res = M_SUCCESS; - current_file = path; + current_file = args.in_file; - if (assembler_init(&asm, path)) + if (assembler_init(&assembler, args.in_file)) return M_ERROR; - mips32_parser_init(&asm.parser); + mips32_parser_init(&assembler.parser); if (res == M_SUCCESS) - res = parse_file(&asm); + res = parse_file(&assembler); if (res == M_SUCCESS) - res = assemble_file(&asm); + res = assemble_elf(&assembler, args.out_file); - assembler_free(&asm); + assembler_free(&assembler); return res; } diff --git a/masm/lex.c b/masm/lex.c index 06c7114..788523b 100644 --- a/masm/lex.c +++ b/masm/lex.c @@ -19,7 +19,7 @@ static int 
lex_next(struct lexer *lexer) int c = getc(lexer->file); if (c == '\n') { - lexer->x = 0; + lexer->x = 1; lexer->y++; } else { lexer->x++; @@ -296,13 +296,13 @@ int lexer_init(const char *path, struct lexer *lexer) { FILE *file = fopen(path, "r"); if (file == NULL) { - ERROR_POS(pos, "cannot file '%s'", path); + ERROR("cannot read '%s'", path); return M_ERROR; } lexer->file = file; lexer->peek = EOF; - lexer->x = 0; - lexer->y = 0; + lexer->x = 1; + lexer->y = 1; return M_SUCCESS; } diff --git a/masm/main.c b/masm/main.c index 957b34c..be156d8 100644 --- a/masm/main.c +++ b/masm/main.c @@ -1,9 +1,67 @@ +#include +#include +#include + #include "asm.h" +#include "mips.h" + +void help(void) { + printf("usage: masm [options] source.asm\n\n"); + printf("options:\n"); + printf("\t-h\t\tprints this help message\n"); + printf("\t-i isa\t\tselect a ISA to assemble to (mips32)\n"); + printf("\t-o output\tselect a output file destination\n"); +} int main(int argc, char **argv) { - if (argc != 2) - return 0; + struct assembler_arguments args = { + .isa = ISA_MIPS32, + .in_file = NULL, + .out_file = NULL, + }; - return assemble_file_mips32(argv[1]); + int c; + + while ((c = getopt(argc, argv, "ho:i:")) != -1) { /* getopt() returns -1 once every option has been consumed */ + switch(c) { + case 'h': + help(); + return M_SUCCESS; + case 'o': + args.out_file = optarg; + break; + case 'i': + if (strcmp(optarg, "mips32") == 0) { + args.isa = ISA_MIPS32; + } else { + ERROR("invalid isa '%s'", optarg); + return M_ERROR; + } + break; + case '?': + return M_ERROR; + default: + goto next; + } + } + +next: + if (optind < argc - 1) { + ERROR("too many source files passed"); + return M_ERROR; + } + + if (optind >= argc) { + ERROR("no source files passed"); + return M_ERROR; + } + + args.in_file = argv[optind]; + + if (args.out_file == NULL) { + args.out_file = "out.o"; + } + + return assemble_file(args); } diff --git a/masm/parse.c b/masm/parse.c index 9f8ddb1..452045b 100644 --- a/masm/parse.c +++ b/masm/parse.c @@ -110,29 +110,6 @@ static int 
parse_label(struct parser *parser, return M_ERROR; strcpy(expr->text, token.text); - struct symbol *ref; - if (symtbl_find(&parser->sym_tbl, &ref, token.text) == M_SUCCESS) { - if (ref->flag == SYM_GLOBAL && ref->sec == NULL) { - ref->sec = parser->sec_tbl.current; - ref->index = parser->sec_tbl.current->count; - return M_SUCCESS; - } - ERROR_POS(token, "redefined symbol '%s'", token.text); - return M_ERROR; - } - - struct symbol symbol; - symbol = (struct symbol) { - .name = "", - .sec = parser->sec_tbl.current, - .index = parser->sec_tbl.current->count, - .flag = SYM_LOCAL, - }; - strcpy(symbol.name, token.text); - - if (symtbl_push(&parser->sym_tbl, symbol)) - return M_ERROR; - return M_SUCCESS; } @@ -184,8 +161,6 @@ int parser_init(struct lexer *lexer, struct parser *parser) { parser->lexer = lexer; parser->peek.type = TOK_EOF; - if (symtbl_init(&parser->sym_tbl)) - return M_ERROR; if (sectbl_init(&parser->sec_tbl)) return M_ERROR; if (reftbl_init(&parser->ref_tbl)) @@ -196,7 +171,6 @@ int parser_init(struct lexer *lexer, struct parser *parser) void parser_free(struct parser *parser) { - symtbl_free(&parser->sym_tbl); sectbl_free(&parser->sec_tbl); reftbl_free(&parser->ref_tbl); } diff --git a/masm/parse.h b/masm/parse.h index 5f37052..ea8f929 100644 --- a/masm/parse.h +++ b/masm/parse.h @@ -35,33 +35,6 @@ struct expr { }; }; -enum symbol_flag { - SYM_LOCAL, - SYM_GLOBAL, - SYM_EXTERNAL, -}; - -struct symbol { - char name[MAX_LEX_LENGTH]; - uint32_t index; - struct section *sec; - enum symbol_flag flag; - -}; - -struct symbol_table { - uint32_t count; - uint32_t len; - struct symbol *symbols; -}; - -int symtbl_init(struct symbol_table *sym_tbl); -void symtbl_free(struct symbol_table *sym_tbl); - -int symtbl_push(struct symbol_table *sym_tbl, struct symbol sym); -int symtbl_find(struct symbol_table *sym_tbl, struct symbol **sym, - const char name[MAX_LEX_LENGTH]); - enum section_entry_type { ENT_INS, ENT_WORD, @@ -143,9 +116,6 @@ struct parser { // sections 
struct section_table sec_tbl; - // symbols - struct symbol_table sym_tbl; - // references struct reference_table ref_tbl; diff --git a/masm/symtbl.c b/masm/symtbl.c index b75c752..8aa7bcf 100644 --- a/masm/symtbl.c +++ b/masm/symtbl.c @@ -3,7 +3,7 @@ #include #include -#include "parse.h" +#include "asm.h" #define SYMTBL_INIT_LEN 24