This commit is contained in:
Freya Murphy 2024-09-10 18:23:46 -04:00
parent 0ff948af3d
commit 92a7e5853c
Signed by: freya
GPG key ID: 744AB800E383AE52
11 changed files with 457 additions and 297 deletions

View file

@ -1,5 +1,5 @@
-c -c
-std=c2x -std=gnu2x
-Wall -Wall
-Wextra -Wextra
-pedantic -pedantic

View file

@ -5,6 +5,10 @@
#include <mips32.h> #include <mips32.h>
/* Instruction set architectures the assembler can be asked to target;
 * assemble_file() dispatches on this value. */
enum mips_isa {
ISA_MIPS32
};
/* An instruction in ISA-specific form; currently the only member is the
 * MIPS32 representation. */
union mips_instruction {
	struct mips32_instruction mips32;
};

View file

@ -1,6 +1,6 @@
# needed cflags # needed cflags
CFLAGS += -std=c2x CFLAGS += -std=gnu2x
# add include directory # add include directory
CFLAGS += -isystem ../include CFLAGS += -isystem ../include

View file

@ -3,39 +3,75 @@
#include "asm.h" #include "asm.h"
int assembler_init(struct assembler *asm, const char *path) int assembler_init(struct assembler *assembler, const char *path)
{ {
if (lexer_init(path, &asm->lexer)) if (lexer_init(path, &assembler->lexer))
return M_ERROR; return M_ERROR;
if (parser_init(&asm->lexer, &asm->parser)) { if (parser_init(&assembler->lexer, &assembler->parser)) {
lexer_free(&asm->lexer); lexer_free(&assembler->lexer);
return M_ERROR; return M_ERROR;
} }
if (strtbl_init(&asm->shstr_tbl)) { if (strtbl_init(&assembler->shstr_tbl)) {
parser_free(&asm->parser); parser_free(&assembler->parser);
lexer_free(&asm->lexer); lexer_free(&assembler->lexer);
return M_ERROR; return M_ERROR;
} }
if (strtbl_init(&asm->str_tbl)) { if (strtbl_init(&assembler->str_tbl)) {
strtbl_free(&asm->shstr_tbl); strtbl_free(&assembler->shstr_tbl);
parser_free(&asm->parser); parser_free(&assembler->parser);
lexer_free(&asm->lexer); lexer_free(&assembler->lexer);
return M_ERROR; return M_ERROR;
} }
asm->meta = NULL; if (symtbl_init(&assembler->sym_tbl)) {
strtbl_free(&assembler->str_tbl);
strtbl_free(&assembler->shstr_tbl);
parser_free(&assembler->parser);
lexer_free(&assembler->lexer);
return M_ERROR;
}
assembler->meta = NULL;
assembler->phdr = NULL;
assembler->shdr = NULL;
assembler->symtab = NULL;
return M_SUCCESS; return M_SUCCESS;
} }
void assembler_free(struct assembler *asm) void assembler_free(struct assembler *assembler)
{ {
strtbl_free(&asm->str_tbl); if (assembler->meta) {
strtbl_free(&asm->shstr_tbl); for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) {
parser_free(&asm->parser); if (assembler->meta[i].reltbl != NULL) {
lexer_free(&asm->lexer); free(assembler->meta[i].reltbl);
free(asm->meta); }
}
free(assembler->meta);
}
if (assembler->phdr)
free(assembler->phdr);
if (assembler->shdr)
free(assembler->shdr);
if (assembler->symtab)
free(assembler->symtab);
symtbl_free(&assembler->sym_tbl);
strtbl_free(&assembler->str_tbl);
strtbl_free(&assembler->shstr_tbl);
parser_free(&assembler->parser);
lexer_free(&assembler->lexer);
}
/*
 * Assemble the file described by `args` with the back end for args.isa.
 * Returns that back end's result, or M_ERROR when the ISA is not handled.
 */
int assemble_file(struct assembler_arguments args) {
	if (args.isa == ISA_MIPS32)
		return assemble_file_mips32(args);

	return M_ERROR;
}

View file

@ -8,6 +8,34 @@
#include "lex.h" #include "lex.h"
#include "parse.h" #include "parse.h"
/* How a symbol entered the symbol table. */
enum symbol_flag {
// assigned to symbols created from a label definition
SYM_LOCAL,
// assigned by the .globl directive handling
SYM_GLOBAL,
// assigned to symbols created by the .extern directive
SYM_EXTERNAL,
};
/* One entry of the assembler's symbol table. */
struct symbol {
// symbol name as written in the source
char name[MAX_LEX_LENGTH];
// entry count of `sec` at the moment the label was defined; 0 for externs
uint32_t index;
// section the symbol is defined in; NULL for externs and for a global
// that has not been given a definition yet
struct section *sec;
enum symbol_flag flag;
};
/* Array of symbols filled through symtbl_push(). */
struct symbol_table {
// number of valid entries in `symbols`
uint32_t count;
// presumably the allocated capacity of `symbols` — confirm in the
// symtbl implementation
uint32_t len;
struct symbol *symbols;
};
// set up a symbol table; callers treat a non-zero return as failure
int symtbl_init(struct symbol_table *sym_tbl);
// release a table set up by symtbl_init
void symtbl_free(struct symbol_table *sym_tbl);
// add `sym` to the table; callers treat a non-zero return as failure
int symtbl_push(struct symbol_table *sym_tbl, struct symbol sym);
// look a symbol up by name; returns M_SUCCESS when it exists. Callers pass
// NULL for `sym` when only existence matters; otherwise *sym is used as a
// pointer to the stored entry afterwards
int symtbl_find(struct symbol_table *sym_tbl, struct symbol **sym,
const char name[MAX_LEX_LENGTH]);
struct str_table { struct str_table {
char *ptr; char *ptr;
size_t size; size_t size;
@ -34,21 +62,43 @@ struct section_meta {
}; };
/* All state used while assembling one source file into an ELF object. */
struct assembler {
	// the token lexer
	struct lexer lexer;

	// the expression parser
	struct parser parser;

	// per-section metadata and the section header indexes of the
	// .shstrtab, .strtab and .symtab sections
	struct section_meta *meta;
	size_t shstrtbl_idx;
	size_t strtbl_idx;
	size_t symtab_idx;

	// symbols and strings
	struct symbol_table sym_tbl;
	struct str_table shstr_tbl;
	struct str_table str_tbl;

	// elf data; each *_len is the entry count of the matching buffer
	void *phdr; // void* since could be Elf32 or Elf64
	void *shdr;
	void *symtab;
	uint32_t phdr_len;
	uint32_t shdr_len;
	uint32_t symtab_len;
};
/* Command-line settings handed from main() to the assembler. */
struct assembler_arguments {
	// path of the source file to read
	char *in_file;
	// path of the object file to write
	char *out_file;
	// target instruction set
	enum mips_isa isa;
};
/* set up / tear down an assembler for the source file at `path` */
int assembler_init(struct assembler *assembler, const char *path);
void assembler_free(struct assembler *assembler);

/* assemble args.in_file with the back end selected by args.isa */
int assemble_file(struct assembler_arguments args);

/* assemble a mips32 file */
int assemble_file_mips32(struct assembler_arguments args);
#endif /* __ASM_H__ */ #endif /* __ASM_H__ */

View file

@ -14,12 +14,12 @@
extern char *current_file; extern char *current_file;
static int handle_directive(struct assembler *asm, static int handle_directive(struct assembler *assembler,
struct mips32_directive *directive) struct mips32_directive *directive)
{ {
switch (directive->type) { switch (directive->type) {
case MIPS32_DIRECTIVE_SECTION: { case MIPS32_DIRECTIVE_SECTION: {
struct section_table *sec_tbl = &asm->parser.sec_tbl; struct section_table *sec_tbl = &assembler->parser.sec_tbl;
struct section *sec; struct section *sec;
if (sectbl_get(sec_tbl, &sec, directive->name) if (sectbl_get(sec_tbl, &sec, directive->name)
== M_SUCCESS) { == M_SUCCESS) {
@ -35,7 +35,7 @@ static int handle_directive(struct assembler *asm,
} }
case MIPS32_DIRECTIVE_ALIGN: { case MIPS32_DIRECTIVE_ALIGN: {
asm->parser.sec_tbl.current->alignment = assembler->parser.sec_tbl.current->alignment =
1 << directive->align; 1 << directive->align;
break; break;
} }
@ -44,7 +44,7 @@ static int handle_directive(struct assembler *asm,
struct section_entry entry; struct section_entry entry;
entry.type = ENT_NO_DATA; entry.type = ENT_NO_DATA;
entry.size = directive->space; entry.size = directive->space;
if (sec_push(asm->parser.sec_tbl.current, entry)) if (sec_push(assembler->parser.sec_tbl.current, entry))
return M_ERROR; return M_ERROR;
break; break;
} }
@ -54,7 +54,8 @@ static int handle_directive(struct assembler *asm,
struct section_entry entry; struct section_entry entry;
entry.type = ENT_WORD; entry.type = ENT_WORD;
entry.word = directive->words[i]; entry.word = directive->words[i];
if (sec_push(asm->parser.sec_tbl.current, entry)) if (sec_push(assembler->parser.sec_tbl.current,
entry))
return M_ERROR; return M_ERROR;
} }
break; break;
@ -65,7 +66,8 @@ static int handle_directive(struct assembler *asm,
struct section_entry entry; struct section_entry entry;
entry.type = ENT_HALF; entry.type = ENT_HALF;
entry.half = directive->halfs[i]; entry.half = directive->halfs[i];
if (sec_push(asm->parser.sec_tbl.current, entry)) if (sec_push(assembler->parser.sec_tbl.current,
entry))
return M_ERROR; return M_ERROR;
} }
break; break;
@ -76,7 +78,8 @@ static int handle_directive(struct assembler *asm,
struct section_entry entry; struct section_entry entry;
entry.type = ENT_BYTE; entry.type = ENT_BYTE;
entry.byte = directive->bytes[i]; entry.byte = directive->bytes[i];
if (sec_push(asm->parser.sec_tbl.current, entry)) if (sec_push(assembler->parser.sec_tbl.current,
entry))
return M_ERROR; return M_ERROR;
} }
break; break;
@ -84,7 +87,7 @@ static int handle_directive(struct assembler *asm,
case MIPS32_DIRECTIVE_EXTERN: { case MIPS32_DIRECTIVE_EXTERN: {
struct symbol symbol; struct symbol symbol;
if (symtbl_find(&asm->parser.sym_tbl, NULL, directive->name) if (symtbl_find(&assembler->sym_tbl, NULL, directive->name)
== M_SUCCESS) { == M_SUCCESS) {
ERROR("cannot extern local symbol '%s'", ERROR("cannot extern local symbol '%s'",
directive->name); directive->name);
@ -93,13 +96,13 @@ static int handle_directive(struct assembler *asm,
symbol = (struct symbol) { symbol = (struct symbol) {
.name = "", .name = "",
.sec = asm->parser.sec_tbl.current, .sec = NULL,
.index = asm->parser.sec_tbl.current->count, .index = 0,
.flag = SYM_EXTERNAL, .flag = SYM_EXTERNAL,
}; };
strcpy(symbol.name, directive->name); strcpy(symbol.name, directive->name);
if (symtbl_push(&asm->parser.sym_tbl, symbol)) if (symtbl_push(&assembler->sym_tbl, symbol))
return M_ERROR; return M_ERROR;
break; break;
@ -107,7 +110,7 @@ static int handle_directive(struct assembler *asm,
case MIPS32_DIRECTIVE_GLOBL: { case MIPS32_DIRECTIVE_GLOBL: {
struct symbol symbol; struct symbol symbol;
if (symtbl_find(&asm->parser.sym_tbl, NULL, directive->name) if (symtbl_find(&assembler->sym_tbl, NULL, directive->name)
== M_SUCCESS) { == M_SUCCESS) {
symbol.flag = SYM_GLOBAL; symbol.flag = SYM_GLOBAL;
break; break;
@ -121,7 +124,7 @@ static int handle_directive(struct assembler *asm,
}; };
strcpy(symbol.name, directive->name); strcpy(symbol.name, directive->name);
if (symtbl_push(&asm->parser.sym_tbl, symbol)) if (symtbl_push(&assembler->sym_tbl, symbol))
return M_ERROR; return M_ERROR;
break; break;
@ -131,15 +134,43 @@ static int handle_directive(struct assembler *asm,
return M_SUCCESS; return M_SUCCESS;
} }
static int parse_file(struct assembler *asm) static int handle_label(struct assembler *assembler,
const char name[MAX_LEX_LENGTH])
{ {
struct parser *parser = &asm->parser; struct symbol *ref;
if (symtbl_find(&assembler->sym_tbl, &ref, name) == M_SUCCESS) {
if (ref->flag == SYM_GLOBAL && ref->sec == NULL) {
ref->sec = assembler->parser.sec_tbl.current;
ref->index = assembler->parser.sec_tbl.current->count;
return M_SUCCESS;
}
ERROR("redefined symbol '%s'", name);
return M_ERROR;
}
struct symbol symbol;
symbol = (struct symbol) {
.name = "",
.sec = assembler->parser.sec_tbl.current,
.index = assembler->parser.sec_tbl.current->count,
.flag = SYM_LOCAL,
};
strcpy(symbol.name, name);
if (symtbl_push(&assembler->sym_tbl, symbol))
return M_ERROR;
return M_SUCCESS;
}
static int parse_file(struct assembler *assembler)
{
struct parser *parser = &assembler->parser;
while (1) { while (1) {
struct expr expr; struct expr expr;
if (parser_next(parser, &expr)) { if (parser_next(parser, &expr))
break; return M_ERROR;
}
switch (expr.type) { switch (expr.type) {
case EXPR_INS: case EXPR_INS:
@ -152,13 +183,17 @@ static int parse_file(struct assembler *asm)
break; break;
case EXPR_DIRECTIVE: case EXPR_DIRECTIVE:
if (handle_directive(asm, &expr.directive.mips32)) if (handle_directive(assembler,
&expr.directive.mips32))
return M_ERROR;
break;
case EXPR_LABEL:
if (handle_label(assembler, expr.text))
return M_ERROR; return M_ERROR;
break; break;
case EXPR_CONSTANT: case EXPR_CONSTANT:
case EXPR_LABEL:
// nothing needed to be done
break; break;
} }
} }
@ -170,7 +205,7 @@ static int parse_file(struct assembler *asm)
return M_ERROR; return M_ERROR;
} }
asm->meta = meta; assembler->meta = meta;
size_t ptr = 0; size_t ptr = 0;
for (uint32_t i = 0; i < parser->sec_tbl.count; i++) { for (uint32_t i = 0; i < parser->sec_tbl.count; i++) {
@ -182,10 +217,10 @@ static int parse_file(struct assembler *asm)
return M_SUCCESS; return M_SUCCESS;
} }
static int assemble_phdr(struct assembler *asm, Elf32_Phdr **res, static int assemble_phdr(struct assembler *assembler, Elf32_Phdr **res,
uint32_t *res2) uint32_t *res2)
{ {
struct parser *parser = &asm->parser; struct parser *parser = &assembler->parser;
Elf32_Phdr *phdr = malloc(sizeof(Elf32_Phdr) * Elf32_Phdr *phdr = malloc(sizeof(Elf32_Phdr) *
parser->sec_tbl.count); parser->sec_tbl.count);
if (phdr == NULL) { if (phdr == NULL) {
@ -214,28 +249,34 @@ static int assemble_phdr(struct assembler *asm, Elf32_Phdr **res,
return M_SUCCESS; return M_SUCCESS;
} }
static int assemble_symtbl(struct assembler *asm, Elf32_Sym **res, static int assemble_symtab(struct assembler *assembler, Elf32_Sym **res,
uint32_t *res2) uint32_t *res2)
{ {
Elf32_Sym *stbl = malloc(sizeof(Elf32_Sym) * asm->parser.sym_tbl Elf32_Sym *stbl = malloc(sizeof(Elf32_Sym) * assembler->sym_tbl
.count); .count);
size_t size = 0; size_t size = 0;
if (stbl == NULL) if (stbl == NULL)
return M_ERROR; return M_ERROR;
for (uint32_t i = 0; i < asm->parser.sym_tbl.count; i++) { for (uint32_t i = 0; i < assembler->sym_tbl.count; i++) {
struct symbol *sym = &asm->parser.sym_tbl.symbols[i]; struct symbol *sym = &assembler->sym_tbl.symbols[i];
size_t str_off; size_t str_off;
unsigned char bind; unsigned char bind;
unsigned char type = STT_NOTYPE; unsigned char type = STT_NOTYPE;
if (strtbl_write_str(&asm->str_tbl, sym->name, &str_off)) { if (strtbl_write_str(&assembler->str_tbl,
sym->name, &str_off)) {
free(stbl); free(stbl);
return M_ERROR; return M_ERROR;
} }
if (sym->flag != SYM_LOCAL) if (sym->flag == SYM_GLOBAL && sym->sec == NULL) {
ERROR("never defined global symbol '%s'", sym->name);
return M_ERROR;
}
if (sym->flag == SYM_LOCAL)
bind = STB_LOCAL; bind = STB_LOCAL;
else else
bind = STB_GLOBAL; bind = STB_GLOBAL;
@ -246,7 +287,7 @@ static int assemble_symtbl(struct assembler *asm, Elf32_Sym **res,
.st_size = 0, .st_size = 0,
.st_info = ELF32_ST_INFO(bind, type), .st_info = ELF32_ST_INFO(bind, type),
.st_other = ELF32_ST_VISIBILITY(STV_DEFAULT), .st_other = ELF32_ST_VISIBILITY(STV_DEFAULT),
.st_shndx = asm->meta[sym->sec->index].shdr_idx, .st_shndx = 0,
}; };
size = i + 1; size = i + 1;
}; };
@ -257,20 +298,31 @@ static int assemble_symtbl(struct assembler *asm, Elf32_Sym **res,
return M_SUCCESS; return M_SUCCESS;
} }
static int assemble_reltbl_sec(struct assembler *asm, Elf32_Sym *symtbl, static void assemble_symtab_shndx(struct assembler *assembler, Elf32_Sym *tbl)
uint32_t symtbl_len, struct section *sec) {
for (uint32_t i = 0; i < assembler->sym_tbl.count; i++) {
struct symbol *sym = &assembler->sym_tbl.symbols[i];
if (sym->sec != NULL)
tbl[i].st_shndx =
assembler->meta[sym->sec->index].shdr_idx;
}
}
static int assemble_reltbl_sec(struct assembler *assembler, Elf32_Sym *symtab,
uint32_t symtab_len, struct section *sec)
{ {
uint32_t len = 0; uint32_t len = 0;
for (uint32_t i = 0; i < asm->parser.ref_tbl.count; i++) { for (uint32_t i = 0; i < assembler->parser.ref_tbl.count; i++) {
struct reference *ref = &asm->parser.ref_tbl.references[i]; struct reference *ref =
&assembler->parser.ref_tbl.references[i];
if (ref->section->index == sec->index) { if (ref->section->index == sec->index) {
len++; len++;
} }
} }
if (len == 0) { if (len == 0) {
asm->meta[sec->index].reltbl = NULL; assembler->meta[sec->index].reltbl = NULL;
return M_SUCCESS; return M_SUCCESS;
} }
@ -281,11 +333,11 @@ static int assemble_reltbl_sec(struct assembler *asm, Elf32_Sym *symtbl,
return M_ERROR; return M_ERROR;
} }
for (uint32_t i = 0; i < asm->parser.ref_tbl.count; i++) { for (uint32_t i = 0; i < assembler->parser.ref_tbl.count; i++) {
struct reference *ref = &asm->parser.ref_tbl.references[i]; struct reference *ref =
&assembler->parser.ref_tbl.references[i];
struct mips32_instruction *ins = &ref->section-> struct mips32_instruction *ins = &ref->section->
entries[ref->index].ins.mips32; entries[ref->index].ins.mips32;
struct section_meta *meta = &asm->meta[ref->section->index];
if (ref->section->index != sec->index) { if (ref->section->index != sec->index) {
continue; continue;
@ -306,9 +358,10 @@ static int assemble_reltbl_sec(struct assembler *asm, Elf32_Sym *symtbl,
int32_t symidx = -1; int32_t symidx = -1;
for (uint32_t i = 0; i < symtbl_len; i++) { for (uint32_t i = 0; i < symtab_len; i++) {
Elf32_Sym *sym = &symtbl[i]; Elf32_Sym *sym = &symtab[i];
const char *str = &asm->str_tbl.ptr[sym->st_name]; const char *str =
&assembler->str_tbl.ptr[sym->st_name];
if (strcmp(ref->name, str) == 0) { if (strcmp(ref->name, str) == 0) {
symidx = i; symidx = i;
break; break;
@ -324,35 +377,34 @@ static int assemble_reltbl_sec(struct assembler *asm, Elf32_Sym *symtbl,
reltbl[i] = (Elf32_Rela) { reltbl[i] = (Elf32_Rela) {
.r_info = ELF32_R_INFO(symidx, type), .r_info = ELF32_R_INFO(symidx, type),
.r_addend = addend, .r_addend = addend,
.r_offset = meta->v_addr + .r_offset = sec_index(ref->section, ref->index),
sec_index(ref->section, ref->index),
}; };
}; };
asm->meta[sec->index].reltbl_len = len; assembler->meta[sec->index].reltbl_len = len;
asm->meta[sec->index].reltbl = reltbl; assembler->meta[sec->index].reltbl = reltbl;
return M_SUCCESS; return M_SUCCESS;
} }
static int assemble_reltbl(struct assembler *asm, Elf32_Sym *symtbl, static int assemble_reltbl(struct assembler *assembler, Elf32_Sym *symtab,
uint32_t symtbl_len) uint32_t symtab_len)
{ {
for (uint32_t i = 0; i < asm->parser.sec_tbl.count; i++) { for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) {
struct section *sec = &asm->parser.sec_tbl.sections[i]; struct section *sec = &assembler->parser.sec_tbl.sections[i];
if (assemble_reltbl_sec(asm, symtbl, symtbl_len, sec)) if (assemble_reltbl_sec(assembler, symtab, symtab_len, sec))
return M_ERROR; return M_ERROR;
} }
return M_SUCCESS; return M_SUCCESS;
} }
static int assemble_shdr(struct assembler *asm, Elf32_Shdr **res, static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res,
uint32_t *res2) uint32_t *res2)
{ {
uint32_t max_entries = 4; // symtab, strtab, shstrtab uint32_t max_entries = 4; // symtab, strtab, shstrtab
max_entries += asm->parser.sec_tbl.count; // sections max_entries += assembler->parser.sec_tbl.count; // sections
max_entries += asm->parser.sec_tbl.count; // reltabs per section max_entries += assembler->parser.sec_tbl.count; // reltabs per section
Elf32_Shdr *shdr = malloc(sizeof(Elf32_Shdr) * max_entries); Elf32_Shdr *shdr = malloc(sizeof(Elf32_Shdr) * max_entries);
@ -360,24 +412,25 @@ static int assemble_shdr(struct assembler *asm, Elf32_Shdr **res,
uint32_t count = 0; uint32_t count = 0;
// reltables // reltables
for (uint32_t i = 0; i < asm->parser.sec_tbl.count; i++) { for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) {
if (asm->meta[i].reltbl == NULL) if (assembler->meta[i].reltbl == NULL)
continue; continue;
struct section *sec = &asm->parser.sec_tbl.sections[i]; struct section *sec = &assembler->parser.sec_tbl.sections[i];
const char *prefix = ".reltab."; const char *prefix = ".reltab.";
char reltab_name[MAX_LEX_LENGTH + 8]; char reltab_name[MAX_LEX_LENGTH + 8];
strcpy(reltab_name, prefix); strcpy(reltab_name, prefix);
strcat(reltab_name, sec->name); strcat(reltab_name, sec->name);
if (strtbl_write_str(&asm->shstr_tbl, reltab_name, &str_off)) { if (strtbl_write_str(&assembler->shstr_tbl,
reltab_name, &str_off)) {
free(shdr); free(shdr);
return M_ERROR; return M_ERROR;
} }
asm->meta[i].reltbl_idx = count; assembler->meta[i].reltbl_idx = count;
shdr[count++] = (Elf32_Shdr) { shdr[count++] = (Elf32_Shdr) {
.sh_name = str_off, .sh_name = str_off,
.sh_type = SHT_RELA, .sh_type = SHT_RELA,
@ -393,17 +446,17 @@ static int assemble_shdr(struct assembler *asm, Elf32_Shdr **res,
} }
// for each section // for each section
for (uint32_t i = 0; i < asm->parser.sec_tbl.count; i++) { for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) {
struct section *sec = &asm->parser.sec_tbl.sections[i]; struct section *sec = &assembler->parser.sec_tbl.sections[i];
char name[MAX_LEX_LENGTH+1] = "."; char name[MAX_LEX_LENGTH+1] = ".";
strcat(name, sec->name); strcat(name, sec->name);
if (strtbl_write_str(&asm->shstr_tbl, name, &str_off)) { if (strtbl_write_str(&assembler->shstr_tbl, name, &str_off)) {
free(shdr); free(shdr);
return M_ERROR; return M_ERROR;
} }
asm->meta[i].shdr_idx = count; assembler->meta[i].shdr_idx = count;
if (asm->meta[i].reltbl != NULL) if (assembler->meta[i].reltbl != NULL)
shdr[asm->meta[i].reltbl_idx].sh_info = count; shdr[assembler->meta[i].reltbl_idx].sh_info = count;
shdr[count++] = (Elf32_Shdr){ shdr[count++] = (Elf32_Shdr){
.sh_name = str_off, .sh_name = str_off,
.sh_type = SHT_PROGBITS, .sh_type = SHT_PROGBITS,
@ -420,12 +473,12 @@ static int assemble_shdr(struct assembler *asm, Elf32_Shdr **res,
} }
// symbol table // symbol table
if (strtbl_write_str(&asm->shstr_tbl, ".symtab", &str_off)) { if (strtbl_write_str(&assembler->shstr_tbl, ".symtab", &str_off)) {
free(shdr); free(shdr);
return M_ERROR; return M_ERROR;
} }
asm->symtbl_idx = count; assembler->symtab_idx = count;
shdr[count++] = (Elf32_Shdr) { shdr[count++] = (Elf32_Shdr) {
.sh_name = str_off, .sh_name = str_off,
.sh_type = SHT_SYMTAB, .sh_type = SHT_SYMTAB,
@ -440,12 +493,12 @@ static int assemble_shdr(struct assembler *asm, Elf32_Shdr **res,
}; };
// string table // string table
if (strtbl_write_str(&asm->shstr_tbl, ".strtab", &str_off)) { if (strtbl_write_str(&assembler->shstr_tbl, ".strtab", &str_off)) {
free(shdr); free(shdr);
return M_ERROR; return M_ERROR;
} }
asm->strtbl_idx = count; assembler->strtbl_idx = count;
shdr[count++] = (Elf32_Shdr) { shdr[count++] = (Elf32_Shdr) {
.sh_name = str_off, .sh_name = str_off,
.sh_type = SHT_STRTAB, .sh_type = SHT_STRTAB,
@ -460,12 +513,12 @@ static int assemble_shdr(struct assembler *asm, Elf32_Shdr **res,
}; };
// sh string table // sh string table
if (strtbl_write_str(&asm->shstr_tbl, ".shstrtab", &str_off)) { if (strtbl_write_str(&assembler->shstr_tbl, ".shstrtab", &str_off)) {
free(shdr); free(shdr);
return M_ERROR; return M_ERROR;
} }
asm->shstrtbl_idx = count; assembler->shstrtbl_idx = count;
shdr[count++] = (Elf32_Shdr) { shdr[count++] = (Elf32_Shdr) {
.sh_name = str_off, .sh_name = str_off,
.sh_type = SHT_STRTAB, .sh_type = SHT_STRTAB,
@ -479,10 +532,11 @@ static int assemble_shdr(struct assembler *asm, Elf32_Shdr **res,
.sh_entsize = 0, .sh_entsize = 0,
}; };
for (uint32_t i = 0; i < asm->parser.sec_tbl.count; i++) { for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) {
if (asm->meta[i].reltbl == NULL) if (assembler->meta[i].reltbl == NULL)
continue; continue;
shdr[asm->meta[i].reltbl_idx].sh_link = asm->symtbl_idx; shdr[assembler->meta[i].reltbl_idx].sh_link =
assembler->symtab_idx;
} }
*res = shdr; *res = shdr;
@ -491,34 +545,147 @@ static int assemble_shdr(struct assembler *asm, Elf32_Shdr **res,
return M_SUCCESS; return M_SUCCESS;
} }
static int assemble_file(struct assembler *asm) static void update_offsets(struct assembler *assembler, Elf32_Ehdr *ehdr)
{ {
Elf32_Phdr *phdr; Elf32_Shdr *shdr = (Elf32_Shdr *) assembler->shdr;
Elf32_Shdr *shdr; Elf32_Phdr *phdr = (Elf32_Phdr *) assembler->phdr;
Elf32_Sym *symtbl; uint32_t ptr = 0;
uint32_t phdr_len;
uint32_t shdr_len;
uint32_t symtbl_len;
if (assemble_symtbl(asm, &symtbl, &symtbl_len)) // we must now correct offets and sizes inside the ehdr, phdr,
return M_ERROR; // and shdr
ptr += sizeof(Elf32_Ehdr);
if (assemble_reltbl(asm, symtbl, symtbl_len)) { // phdr
free(symtbl); ehdr->e_phoff = ptr;
return M_ERROR; ptr += assembler->phdr_len * sizeof(Elf32_Phdr);
};
if (assemble_phdr(asm, &phdr, &phdr_len)) { // reltbls
free(symtbl); for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) {
if (assembler->meta[i].reltbl == NULL)
continue;
int idx = assembler->meta[i].reltbl_idx;
int len = assembler->meta[i].reltbl_len;
shdr[idx].sh_offset = ptr;
shdr[idx].sh_size = len * sizeof(Elf32_Rela);
ptr += len * sizeof(Elf32_Rela);
}
// sections
for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) {
int idx = assembler->meta[i].shdr_idx;
phdr[i].p_offset = ptr;
phdr[i].p_vaddr = ptr;
phdr[i].p_paddr = ptr;
shdr[idx].sh_offset = ptr;
shdr[idx].sh_size = phdr[i].p_filesz;
shdr[idx].sh_addr = phdr[i].p_vaddr;
shdr[idx].sh_addralign = phdr[i].p_align;
ptr += phdr[i].p_filesz;
}
// symtab
shdr[assembler->symtab_idx].sh_offset = ptr;
shdr[assembler->symtab_idx].sh_link = assembler->strtbl_idx;
shdr[assembler->symtab_idx].sh_size =
assembler->symtab_len * sizeof(Elf32_Sym);
ptr += assembler->symtab_len * sizeof(Elf32_Sym);
// strtbl
shdr[assembler->strtbl_idx].sh_offset = ptr;
shdr[assembler->strtbl_idx].sh_size = assembler->str_tbl.size;
ptr += assembler->str_tbl.size;
// shstrtbl
shdr[assembler->shstrtbl_idx].sh_offset = ptr;
shdr[assembler->shstrtbl_idx].sh_size = assembler->shstr_tbl.size;
ptr += assembler->shstr_tbl.size;
// shdr
ehdr->e_shoff = ptr;
}
static int write_file(struct assembler *assembler, Elf32_Ehdr *ehdr,
const char *path)
{
FILE *out = fopen(path, "w");
if (out == NULL) {
ERROR("cannot write '%s'", path);
return M_ERROR; return M_ERROR;
} }
if (assemble_shdr(asm, &shdr, &shdr_len)) { // ehdr
free(symtbl); fwrite(ehdr, sizeof(Elf32_Ehdr), 1, out);
free(phdr);
// phdr
fwrite(assembler->phdr, sizeof(Elf32_Phdr), assembler->phdr_len, out);
// reltbls
for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) {
if (assembler->meta[i].reltbl == NULL)
continue;
void *ptr = assembler->meta[i].reltbl;
int len = assembler->meta[i].reltbl_len;
fwrite(ptr, sizeof(Elf32_Rela), len, out);
}
// sections
for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) {
struct section *sec = &assembler->parser.sec_tbl.sections[i];
for (uint32_t j = 0; j < sec->count; j++) {
struct section_entry *entry = &sec->entries[j];
size_t size = entry->size;
fwrite(&entry->data, size, 1, out);
while(size % sec->alignment) {
uint8_t zero = 0;
fwrite(&zero, 1, 1, out);
size++;
}
}
}
// sym tbl
fwrite(assembler->symtab, sizeof(Elf32_Sym),
assembler->symtab_len, out);
// str tbl
fwrite(assembler->str_tbl.ptr, assembler->str_tbl.size, 1, out);
// shstr tbl
fwrite(assembler->shstr_tbl.ptr, assembler->shstr_tbl.size, 1, out);
// shdr
fwrite(assembler->shdr, sizeof(Elf32_Shdr), assembler->shdr_len, out);
fclose(out);
return M_SUCCESS;
}
static int assemble_elf(struct assembler *assembler, const char *out)
{
if (assemble_symtab(assembler, (Elf32_Sym **) &assembler->symtab,
&assembler->symtab_len))
return M_ERROR;
if (assemble_reltbl(assembler, assembler->symtab,
assembler->symtab_len)) {
return M_ERROR; return M_ERROR;
}; };
if (assemble_phdr(assembler, (Elf32_Phdr **) &assembler->phdr,
&assembler->phdr_len)) {
return M_ERROR;
}
if (assemble_shdr(assembler, (Elf32_Shdr **) &assembler->shdr,
&assembler->shdr_len)) {
return M_ERROR;
};
// update the symbol tables with their given section
assemble_symtab_shndx(assembler, assembler->symtab);
Elf32_Ehdr ehdr = { Elf32_Ehdr ehdr = {
.e_ident = { .e_ident = {
[EI_MAG0] = ELFMAG0, [EI_MAG0] = ELFMAG0,
@ -541,138 +708,39 @@ static int assemble_file(struct assembler *asm)
.e_flags = EF_MIPS_ARCH_32R6, .e_flags = EF_MIPS_ARCH_32R6,
.e_ehsize = sizeof(Elf32_Ehdr), .e_ehsize = sizeof(Elf32_Ehdr),
.e_phentsize = sizeof(Elf32_Phdr), .e_phentsize = sizeof(Elf32_Phdr),
.e_phnum = phdr_len, .e_phnum = assembler->phdr_len,
.e_shentsize = sizeof(Elf32_Shdr), .e_shentsize = sizeof(Elf32_Shdr),
.e_shnum = shdr_len, .e_shnum = assembler->shdr_len,
.e_shstrndx = asm->shstrtbl_idx, .e_shstrndx = assembler->shstrtbl_idx,
}; };
uint32_t ptr = 0; update_offsets(assembler, &ehdr);
// we must now correct offets and sizes inside the ehdr, phdr, if (write_file(assembler, &ehdr, out))
// and shdr return M_ERROR;
ptr += sizeof(Elf32_Ehdr);
// phdr
ehdr.e_phoff = ptr;
ptr += phdr_len * sizeof(Elf32_Phdr);
// reltbls
for (uint32_t i = 0; i < asm->parser.sec_tbl.count; i++) {
if (asm->meta[i].reltbl == NULL)
continue;
int idx = asm->meta[i].reltbl_idx;
int len = asm->meta[i].reltbl_len;
shdr[idx].sh_offset = ptr;
shdr[idx].sh_size = len * sizeof(Elf32_Rela);
ptr += len * sizeof(Elf32_Rela);
}
// sections
for (uint32_t i = 0; i < asm->parser.sec_tbl.count; i++) {
int idx = asm->meta[i].shdr_idx;
phdr[i].p_offset = ptr;
phdr[i].p_vaddr = ptr;
phdr[i].p_paddr = ptr;
shdr[idx].sh_offset = ptr;
shdr[idx].sh_size = phdr[i].p_filesz;
shdr[idx].sh_addr = phdr[i].p_vaddr;
shdr[idx].sh_addralign = phdr[i].p_align;
ptr += phdr[i].p_filesz;
}
// symtbl
shdr[asm->symtbl_idx].sh_offset = ptr;
shdr[asm->symtbl_idx].sh_link = asm->strtbl_idx;
shdr[asm->symtbl_idx].sh_size = symtbl_len * sizeof(Elf32_Sym);
ptr += symtbl_len * sizeof(Elf32_Sym);
// strtbl
shdr[asm->strtbl_idx].sh_offset = ptr;
shdr[asm->strtbl_idx].sh_size = asm->str_tbl.size;
ptr += asm->str_tbl.size;
// shstrtbl
shdr[asm->shstrtbl_idx].sh_offset = ptr;
shdr[asm->shstrtbl_idx].sh_size = asm->shstr_tbl.size;
ptr += asm->shstr_tbl.size;
// shdr
ehdr.e_shoff = ptr;
FILE *out = fopen("out.o", "w");
// ehdr
fwrite(&ehdr, sizeof(Elf32_Ehdr), 1, out);
// phdr
fwrite(phdr, sizeof(Elf32_Phdr), phdr_len, out);
// reltbls
for (uint32_t i = 0; i < asm->parser.sec_tbl.count; i++) {
if (asm->meta[i].reltbl == NULL)
continue;
void *ptr = asm->meta[i].reltbl;
int len = asm->meta[i].reltbl_len;
asm->meta[i].reltbl = NULL;
fwrite(ptr, sizeof(Elf32_Rela), len, out);
}
// sections
for (uint32_t i = 0; i < asm->parser.sec_tbl.count; i++) {
struct section *sec = &asm->parser.sec_tbl.sections[i];
for (uint32_t j = 0; j < sec->count; j++) {
struct section_entry *entry = &sec->entries[j];
size_t size = entry->size;
fwrite(&entry->data, size, 1, out);
while(size % sec->alignment) {
uint8_t zero = 0;
fwrite(&zero, 1, 1, out);
size++;
}
}
}
// sym tbl
fwrite(symtbl, sizeof(Elf32_Sym), symtbl_len, out);
// str tbl
fwrite(asm->str_tbl.ptr, asm->str_tbl.size, 1, out);
// shstr tbl
fwrite(asm->shstr_tbl.ptr, asm->shstr_tbl.size, 1, out);
// shdr
fwrite(shdr, sizeof(Elf32_Shdr), shdr_len, out);
// cleanup
fclose(out);
free(shdr);
free(phdr);
free(symtbl);
return M_SUCCESS; return M_SUCCESS;
} }
int assemble_file_mips32(char *path) int assemble_file_mips32(struct assembler_arguments args)
{ {
struct assembler asm; struct assembler assembler;
int res = M_SUCCESS; int res = M_SUCCESS;
current_file = path; current_file = args.in_file;
if (assembler_init(&asm, path)) if (assembler_init(&assembler, args.in_file))
return M_ERROR; return M_ERROR;
mips32_parser_init(&asm.parser); mips32_parser_init(&assembler.parser);
if (res == M_SUCCESS) if (res == M_SUCCESS)
res = parse_file(&asm); res = parse_file(&assembler);
if (res == M_SUCCESS) if (res == M_SUCCESS)
res = assemble_file(&asm); res = assemble_elf(&assembler, args.out_file);
assembler_free(&asm); assembler_free(&assembler);
return res; return res;
} }

View file

@ -19,7 +19,7 @@ static int lex_next(struct lexer *lexer)
int c = getc(lexer->file); int c = getc(lexer->file);
if (c == '\n') { if (c == '\n') {
lexer->x = 0; lexer->x = 1;
lexer->y++; lexer->y++;
} else { } else {
lexer->x++; lexer->x++;
@ -296,13 +296,13 @@ int lexer_init(const char *path, struct lexer *lexer)
{ {
FILE *file = fopen(path, "r"); FILE *file = fopen(path, "r");
if (file == NULL) { if (file == NULL) {
ERROR_POS(pos, "cannot file '%s'", path); ERROR("cannot read '%s'", path);
return M_ERROR; return M_ERROR;
} }
lexer->file = file; lexer->file = file;
lexer->peek = EOF; lexer->peek = EOF;
lexer->x = 0; lexer->x = 1;
lexer->y = 0; lexer->y = 1;
return M_SUCCESS; return M_SUCCESS;
} }

View file

@ -1,9 +1,67 @@
#include <unistd.h>
#include <merror.h>
#include <string.h>
#include "asm.h" #include "asm.h"
#include "mips.h"
/* Print the command-line usage summary to standard output. */
void help(void) {
	fputs("usage: masm [options] source.asm\n\n"
	      "options:\n"
	      "\t-h\t\tprints this help message\n"
	      "\t-i isa\t\tselect a ISA to assemble to (mips32)\n"
	      "\t-o output\tselect a output file destination\n",
	      stdout);
}
int main(int argc, char **argv) { int main(int argc, char **argv) {
if (argc != 2) struct assembler_arguments args = {
return 0; .isa = ISA_MIPS32,
.in_file = NULL,
.out_file = NULL,
};
return assemble_file_mips32(argv[1]); int c;
while ((c = getopt(argc, argv, "ho:i:")) != 1) {
switch(c) {
case 'h':
help();
return M_SUCCESS;
case 'o':
args.out_file = optarg;
break;
case 'i':
if (strcmp(optarg, "mips32") == 0) {
args.isa = ISA_MIPS32;
} else {
ERROR("invalid isa '%s'", optarg);
return M_ERROR;
}
break;
case '?':
return M_ERROR;
default:
goto next;
}
}
next:
if (optind < argc - 1) {
ERROR("too many source files passed");
return M_ERROR;
}
if (optind >= argc) {
ERROR("no source files passed");
return M_ERROR;
}
args.in_file = argv[optind];
if (args.out_file == NULL) {
args.out_file = "out.o";
}
return assemble_file(args);
} }

View file

@ -110,29 +110,6 @@ static int parse_label(struct parser *parser,
return M_ERROR; return M_ERROR;
strcpy(expr->text, token.text); strcpy(expr->text, token.text);
struct symbol *ref;
if (symtbl_find(&parser->sym_tbl, &ref, token.text) == M_SUCCESS) {
if (ref->flag == SYM_GLOBAL && ref->sec == NULL) {
ref->sec = parser->sec_tbl.current;
ref->index = parser->sec_tbl.current->count;
return M_SUCCESS;
}
ERROR_POS(token, "redefined symbol '%s'", token.text);
return M_ERROR;
}
struct symbol symbol;
symbol = (struct symbol) {
.name = "",
.sec = parser->sec_tbl.current,
.index = parser->sec_tbl.current->count,
.flag = SYM_LOCAL,
};
strcpy(symbol.name, token.text);
if (symtbl_push(&parser->sym_tbl, symbol))
return M_ERROR;
return M_SUCCESS; return M_SUCCESS;
} }
@ -184,8 +161,6 @@ int parser_init(struct lexer *lexer, struct parser *parser)
{ {
parser->lexer = lexer; parser->lexer = lexer;
parser->peek.type = TOK_EOF; parser->peek.type = TOK_EOF;
if (symtbl_init(&parser->sym_tbl))
return M_ERROR;
if (sectbl_init(&parser->sec_tbl)) if (sectbl_init(&parser->sec_tbl))
return M_ERROR; return M_ERROR;
if (reftbl_init(&parser->ref_tbl)) if (reftbl_init(&parser->ref_tbl))
@ -196,7 +171,6 @@ int parser_init(struct lexer *lexer, struct parser *parser)
void parser_free(struct parser *parser) void parser_free(struct parser *parser)
{ {
symtbl_free(&parser->sym_tbl);
sectbl_free(&parser->sec_tbl); sectbl_free(&parser->sec_tbl);
reftbl_free(&parser->ref_tbl); reftbl_free(&parser->ref_tbl);
} }

View file

@ -35,33 +35,6 @@ struct expr {
}; };
}; };
/* How a symbol entered the symbol table. */
enum symbol_flag {
// assigned to symbols created from a label definition
SYM_LOCAL,
// assigned by the .globl directive handling
SYM_GLOBAL,
// assigned to symbols created by the .extern directive
SYM_EXTERNAL,
};
/* One entry of the symbol table. */
struct symbol {
// symbol name as written in the source
char name[MAX_LEX_LENGTH];
// entry count of the current section when the symbol was recorded
uint32_t index;
// section the symbol was recorded against; NULL for a global that has
// not been given a definition yet
struct section *sec;
enum symbol_flag flag;
};
/* Array of symbols filled through symtbl_push(). */
struct symbol_table {
// number of valid entries in `symbols`
uint32_t count;
// presumably the allocated capacity of `symbols` — confirm in the
// symtbl implementation
uint32_t len;
struct symbol *symbols;
};
// set up a symbol table; callers treat a non-zero return as failure
int symtbl_init(struct symbol_table *sym_tbl);
// release a table set up by symtbl_init
void symtbl_free(struct symbol_table *sym_tbl);
// add `sym` to the table; callers treat a non-zero return as failure
int symtbl_push(struct symbol_table *sym_tbl, struct symbol sym);
// look a symbol up by name; returns M_SUCCESS when it exists. Callers pass
// NULL for `sym` when only existence matters; otherwise *sym is used as a
// pointer to the stored entry afterwards
int symtbl_find(struct symbol_table *sym_tbl, struct symbol **sym,
const char name[MAX_LEX_LENGTH]);
enum section_entry_type { enum section_entry_type {
ENT_INS, ENT_INS,
ENT_WORD, ENT_WORD,
@ -143,9 +116,6 @@ struct parser {
// sections // sections
struct section_table sec_tbl; struct section_table sec_tbl;
// symbols
struct symbol_table sym_tbl;
// references // references
struct reference_table ref_tbl; struct reference_table ref_tbl;

View file

@ -3,7 +3,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include "parse.h" #include "asm.h"
#define SYMTBL_INIT_LEN 24 #define SYMTBL_INIT_LEN 24