diff --git a/include/melf.h b/include/melf.h index 095518b..e7311cd 100644 --- a/include/melf.h +++ b/include/melf.h @@ -32,7 +32,7 @@ static const Elf32_Ehdr MIPS_ELF_EHDR = .e_machine = B16(EM_MIPS), .e_version = B32(EV_CURRENT), .e_entry = 0x00, - .e_flags = B32(EF_MIPS_ARCH_32R6), + .e_flags = 0x00, // B32(EF_MIPS_ARCH_32R6), .e_ehsize = B16(sizeof(Elf32_Ehdr)), .e_phentsize = B16(sizeof(Elf32_Phdr)), .e_shentsize = B16(sizeof(Elf32_Shdr)), diff --git a/include/merror.h b/include/merror.h index de47ed8..4b32159 100644 --- a/include/merror.h +++ b/include/merror.h @@ -8,8 +8,8 @@ /* Error codes */ #define M_SUCCESS 0 -#define M_EOF 1 -#define M_ERROR -1 +#define M_ERROR 1 +#define M_EOF 2 #define __DEBUG 1 #define __WARNING 2 diff --git a/include/mips.h b/include/mips.h deleted file mode 100644 index 9a1b204..0000000 --- a/include/mips.h +++ /dev/null @@ -1,467 +0,0 @@ -/* Copyright (c) 2024 Freya Murphy */ - -#ifndef __MIPS_H__ -#define __MIPS_H__ - -#include -#include - -/* all mips registers $0-$31 */ -enum mips_register { - MIPS_REG_ZERO = 0, - MIPS_REG_AT = 1, - MIPS_REG_V0 = 2, - MIPS_REG_V1 = 3, - MIPS_REG_A0 = 4, - MIPS_REG_A1 = 5, - MIPS_REG_A2 = 6, - MIPS_REG_A3 = 7, - MIPS_REG_T0 = 8, - MIPS_REG_T1 = 9, - MIPS_REG_T2 = 10, - MIPS_REG_T3 = 11, - MIPS_REG_T4 = 12, - MIPS_REG_T5 = 13, - MIPS_REG_T6 = 14, - MIPS_REG_T7 = 15, - MIPS_REG_S0 = 16, - MIPS_REG_S1 = 17, - MIPS_REG_S2 = 18, - MIPS_REG_S3 = 19, - MIPS_REG_S4 = 20, - MIPS_REG_S5 = 21, - MIPS_REG_S6 = 22, - MIPS_REG_S7 = 23, - MIPS_REG_T8 = 24, - MIPS_REG_T9 = 25, - MIPS_REG_K0 = 26, - MIPS_REG_K1 = 27, - MIPS_REG_GP = 28, - MIPS_REG_SP = 29, - MIPS_REG_FP = 30, - MIPS_REG_RA = 31, -}; - -/* mips instructions */ -enum mips_instruction_type { - MIPS_INS_ADD, - MIPS_INS_ADDI, - MIPS_INS_ADDIU, - MIPS_INS_ADDU, - MIPS_INS_AND, - MIPS_INS_ANDI, - MIPS_INS_BAL, - MIPS_INS_BALC, - MIPS_INS_BC, - MIPS_INS_BEQ, - MIPS_INS_BEQL, - MIPS_INS_BGEZ, - MIPS_INS_BGEZAL, - MIPS_INS_BGEZALL, - MIPS_INS_BGEZL, - 
MIPS_INS_BGTZ, - MIPS_INS_BGTZL, - MIPS_INS_BLEZ, - MIPS_INS_BLEZL, - MIPS_INS_BLTZ, - MIPS_INS_BLTZAL, - MIPS_INS_BLTZALL, - MIPS_INS_BLTZL, - MIPS_INS_BNE, - MIPS_INS_BNEL, - MIPS_INS_DIV, - MIPS_INS_MOD, - MIPS_INS_DIVU, - MIPS_INS_MODU, - MIPS_INS_J, - MIPS_INS_JAL, - MIPS_INS_JALR, - MIPS_INS_JALX, - MIPS_INS_JR, - MIPS_INS_LB, - MIPS_INS_LBU, - MIPS_INS_LH, - MIPS_INS_LHU, - MIPS_INS_LUI, - MIPS_INS_LW, - MIPS_INS_MFHI, - MIPS_INS_MFLO, - MIPS_INS_MTHI, - MIPS_INS_MTLO, - MIPS_INS_MUL, - MIPS_INS_MUH, - MIPS_INS_MULU, - MIPS_INS_MUHU, - MIPS_INS_SB, - MIPS_INS_SH, - MIPS_INS_SW, - MIPS_INS_SLL, - MIPS_INS_SLLV, - MIPS_INS_SLT, - MIPS_INS_SLTI, - MIPS_INS_SLTIU, - MIPS_INS_SLTU, - MIPS_INS_SRA, - MIPS_INS_SRAV, - MIPS_INS_SRL, - MIPS_INS_SRLV, - MIPS_INS_SUB, - MIPS_INS_SUBU, - MIPS_INS_SYSCALL, - MIPS_INS_OR, - MIPS_INS_ORI, - MIPS_INS_NOR, - MIPS_INS_XOR, - MIPS_INS_XORI, - // gets the size of the enum - __MIPS_INS_LEN, -}; - -union mips_instruction_data { - /* raw ins */ - uint32_t raw : 32; - /* register type */ - struct { - uint32_t funct : 6; - uint32_t shamt : 5; - uint32_t rd : 5; - uint32_t rt : 5; - uint32_t rs : 5; - uint32_t op : 6; - }; - /* immediate type */ - struct { - uint32_t immd : 16; - uint32_t : 16; - }; - /* jump type */ - struct { - uint32_t target : 26; - uint32_t : 6; - }; - /* branch compact */ - struct { - int32_t offs26 : 26; - uint32_t : 6; - }; - /* branch */ - struct { - int32_t offset : 16; - uint32_t bfunct : 5; - uint32_t : 11; - }; -} __attribute__((packed)); - -/* mips instruction information */ -struct mips_instruction { - // metadata - enum mips_instruction_type type; - const char *name; - - // data - union mips_instruction_data data; -}; - -#define MIPS_INS(ins, ...) 
\ - [MIPS_INS_ ##ins] = { \ - MIPS_INS_ ##ins, \ - #ins, \ - .data = { __VA_ARGS__ } \ - }, \ - -static const struct mips_instruction mips_instructions[] = { -/* ADD - add */ -#define MIPS_OP_SPECIAL 0b000000 -#define MIPS_FUNCT_ADD 0b100000 -MIPS_INS(ADD, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_ADD) - -/* ADDI - add immediate */ -#define MIPS_OP_ADDI 0b001000 -MIPS_INS(ADDI, .op = MIPS_OP_ADDI) - -/* ADDIU - add immediate unsigned */ -#define MIPS_OP_ADDIU 0b001001 -MIPS_INS(ADDIU, .op = MIPS_OP_ADDIU) - -/* ADDU - add unsigned */ -#define MIPS_FUNCT_ADDU 0b100001 -MIPS_INS(ADDU, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_ADDU) - -/* AND - and */ -#define MIPS_FUNCT_AND 0b100100 -MIPS_INS(AND, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_AND) - -/* ANDI - and immediate */ -#define MIPS_OP_ANDI 0b001100 -MIPS_INS(ANDI, .op = MIPS_OP_ANDI) - -/* BAL - branch and link */ -#define MIPS_OP_REGIMM 0b000001 -#define MIPS_FUNCT_BAL 0b10001 -MIPS_INS(BAL, .op = MIPS_OP_REGIMM, .bfunct = MIPS_FUNCT_BAL) - -/* BALC - branch and link, compact */ -#define MIPS_OP_BALC 0b111010 -MIPS_INS(BALC, .op = MIPS_OP_BALC) - -/* BC - branch, compact */ -#define MIPS_OP_BC 0b110010 -MIPS_INS(BC, .op = MIPS_OP_BC) - -/* BEQ - branch on equal */ -#define MIPS_OP_BEQ 0b000100 -MIPS_INS(BEQ, .op = MIPS_OP_BEQ) - -/* BEQL - branch on equal likely */ -#define MIPS_OP_BEQL 0b010100 -MIPS_INS(BEQL, .op = MIPS_OP_BEQL) - -/* BGEZ - branch on greater than or equal to zero */ -#define MIPS_FUNCT_BGEZ 0b00001 -MIPS_INS(BGEZ, .op = MIPS_OP_REGIMM, .bfunct = MIPS_FUNCT_BGEZ) - -/* BGEZAL - branch on greater than or equal to zero and link */ -#define MIPS_FUNCT_BGEZAL 0b10001 -MIPS_INS(BGEZAL, .op = MIPS_OP_REGIMM, .bfunct = MIPS_FUNCT_BGEZAL) - -/* BGEZAL - branch on greater than or equal to zero and link likely */ -#define MIPS_FUNCT_BGEZALL 0b10011 -MIPS_INS(BGEZALL, .op = MIPS_OP_REGIMM, .bfunct = MIPS_FUNCT_BGEZALL) - -/* BGEZL - branch on greater than or equal to zero likely */ -#define 
MIPS_FUNCT_BGEZL 0b00011 -MIPS_INS(BGEZL, .op = MIPS_OP_REGIMM, .bfunct = MIPS_FUNCT_BGEZL) - -/* BGTZ - branch on greater than zero */ -#define MIPS_OP_BGTZ 0b000111 -MIPS_INS(BGTZ, .op = MIPS_OP_BGTZ) - -/* BGTZL - branch on greater than zero likely */ -#define MIPS_OP_BGTZL 0b010111 -MIPS_INS(BGTZL, .op = MIPS_OP_BGTZL) - -/* BLEZ - branch on less than or equal to zero */ -#define MIPS_OP_BLEZ 0b000110 -MIPS_INS(BLEZ, .op = MIPS_OP_BLEZ) - -/* BLEZL - branch on less than or equal to zero likely */ -#define MIPS_OP_BLEZL 0b010110 -MIPS_INS(BLEZL, .op = MIPS_OP_BLEZL) - -/* BLTZ - branch on less than zero */ -#define MIPS_FUNCT_BLTZ 0b00000 -MIPS_INS(BLTZ, .op = MIPS_OP_REGIMM, .bfunct = MIPS_FUNCT_BLTZ) - -/* BLTZAL - branch on less than zero and link */ -#define MIPS_FUNCT_BLTZAL 0b10000 -MIPS_INS(BLTZAL, .op = MIPS_OP_REGIMM, .bfunct = MIPS_FUNCT_BLTZAL) - -/* BLTZALL - branch on less than zero and link likely */ -#define MIPS_FUNCT_BLTZALL 0b10010 -MIPS_INS(BLTZALL, .op = MIPS_OP_REGIMM, .bfunct = MIPS_FUNCT_BLTZALL) - -/* BLTZL - branch on less than zero likely */ -#define MIPS_FUNCT_BLTZL 0b00010 -MIPS_INS(BLTZL, .op = MIPS_OP_REGIMM, .bfunct = MIPS_FUNCT_BLTZL) - -/* BNE - branch on not equal */ -#define MIPS_OP_BNE 0b000101 -MIPS_INS(BNE, .op = MIPS_OP_BNE) - -/* BNEL - branch on not equal likely */ -#define MIPS_OP_BNEL 0b010101 -MIPS_INS(BNEL, .op = MIPS_OP_BNEL) - -/* DIV - divide */ -#define MIPS_FUNCT_SOP32 0b011010 -#define MIPS_SOP32_DIV 0b00010 -MIPS_INS(DIV, .op = MIPS_OP_SPECIAL, .shamt = MIPS_SOP32_DIV, - .funct = MIPS_FUNCT_SOP32) - -/* MOD - modulo */ -#define MIPS_SOP32_MOD 0b00011 -MIPS_INS(MOD, .op = MIPS_OP_SPECIAL, .shamt = MIPS_SOP32_MOD, - .funct = MIPS_FUNCT_SOP32) - -/* DIVU - divide unsigned */ -#define MIPS_FUNCT_SOP33 0b011011 -#define MIPS_SOP33_DIVU 0b00010 -MIPS_INS(DIVU, .op = MIPS_OP_SPECIAL, .shamt = MIPS_SOP33_DIVU, - .funct = MIPS_FUNCT_SOP33) - -/* MODU - modulo unsigned */ -#define MIPS_SOP33_MODU 0b00011 -MIPS_INS(MODU, 
.op = MIPS_OP_SPECIAL, .shamt = MIPS_SOP33_MODU, - .funct = MIPS_FUNCT_SOP33) - -/* J - jump */ -#define MIPS_OP_J 0b000010 -MIPS_INS(J, .op = MIPS_OP_J) - -/* JAL - jump and link */ -#define MIPS_OP_JAL 0b000011 -MIPS_INS(JAL, .op = MIPS_OP_JAL) - -/* JALR - jump and link register */ -#define MIPS_FUNCT_JALR 0b001001 -MIPS_INS(JALR, .rd = MIPS_REG_RA, .op = MIPS_OP_SPECIAL, - .funct = MIPS_FUNCT_JALR) - -/* JALX - jump and link exchange */ -#define MIPS_OP_JALX 0b011101 -MIPS_INS(JALX, .op = MIPS_OP_JALX) - -/* JR - jump register */ -#define MIPS_FUNCT_JR 0b001000 -MIPS_INS(JR, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_JR) - -/* LB - load byte */ -#define MIPS_OP_LB 0b100000 -MIPS_INS(LB, .op = MIPS_OP_LB) - -/* LBU - load byte unsigned */ -#define MIPS_OP_LBU 0b100100 -MIPS_INS(LBU, .op = MIPS_OP_LBU) - -/* LH - load half */ -#define MIPS_OP_LH 0b100001 -MIPS_INS(LH, .op = MIPS_OP_LH) - -/* LHU - load half unsigned */ -#define MIPS_OP_LHU 0b100101 -MIPS_INS(LHU, .op = MIPS_OP_LHU) - -/* LUI - load upper immediate */ -#define MIPS_OP_LUI 0b001111 -MIPS_INS(LUI, .op = MIPS_OP_LUI) - -/* LW - load word */ -#define MIPS_OP_LW 0b100011 -MIPS_INS(LW, .op = MIPS_OP_LW) - -/* MFHI - move from hi */ -#define MIPS_FUNCT_MFHI 0b010000 -MIPS_INS(MFHI, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_MFHI) - -/* MFLO - move from hi */ -#define MIPS_FUNCT_MFLO 0b010010 -MIPS_INS(MFLO, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_MFLO) - -/* MTHI - move from hi */ -#define MIPS_FUNCT_MTHI 0b010001 -MIPS_INS(MTHI, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_MTHI) - -/* MTLO - move from hi */ -#define MIPS_FUNCT_MTLO 0b010011 -MIPS_INS(MTLO, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_MTLO) - -/* MUL - multiply low word */ -#define MIPS_FUNCT_SOP30 0b011000 -#define MIPS_SOP30_MUL 0b00010 -MIPS_INS(MUL, .op = MIPS_OP_SPECIAL, .shamt = MIPS_SOP30_MUL, - .funct = MIPS_FUNCT_SOP30) - -/* MUH - multiply high word */ -#define MIPS_SOP30_MUH 0b00011 -MIPS_INS(MUH, .op = MIPS_OP_SPECIAL, .shamt 
= MIPS_SOP30_MUH, - .funct = MIPS_FUNCT_SOP30) - -/* MULU - multiply low word unsigned */ -#define MIPS_FUNCT_SOP31 0b011001 -#define MIPS_SOP31_MULU 0b00010 -MIPS_INS(MULU, .op = MIPS_OP_SPECIAL, .shamt = MIPS_SOP31_MULU, - .funct = MIPS_FUNCT_SOP31) - -/* MUHU - multiply high word unsgined */ -#define MIPS_SOP31_MUHU 0b00011 -MIPS_INS(MUHU, .op = MIPS_OP_SPECIAL, .shamt = MIPS_SOP31_MUHU, - .funct = MIPS_FUNCT_SOP31) - -/* SB - store byte */ -#define MIPS_OP_SB 0b101000 -MIPS_INS(SB, .op = MIPS_OP_SB) - -/* SH - store half */ -#define MIPS_OP_SH 0b101001 -MIPS_INS(SH, .op = MIPS_OP_SH) - -/* SW - store word */ -#define MIPS_OP_SW 0b101011 -MIPS_INS(SW, .op = MIPS_OP_SW) - -/* SLL - shift left logical */ -#define MIPS_FUNCT_SLL 0b000000 -MIPS_INS(SLL, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SLL) - -/* SLLV - shift left logical variable */ -#define MIPS_FUNCT_SLLV 0b000100 -MIPS_INS(SLLV, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SLLV) - -/* SLT - set less then */ -#define MIPS_FUNCT_SLT 0b101010 -MIPS_INS(SLT, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SLT) - -/* SLTI - set less then immediate */ -#define MIPS_OP_SLTI 0b001010 -MIPS_INS(SLTI, .op = MIPS_OP_SLTI) - -/* SLTIU - set less then imemdiate unsigned */ -#define MIPS_OP_SLTIU 0b001011 -MIPS_INS(SLTIU, .op = MIPS_OP_SLTIU) - -/* SLTU - set less than unsigned */ -#define MIPS_FUNCT_SLTU 0b101011 -MIPS_INS(SLTU, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SLTU) - -/* SRA - shift right arithmetic */ -#define MIPS_FUNCT_SRA 0b000011 -MIPS_INS(SRA, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SRA) - -/* SRAV - shift right arithmetic variable */ -#define MIPS_FUNCT_SRAV 0b000111 -MIPS_INS(SRAV, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SRAV) - -/* SRL - shift right logical */ -#define MIPS_FUNCT_SRL 0b000010 -MIPS_INS(SRL, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SRL) - -/* SRLV - shift right logical variable */ -#define MIPS_FUNCT_SRLV 0b000110 -MIPS_INS(SRLV, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SRLV) 
- -/* SUB - subtract */ -#define MIPS_FUNCT_SUB 0b100010 -MIPS_INS(SUB, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SUB) - -/* SUBU - subtract unsigned */ -#define MIPS_FUNCT_SUBU 0b100011 -MIPS_INS(SUBU, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SUBU) - -/* SYSCALL - syscall */ -#define MIPS_FUNCT_SYSCALL 0b001100 -MIPS_INS(SYSCALL, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SYSCALL) - -/* OR - or */ -#define MIPS_FUNCT_OR 0b100101 -MIPS_INS(OR, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_OR) - -/* ORI - or imemdiate */ -#define MIPS_OP_ORI 0b001101 -MIPS_INS(ORI, .op = MIPS_OP_ORI) - -/* NOR - not or */ -#define MIPS_FUNCT_NOR 0b100111 -MIPS_INS(NOR, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_NOR) - -/* XOR - exclusive or */ -#define MIPS_FUNCT_XOR 0b100110 -MIPS_INS(XOR, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_XOR) - -/* XORI - exclusive or immediate */ -#define MIPS_OP_XORI 0b001110 -MIPS_INS(XORI, .op = MIPS_OP_XORI) -}; - -#undef MIPS_INS - -#endif /* __MIPS_H__ */ diff --git a/include/mips32.h b/include/mips32.h new file mode 100644 index 0000000..e2b86b7 --- /dev/null +++ b/include/mips32.h @@ -0,0 +1,142 @@ +/* Copyright (c) 2024 Freya Murphy */ + +#ifndef __MIPS32_H__ +#define __MIPS32_H__ + +#include +#include +#include + +/* all mips registers $0-$31 */ +enum mips32_register { + MIPS32_REG_ZERO = 0, + MIPS32_REG_AT = 1, + MIPS32_REG_V0 = 2, + MIPS32_REG_V1 = 3, + MIPS32_REG_A0 = 4, + MIPS32_REG_A1 = 5, + MIPS32_REG_A2 = 6, + MIPS32_REG_A3 = 7, + MIPS32_REG_T0 = 8, + MIPS32_REG_T1 = 9, + MIPS32_REG_T2 = 10, + MIPS32_REG_T3 = 11, + MIPS32_REG_T4 = 12, + MIPS32_REG_T5 = 13, + MIPS32_REG_T6 = 14, + MIPS32_REG_T7 = 15, + MIPS32_REG_S0 = 16, + MIPS32_REG_S1 = 17, + MIPS32_REG_S2 = 18, + MIPS32_REG_S3 = 19, + MIPS32_REG_S4 = 20, + MIPS32_REG_S5 = 21, + MIPS32_REG_S6 = 22, + MIPS32_REG_S7 = 23, + MIPS32_REG_T8 = 24, + MIPS32_REG_T9 = 25, + MIPS32_REG_K0 = 26, + MIPS32_REG_K1 = 27, + MIPS32_REG_GP = 28, + MIPS32_REG_SP = 29, + MIPS32_REG_FP = 30, + 
MIPS32_REG_RA = 31, +}; + +/* mips instruction */ +union mips32_instruction { + /* raw ins */ + uint32_t raw : 32; + /* register type */ + struct { + uint32_t funct : 6; + uint32_t shamt : 5; + uint32_t rd : 5; + uint32_t rt : 5; + uint32_t rs : 5; + uint32_t op : 6; + }; + /* immediate type */ + struct { + uint32_t immd : 16; + uint32_t : 16; + }; + /* jump type */ + struct { + uint32_t target : 26; + uint32_t : 6; + }; + /* branch compact */ + struct { + int32_t offs26 : 26; + uint32_t : 6; + }; + /* branch */ + struct { + int32_t offset : 16; + uint32_t bfunct : 5; + uint32_t : 11; + }; +} __attribute__((packed)); + +/// grammer syntax: +/// +/// ... the grammer takes entries parsed from the instruction, +/// and updates the instructions with values based on the type +/// of entry. i.e. immd would require an immd in the next argument, +/// and update the low 16bits of the instruction. +/// +/// GRAMMER -> ENTRIES +/// GRAMMER -> ε +/// ENTRIES -> ENTRIES, ENTRY +/// ENTRY -> rd // i.e. $at +/// ENTRY -> rs +/// ENTRY -> rt +/// ENTRY -> immd // i.e. 0x80 +/// ENTRY -> offset // i.e. main (16bits) +/// ENTRY -> offset(base) // i.e. 4($sp) +/// ENTRY -> target // i.e. main (28bits shifted) +/// +/// // grammer entries are always defined onto themselves... meaning the +/// // name of their type directly corresponds to the mips field in the +/// // instruction +/// +/// pseudo grammer syntax: +/// +/// ... pseudo entries represent what values should be placed where +/// in each of the pseudo instructions. pseudo grammer is extended such +/// that hardcoded values can be returned. i.e. setting rt=$at +/// +/// GRAMMER -> ENTRIES +/// GRAMMER -> ε +/// ENTRIES -> ENTRIES, ENTRYSET +/// ENTRYSET -> ENTRY | SET +/// SET -> ENTRY = +/// ENTRY -> // i.e. 
any valid entry from grammer synax +/// ENTRY -> hi // high 16bits of into +/// ENTRY -> lo // low 16bits of into + +/* mips grammer */ +struct mips32_grammer { + // the name of the ins + char *name; + // the grammer of the ins + char *grammer; + // the index of the ins (if real) + int enum_index; + + // for pseudo instructions only + int pseudo_len; + struct mips32__pseudo_grammer { + // what instruction is this + // part in the pseudo instruction + int enum_index; + // what parts of the instruction + // to update with values from + // grammer + char *update; + + } pseudo_grammer[MAX_ARG_LENGTH]; +}; + +#endif /* __MIPS32_H__ */ diff --git a/include/mips32r6.h b/include/mips32r6.h new file mode 100644 index 0000000..c2aad2d --- /dev/null +++ b/include/mips32r6.h @@ -0,0 +1,158 @@ +/* Copyright (c) 2024 Freya Murphy */ + +#ifndef __MIPS32R6_H__ +#define __MIPS32R6_H__ + +#include +#include +#include + +/* mips instructions */ +enum mips32r6_instruction_type { + MIPS32R6_INS_ADD, + MIPS32R6_INS_ADDI, + MIPS32R6_INS_ADDIU, + MIPS32R6_INS_ADDU, + MIPS32R6_INS_AND, + MIPS32R6_INS_ANDI, + MIPS32R6_INS_BAL, + MIPS32R6_INS_BALC, + MIPS32R6_INS_BC, + MIPS32R6_INS_BEQ, + MIPS32R6_INS_BGEZ, + MIPS32R6_INS_BGEZAL, + MIPS32R6_INS_BGTZ, + MIPS32R6_INS_BLEZ, + MIPS32R6_INS_BLTZ, + MIPS32R6_INS_BLTZAL, + MIPS32R6_INS_BNE, + MIPS32R6_INS_DIV, + MIPS32R6_INS_MOD, + MIPS32R6_INS_DIVU, + MIPS32R6_INS_MODU, + MIPS32R6_INS_J, + MIPS32R6_INS_JAL, + MIPS32R6_INS_JALR, + MIPS32R6_INS_JALX, + MIPS32R6_INS_JR, + MIPS32R6_INS_LB, + MIPS32R6_INS_LBU, + MIPS32R6_INS_LH, + MIPS32R6_INS_LHU, + MIPS32R6_INS_LUI, + MIPS32R6_INS_LW, + MIPS32R6_INS_MUL, + MIPS32R6_INS_MUH, + MIPS32R6_INS_MULU, + MIPS32R6_INS_MUHU, + MIPS32R6_INS_SB, + MIPS32R6_INS_SH, + MIPS32R6_INS_SW, + MIPS32R6_INS_SLL, + MIPS32R6_INS_SLLV, + MIPS32R6_INS_SLT, + MIPS32R6_INS_SLTI, + MIPS32R6_INS_SLTIU, + MIPS32R6_INS_SLTU, + MIPS32R6_INS_SRA, + MIPS32R6_INS_SRAV, + MIPS32R6_INS_SRL, + MIPS32R6_INS_SRLV, + MIPS32R6_INS_SUB, + 
MIPS32R6_INS_SUBU, + MIPS32R6_INS_SYSCALL, + MIPS32R6_INS_OR, + MIPS32R6_INS_ORI, + MIPS32R6_INS_NOR, + MIPS32R6_INS_XOR, + MIPS32R6_INS_XORI, + __MIPS32R6_INS_NULL, +}; + +#define MIPS32R6_OP_SPECIAL 0b000000 +#define MIPS32R6_OP_ADDI 0b001000 +#define MIPS32R6_OP_ADDIU 0b001001 +#define MIPS32R6_OP_ANDI 0b001100 +#define MIPS32R6_OP_REGIMM 0b000001 +#define MIPS32R6_OP_BALC 0b111010 +#define MIPS32R6_OP_BC 0b110010 +#define MIPS32R6_OP_BEQ 0b000100 +#define MIPS32R6_OP_BEQL 0b010100 +#define MIPS32R6_OP_BGTZ 0b000111 +#define MIPS32R6_OP_BGTZL 0b010111 +#define MIPS32R6_OP_BLEZ 0b000110 +#define MIPS32R6_OP_BLEZL 0b010110 +#define MIPS32R6_OP_BNE 0b000101 +#define MIPS32R6_OP_BNEL 0b010101 +#define MIPS32R6_OP_J 0b000010 +#define MIPS32R6_OP_JAL 0b000011 +#define MIPS32R6_OP_JALX 0b011101 +#define MIPS32R6_OP_LB 0b100000 +#define MIPS32R6_OP_LBU 0b100100 +#define MIPS32R6_OP_LH 0b100001 +#define MIPS32R6_OP_LHU 0b100101 +#define MIPS32R6_OP_LUI 0b001111 +#define MIPS32R6_OP_LW 0b100011 +#define MIPS32R6_OP_SB 0b101000 +#define MIPS32R6_OP_SH 0b101001 +#define MIPS32R6_OP_SW 0b101011 +#define MIPS32R6_OP_SLTI 0b001010 +#define MIPS32R6_OP_SLTIU 0b001011 +#define MIPS32R6_OP_ORI 0b001101 +#define MIPS32R6_OP_XORI 0b001110 + +#define MIPS32R6_FUNCT_ADD 0b100000 +#define MIPS32R6_FUNCT_ADDU 0b100001 +#define MIPS32R6_FUNCT_AND 0b100100 +#define MIPS32R6_FUNCT_SOP32 0b011010 +#define MIPS32R6_FUNCT_SOP33 0b011011 +#define MIPS32R6_FUNCT_JALR 0b001001 +#define MIPS32R6_FUNCT_JR 0b001000 +#define MIPS32R6_FUNCT_MFHI 0b010000 +#define MIPS32R6_FUNCT_MFLO 0b010010 +#define MIPS32R6_FUNCT_MTHI 0b010001 +#define MIPS32R6_FUNCT_MTLO 0b010011 +#define MIPS32R6_FUNCT_SOP30 0b011000 +#define MIPS32R6_FUNCT_SOP31 0b011001 +#define MIPS32R6_FUNCT_SLL 0b000000 +#define MIPS32R6_FUNCT_SLLV 0b000100 +#define MIPS32R6_FUNCT_SLT 0b101010 +#define MIPS32R6_FUNCT_SLTU 0b101011 +#define MIPS32R6_FUNCT_SRA 0b000011 +#define MIPS32R6_FUNCT_SRAV 0b000111 +#define MIPS32R6_FUNCT_SRL 0b000010 
+#define MIPS32R6_FUNCT_SRLV 0b000110 +#define MIPS32R6_FUNCT_SUB 0b100010 +#define MIPS32R6_FUNCT_SUBU 0b100011 +#define MIPS32R6_FUNCT_SYSCALL 0b001100 +#define MIPS32R6_FUNCT_OR 0b100101 +#define MIPS32R6_FUNCT_NOR 0b100111 +#define MIPS32R6_FUNCT_XOR 0b100110 + +#define MIPS32R6_FUNCT_BAL 0b10001 +#define MIPS32R6_FUNCT_BGEZ 0b00001 +#define MIPS32R6_FUNCT_BGEZAL 0b10001 +#define MIPS32R6_FUNCT_BGEZALL 0b10011 +#define MIPS32R6_FUNCT_BGEZL 0b00011 +#define MIPS32R6_FUNCT_BLTZ 0b00000 +#define MIPS32R6_FUNCT_BLTZAL 0b10000 +#define MIPS32R6_FUNCT_BLTZALL 0b10010 +#define MIPS32R6_FUNCT_BLTZL 0b00010 + +#define MIPS32R6_SOP30_MUL 0b00010 +#define MIPS32R6_SOP30_MUH 0b00011 +#define MIPS32R6_SOP31_MULU 0b00010 +#define MIPS32R6_SOP31_MUHU 0b00011 +#define MIPS32R6_SOP32_DIV 0b00010 +#define MIPS32R6_SOP32_MOD 0b00011 +#define MIPS32R6_SOP33_DIVU 0b00010 +#define MIPS32R6_SOP33_MODU 0b00011 + +#define __MIPS32R6_INS_LEN (__MIPS32R6_INS_NULL) +#define __MIPS32R6_PSEUDO_LEN (4) +#define __MIPS32R6_GRAMMER_LEN (__MIPS32R6_INS_LEN + __MIPS32R6_PSEUDO_LEN) + +extern struct mips32_grammer mips32r6_grammers[__MIPS32R6_GRAMMER_LEN]; +extern union mips32_instruction mips32r6_instructions[__MIPS32R6_INS_LEN]; + +#endif /* __MIPS32R6_H__ */ diff --git a/lib/mips32r6.c b/lib/mips32r6.c new file mode 100644 index 0000000..1f1fe61 --- /dev/null +++ b/lib/mips32r6.c @@ -0,0 +1,286 @@ +#include + +#define RTYPE "rd,rs,rt" +#define ITYPE "rt,rs,immd" +#define JTYPE "target" +#define LOAD "rt,offset(base)" +#define SHIFT "rd,rt,sa" +#define SHIFTV "rd,rt,rs" +#define BRANCH "rs,rt,offset" +#define BRANCHZ "rs,offset" + +#define INS(name, grammer) {#name, grammer, MIPS32R6_INS_ ##name, \ + /* pseudo stub */ 0, {{0, ""}}} + +#define PSEUDO(name, grammer, ...) 
{name, grammer, __MIPS32R6_INS_NULL, \ + __VA_ARGS__ } + +struct mips32_grammer mips32r6_grammers[__MIPS32R6_GRAMMER_LEN] = { + + // real instructions: exactly one entry per enum mips32r6_instruction_type value + + INS(ADD, RTYPE), + INS(ADDI, ITYPE), + INS(ADDIU, ITYPE), + INS(ADDU, RTYPE), + INS(AND, RTYPE), + INS(ANDI, ITYPE), + INS(BAL, "offset"), + INS(BALC, "target"), + INS(BC, "target"), + INS(BEQ, BRANCH), + INS(BGEZ, BRANCHZ), + INS(BGEZAL, BRANCHZ), + INS(BGTZ, BRANCHZ), + INS(BLEZ, BRANCHZ), + INS(BLTZ, BRANCHZ), + INS(BLTZAL, BRANCHZ), + INS(BNE, BRANCH), + INS(DIV, RTYPE), + INS(MOD, RTYPE), + INS(DIVU, RTYPE), + INS(MODU, RTYPE), + INS(J, JTYPE), + INS(JAL, JTYPE), + INS(JALR, "rs"), + INS(JALX, JTYPE), + INS(JR, "rs"), + INS(LB, LOAD), + INS(LBU, LOAD), + INS(LH, LOAD), + INS(LHU, LOAD), + INS(LUI, "rt,immd"), + INS(LW, LOAD), + INS(MUL, RTYPE), + INS(MUH, RTYPE), + INS(MULU, RTYPE), + INS(MUHU, RTYPE), + INS(SB, LOAD), + INS(SH, LOAD), + INS(SW, LOAD), + INS(SLL, SHIFT), + INS(SLLV, SHIFTV), + INS(SLT, RTYPE), + INS(SLTI, ITYPE), + INS(SLTIU, ITYPE), + INS(SLTU, RTYPE), + INS(SRA, SHIFT), + INS(SRAV, SHIFTV), + INS(SRL, SHIFT), + INS(SRLV, SHIFTV), // variable shift takes rs, same grammar as SLLV/SRAV + INS(SUB, RTYPE), + INS(SUBU, RTYPE), + INS(SYSCALL, ""), + INS(OR, RTYPE), + INS(ORI, ITYPE), + INS(NOR, RTYPE), + INS(XOR, RTYPE), + INS(XORI, ITYPE), + + // pseudo instructions + + PSEUDO("li", "rt,immd", 1, { + {MIPS32R6_INS_ADDI, "rt,immd"} + }), + + PSEUDO("la", "rt,target", 2, { + {MIPS32R6_INS_LUI, "rt=$at,hi"}, + {MIPS32R6_INS_ORI, "rt,rs=$at,lo"}, + }), + + PSEUDO("move", "rd,rs", 1, { + {MIPS32R6_INS_OR, "rd,rs"} + }), + + PSEUDO("nop", "", 1, { + {MIPS32R6_INS_SLL, ""}, + }), +}; + +#define MIPS_INS(ins, ...) 
\ + [MIPS32R6_INS_ ##ins] = { __VA_ARGS__ }, + +union mips32_instruction mips32r6_instructions[__MIPS32R6_INS_LEN] = { +/* ADD - add */ +MIPS_INS(ADD, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_ADD) + +/* ADDI - add immediate */ +MIPS_INS(ADDI, .op = MIPS32R6_OP_ADDI) + +/* ADDIU - add immediate unsigned */ +MIPS_INS(ADDIU, .op = MIPS32R6_OP_ADDIU) + +/* ADDU - add unsigned */ +MIPS_INS(ADDU, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_ADDU) + +/* AND - and */ +MIPS_INS(AND, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_AND) + +/* ANDI - and immediate */ +MIPS_INS(ANDI, .op = MIPS32R6_OP_ANDI) + +/* BAL - branch and link (REGIMM selector set through .rt, the same bits as bfunct, so .op is not overridden by a second union member) */ +MIPS_INS(BAL, .op = MIPS32R6_OP_REGIMM, .rt = MIPS32R6_FUNCT_BAL) + +/* BALC - branch and link, compact */ +MIPS_INS(BALC, .op = MIPS32R6_OP_BALC) + +/* BC - branch, compact */ +MIPS_INS(BC, .op = MIPS32R6_OP_BC) + +/* BEQ - branch on equal */ +MIPS_INS(BEQ, .op = MIPS32R6_OP_BEQ) + +/* BGEZ - branch on greater than or equal to zero (selector in the rt bits) */ +MIPS_INS(BGEZ, .op = MIPS32R6_OP_REGIMM, .rt = MIPS32R6_FUNCT_BGEZ) + +/* BGEZAL - branch on greater than or equal to zero and link (selector in the rt bits) */ +MIPS_INS(BGEZAL, .op = MIPS32R6_OP_REGIMM, .rt = MIPS32R6_FUNCT_BGEZAL) + +/* BGTZ - branch on greater than zero */ +MIPS_INS(BGTZ, .op = MIPS32R6_OP_BGTZ) + +/* BLEZ - branch on less than or equal to zero */ +MIPS_INS(BLEZ, .op = MIPS32R6_OP_BLEZ) + +/* BLTZ - branch on less than zero (selector in the rt bits) */ +MIPS_INS(BLTZ, .op = MIPS32R6_OP_REGIMM, .rt = MIPS32R6_FUNCT_BLTZ) + +/* BLTZAL - branch on less than zero and link (selector in the rt bits) */ +MIPS_INS(BLTZAL, .op = MIPS32R6_OP_REGIMM, .rt = MIPS32R6_FUNCT_BLTZAL) + +/* BNE - branch on not equal */ +MIPS_INS(BNE, .op = MIPS32R6_OP_BNE) + +/* DIV - divide */ +MIPS_INS(DIV, .op = MIPS32R6_OP_SPECIAL, .shamt = MIPS32R6_SOP32_DIV, + .funct = MIPS32R6_FUNCT_SOP32) + +/* MOD - modulo */ +MIPS_INS(MOD, .op = MIPS32R6_OP_SPECIAL, .shamt = MIPS32R6_SOP32_MOD, + .funct = MIPS32R6_FUNCT_SOP32) + +/* DIVU - divide unsigned */ 
+MIPS_INS(DIVU, .op = MIPS32R6_OP_SPECIAL, .shamt = MIPS32R6_SOP33_DIVU, + .funct = MIPS32R6_FUNCT_SOP33) + +/* MODU - modulo unsigned */ +MIPS_INS(MODU, .op = MIPS32R6_OP_SPECIAL, .shamt = MIPS32R6_SOP33_MODU, + .funct = MIPS32R6_FUNCT_SOP33) + +/* J - jump */ +MIPS_INS(J, .op = MIPS32R6_OP_J) + +/* JAL - jump and link */ +MIPS_INS(JAL, .op = MIPS32R6_OP_JAL) + +/* JALR - jump and link register */ +MIPS_INS(JALR, .rd = MIPS32_REG_RA, .op = MIPS32R6_OP_SPECIAL, + .funct = MIPS32R6_FUNCT_JALR) + +/* JALX - jump and link exchange */ +MIPS_INS(JALX, .op = MIPS32R6_OP_JALX) + +/* JR - jump register */ +MIPS_INS(JR, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_JR) + +/* LB - load byte */ +MIPS_INS(LB, .op = MIPS32R6_OP_LB) + +/* LBU - load byte unsigned */ +MIPS_INS(LBU, .op = MIPS32R6_OP_LBU) + +/* LH - load half */ +MIPS_INS(LH, .op = MIPS32R6_OP_LH) + +/* LHU - load half unsigned */ +MIPS_INS(LHU, .op = MIPS32R6_OP_LHU) + +/* LUI - load upper immediate */ +MIPS_INS(LUI, .op = MIPS32R6_OP_LUI) + +/* LW - load word */ +MIPS_INS(LW, .op = MIPS32R6_OP_LW) + +/* MUL - multiply low word */ +MIPS_INS(MUL, .op = MIPS32R6_OP_SPECIAL, .shamt = MIPS32R6_SOP30_MUL, + .funct = MIPS32R6_FUNCT_SOP30) + +/* MUH - multiply high word */ +MIPS_INS(MUH, .op = MIPS32R6_OP_SPECIAL, .shamt = MIPS32R6_SOP30_MUH, + .funct = MIPS32R6_FUNCT_SOP30) + +/* MULU - multiply low word unsigned */ +MIPS_INS(MULU, .op = MIPS32R6_OP_SPECIAL, .shamt = MIPS32R6_SOP31_MULU, + .funct = MIPS32R6_FUNCT_SOP31) + +/* MUHU - multiply high word unsgined */ +MIPS_INS(MUHU, .op = MIPS32R6_OP_SPECIAL, .shamt = MIPS32R6_SOP31_MUHU, + .funct = MIPS32R6_FUNCT_SOP31) + +/* SB - store byte */ +MIPS_INS(SB, .op = MIPS32R6_OP_SB) + +/* SH - store half */ +MIPS_INS(SH, .op = MIPS32R6_OP_SH) + +/* SW - store word */ +MIPS_INS(SW, .op = MIPS32R6_OP_SW) + +/* SLL - shift left logical */ +MIPS_INS(SLL, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_SLL) + +/* SLLV - shift left logical variable */ +MIPS_INS(SLLV, .op = 
MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_SLLV) + +/* SLT - set less then */ +MIPS_INS(SLT, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_SLT) + +/* SLTI - set less then immediate */ +MIPS_INS(SLTI, .op = MIPS32R6_OP_SLTI) + +/* SLTIU - set less then imemdiate unsigned */ +MIPS_INS(SLTIU, .op = MIPS32R6_OP_SLTIU) + +/* SLTU - set less than unsigned */ +MIPS_INS(SLTU, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_SLTU) + +/* SRA - shift right arithmetic */ +MIPS_INS(SRA, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_SRA) + +/* SRAV - shift right arithmetic variable */ +MIPS_INS(SRAV, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_SRAV) + +/* SRL - shift right logical */ +MIPS_INS(SRL, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_SRL) + +/* SRLV - shift right logical variable */ +MIPS_INS(SRLV, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_SRLV) + +/* SUB - subtract */ +MIPS_INS(SUB, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_SUB) + +/* SUBU - subtract unsigned */ +MIPS_INS(SUBU, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_SUBU) + +/* SYSCALL - syscall */ +MIPS_INS(SYSCALL, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_SYSCALL) + +/* OR - or */ +MIPS_INS(OR, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_OR) + +/* ORI - or imemdiate */ +MIPS_INS(ORI, .op = MIPS32R6_OP_ORI) + +/* NOR - not or */ +MIPS_INS(NOR, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_NOR) + +/* XOR - exclusive or */ +MIPS_INS(XOR, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_XOR) + +/* XORI - exclusive or immediate */ +MIPS_INS(XORI, .op = MIPS32R6_OP_XORI) +}; + +#undef MIPS_INS + diff --git a/makefile.mk b/makefile.mk index 8135205..e004a64 100644 --- a/makefile.mk +++ b/makefile.mk @@ -3,7 +3,7 @@ CFLAGS += -std=gnu2x # add include directory -CFLAGS += -isystem ../include +CFLAGS += -isystem ../include -DPREFIX=$(PREFIX) INCLUDE += ../include # add lib directory @@ -13,7 +13,7 @@ H_SRC = $(shell find $(SRC) $(INCLUDE) -type f 
-name "*.h") C_SRC = $(shell find $(SRC) -type f -name "*.c") C_OBJ = $(patsubst %.c,$(BIN)/%.o,$(C_SRC)) -.PHONY: clean build run test +.PHONY: clean build run fuzz build: $(BIN)/$(OUT) @@ -24,8 +24,8 @@ clean: run: build $(BIN)/$(OUT) -test: - make -C ../test $(OUT) +fuzz: clean + make -C . build CC=afl-cc LD=afl-cc mkdir -p ../fuzz rm -fr ../fuzz/$(OUT) afl-fuzz -i ../test/$(OUT) -o ../fuzz -M $(OUT) -- $(BIN)/$(OUT) @@ diff --git a/masm/asm.c b/masm/asm.c index 328ae16..619f9e5 100644 --- a/masm/asm.c +++ b/masm/asm.c @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -9,8 +8,9 @@ #include #include "asm.h" +#include "gen.h" #include "mlimits.h" -#include "parse.h" +#include "tab.h" extern char *current_file; @@ -19,325 +19,158 @@ extern char *current_file; #define SEC_ALIGN 0x1000 -static int create_symbol(struct assembler *assembler, - const char name[MAX_LEX_LENGTH], - ssize_t section_idx, - size_t section_offset, - unsigned char bind) +static int elf_rel_type(enum reference_type ty) { + switch (ty) { + case REF_NONE: + return R_MIPS_NONE; + case REF_MIPS_16: + return R_MIPS_16; + case REF_MIPS_26: + return R_MIPS_26; + case REF_MIPS_PC16: + return R_MIPS_PC16; + case REF_MIPS_LO16: + return R_MIPS_LO16; + case REF_MIPS_HI16: + return R_MIPS_HI16; + } + + return R_MIPS_NONE; +} + +static int elf_section_init_reltab(struct section *sec, + struct elf_section *elf_sec) { - size_t str_off; - if (strtab_write_str(&assembler->strtab, name, &str_off)) + Elf32_Rel *reltab = malloc(sizeof(Elf32_Rel) * + sec->reftab.len); + + if (reltab == NULL) { + PERROR("cannot alloc"); return M_ERROR; + } + for (uint32_t i = 0; i < sec->reftab.len; i++) { + Elf32_Rel *rel = &reltab[i]; + struct reference *ref = &sec->reftab.references[i]; + rel->r_offset = B32(ref->offset); + int sym = ref->symbol->tabidx + 1; + int type = elf_rel_type(ref->type); + rel->r_info = B32(ELF32_R_INFO(sym, type)); + } - Elf32_Sym symbol = { - .st_name = B32(str_off), - .st_value = 
B32(section_offset), - .st_size = 0, - .st_info = ELF32_ST_INFO(bind, STT_NOTYPE), - .st_other = ELF32_ST_VISIBILITY(STV_DEFAULT), - .st_shndx = B16(section_idx), - }; + elf_sec->reltab_len = sec->reftab.len; + elf_sec->reltab = reltab; - // dont put magic flag values inside symbol, only real indexes - if (section_idx < 0) - symbol.st_shndx = 0; + return M_SUCCESS; +} - if (symtab_push(&assembler->symtab, symbol, section_idx)) +static int elf_section_init(struct section *sec, struct elf_section *elf_sec) +{ + elf_sec->data = sec; + elf_sec->shdr_idx = 0; // dont know yet + elf_sec->reltab_shidx = 0; // dont know yet + elf_sec->reltab_len = sec->reftab.len; + elf_sec->reltab = NULL; + + if (sec->reftab.len && elf_section_init_reltab(sec, elf_sec)) return M_ERROR; return M_SUCCESS; } -static int find_symbol_or_stub(struct assembler *assembler, - const char name[MAX_LEX_LENGTH], - Elf32_Sym **res, - size_t *res2) +/* free an elf section */ +static void elf_section_free(struct elf_section *sec) { - if (symtab_find(&assembler->symtab, res, res2, name) == M_SUCCESS) - return M_SUCCESS; + if (sec->reltab != NULL) + free(sec->reltab); +} - if (create_symbol(assembler, name, SYMSEC_STUB, 0, STB_LOCAL)) +static int asm_init_sections(struct assembler *assembler) +{ + struct section *sections = assembler->gen.sections; + uint32_t len = assembler->gen.sections_len; + + struct elf_section *elftab = malloc(sizeof(struct elf_section) * len); + if (elftab == NULL) { + PERROR("cannot alloc"); return M_ERROR; + } - size_t idx = assembler->symtab.len - 1; - - if (res != NULL) - *res = &assembler->symtab.symbols[idx]; - if (res2 != NULL) - *res2 = idx; + for (uint32_t i = 0; i < len; i++) { + struct elf_section *elfsec = &elftab[i]; + elfsec->data = &sections[i]; + if (elf_section_init(&sections[i], elfsec)) { + free(elftab); + return M_ERROR; + } + } + assembler->sections = elftab; + assembler->section_len = len; return M_SUCCESS; } -static int handle_directive(struct assembler *assembler, - 
struct mips_directive *directive) -{ - switch (directive->type) { - case MIPS_DIRECTIVE_SECTION: { - struct section_table *sec_tbl = &assembler->sectab; - struct section *sec; - if (sectab_get(sec_tbl, &sec, directive->name) - == M_SUCCESS) { - sec_tbl->current = sec; - break; - } - - if (sectab_alloc(sec_tbl, &sec, directive->name)) - return M_ERROR; - - sec_tbl->current = sec; - break; +static int elf_sym_bind(enum symbol_type ty) { + switch (ty) { + case SYM_LOCAL: + return STB_LOCAL; + case SYM_GLOBAL: + return STB_GLOBAL; + case SYM_EXTERN: + return STB_GLOBAL; } - case MIPS_DIRECTIVE_ALIGN: { - assembler->sectab.current->alignment = - 1 << directive->align; - if (assembler->sectab.current->alignment == 0) { - ERROR("cannot align to zero"); - return M_ERROR; - } - break; - } - - case MIPS_DIRECTIVE_SPACE: { - struct section_entry entry; - entry.type = ENT_NO_DATA; - entry.size = directive->space; - if (sec_push(assembler->sectab.current, entry)) - return M_ERROR; - break; - } - - case MIPS_DIRECTIVE_WORD: { - for (uint32_t i = 0; i < directive->len; i++) { - struct section_entry entry; - entry.type = ENT_WORD; - entry.word = directive->words[i]; - entry.size = sizeof(uint32_t); - if (sec_push(assembler->sectab.current, - entry)) - return M_ERROR; - } - break; - } - - case MIPS_DIRECTIVE_HALF: { - for (uint32_t i = 0; i < directive->len; i++) { - struct section_entry entry; - entry.type = ENT_HALF; - entry.half = directive->halfs[i]; - entry.size = sizeof(uint16_t); - if (sec_push(assembler->sectab.current, - entry)) - return M_ERROR; - } - break; - } - - case MIPS_DIRECTIVE_BYTE: { - for (uint32_t i = 0; i < directive->len; i++) { - struct section_entry entry; - entry.type = ENT_BYTE; - entry.byte = directive->bytes[i]; - entry.size = sizeof(uint8_t); - if (sec_push(assembler->sectab.current, - entry)) - return M_ERROR; - } - break; - } - - case MIPS_DIRECTIVE_EXTERN: { - if (symtab_find(&assembler->symtab, NULL, NULL, - directive->name) == M_SUCCESS) { - 
ERROR("cannot extern local symbol '%s'", - directive->name); - return M_ERROR; - } - - if (create_symbol(assembler, directive->name, SYMSEC_EXTERN, 0, - STB_GLOBAL)) - return M_ERROR; - - break; - } - - case MIPS_DIRECTIVE_GLOBL: { - Elf32_Sym *sym; - if (symtab_find(&assembler->symtab, &sym, NULL, - directive->name) == M_SUCCESS) { - sym->st_info = ELF32_ST_INFO(STB_GLOBAL, STT_NOTYPE); - break; - } - - if (create_symbol(assembler, directive->name, SYMSEC_STUB, 0, - STB_GLOBAL)) - return M_ERROR; - - break; - } - - case MIPS_DIRECTIVE_ASCII: { - struct section_entry entry; - entry.type = ENT_STR; - entry.size = strlen(directive->name); - memcpy(entry.str, directive->name, entry.size); - if (sec_push(assembler->sectab.current, entry)) - return M_ERROR; - break; - } - - case MIPS_DIRECTIVE_ASCIIZ: { - struct section_entry entry; - entry.type = ENT_STR; - entry.size = strlen(directive->name) + 1; - memcpy(entry.str, directive->name, entry.size); - if (sec_push(assembler->sectab.current, entry)) - return M_ERROR; - break; - } - } - - return M_SUCCESS; + return STB_GLOBAL; } -static int handle_label(struct assembler *assembler, - const char name[MAX_LEX_LENGTH]) -{ - struct section *cur = assembler->sectab.current; - - Elf32_Sym *ref; - size_t symidx; - - if (symtab_find(&assembler->symtab, &ref, &symidx, name) == M_SUCCESS) { - ssize_t *sec = &assembler->symtab.sections[symidx]; - - // check if the symbol is acutally jus a stub, if so - // we need to update it - if (*sec == SYMSEC_STUB) { - *sec = cur->index; - ref->st_value = B32(sec_size(cur)); - return M_SUCCESS; - } - - ERROR("redefined symbol '%s'", name); - return M_ERROR; +static int asm_init_symtab(struct assembler *assembler) { + struct symbol_table *symtab = &assembler->gen.symtab; + size_t len = symtab->len + 1; + Elf32_Sym *elftab = malloc(sizeof(Elf32_Sym) * len); + if (elftab == NULL) { + PERROR("cannot alloc"); } - if (create_symbol(assembler, name, cur->index, sec_size(cur), - STB_LOCAL)) - return 
M_ERROR; + // add null entry + elftab[0] = (Elf32_Sym) {0}; - return M_SUCCESS; -} + // add rest of the entries + for (uint32_t i = 0; i < symtab->len; i++) { + struct symbol *sym = &symtab->symbols[i]; + int bind = elf_sym_bind(sym->type); + int type = STT_NOTYPE; -static int handle_ins(struct assembler *assembler, - struct ins_expr *expr) -{ - struct section *sec = assembler->sectab.current; - size_t secidx = sec->len; - - for (size_t i = 0; i < expr->ins_len; i++) { - union mips_instruction_data *ins = - &expr->ins[i].data; - struct reference *ref = - &expr->ref[i]; - struct section_entry entry; - - entry.type = ENT_INS; - entry.size = sizeof(union mips_instruction_data); - entry.ins = B32(ins->raw); - - if (sec_push(sec, entry)) + // get name + size_t str_off; + if (strtab_write_str(&assembler->strtab, sym->name.str, + &str_off)) { + free(elftab); return M_ERROR; + } - if (ref->type == R_MIPS_NONE) - continue; - - size_t symidx; - if (find_symbol_or_stub(assembler, ref->name, NULL, &symidx)) - return M_ERROR; - - Elf32_Rela rel = { - .r_info = B32(ELF32_R_INFO(symidx, ref->type)), - .r_addend = B32(ref->addend), - .r_offset = B32(sec_index(sec, secidx + i)), + elftab[i+1] = (Elf32_Sym) { + .st_name = B32(str_off), + .st_info = ELF32_ST_INFO(bind, type), + .st_size = 0, + .st_other = 0, + .st_value = B32(sym->offset), + .st_shndx = 0, }; - - if (reltab_push(&sec->reltab, rel)) - return M_ERROR; } + assembler->symbols = elftab; + assembler->symtab_len = len; + return M_SUCCESS; } static int parse_file(struct assembler *assembler) { - struct parser *parser = &assembler->parser; - - while (1) { - struct expr expr; - int res = parser_next(parser, &expr); - - if (res == M_ERROR) - return M_ERROR; - - if (res == M_EOF) - return M_SUCCESS; - - switch (expr.type) { - case EXPR_INS: - if (handle_ins(assembler, &expr.ins)) - return M_ERROR; - break; - case EXPR_DIRECTIVE: - if (handle_directive(assembler, - &expr.directive)) - return M_ERROR; - break; - - case EXPR_LABEL: 
- if (handle_label(assembler, expr.label)) - return M_ERROR; - break; - - case EXPR_CONSTANT: - break; - } - } - - return M_SUCCESS; -} - -static int assemble_phdr(struct assembler *assembler, Elf32_Phdr **res, - uint32_t *res2) -{ - Elf32_Phdr *phdr = malloc(sizeof(Elf32_Phdr) * - assembler->sectab.len); - if (phdr == NULL) { - PERROR("cannot alloc"); - return M_ERROR;; - } - - for (uint32_t i = 0; i < assembler->sectab.len; i++) { - Elf32_Phdr *hdr = &phdr[i]; - struct section *sec = &assembler->sectab.sections[i]; - size_t size = sec_size(sec); - hdr->p_type = B32(PT_LOAD); - hdr->p_flags = B32( - (sec->execute << 0) | - (sec->write << 1) | - (sec->read << 2)); - hdr->p_offset = 0; - hdr->p_vaddr = 0; - hdr->p_paddr = 0; - hdr->p_filesz = B32(size); - hdr->p_memsz = B32(size); - hdr->p_align = B32(SEC_ALIGN); - } - - *res = phdr; - *res2 = assembler->sectab.len; + if (generate_mips32r6(&assembler->gen)) + return M_ERROR; + if (asm_init_sections(assembler)) + return M_ERROR; + if (asm_init_symtab(assembler)) + return M_ERROR; return M_SUCCESS; } @@ -349,8 +182,8 @@ static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res, max_entries += 1; // symtab max_entries += 1; // strtab max_entries += 1; // shtrtab - max_entries += assembler->sectab.len; // sections - max_entries += assembler->sectab.len; // reltabs per section + max_entries += assembler->section_len; // sections + max_entries += assembler->section_len; // reltabs per section Elf32_Shdr *shdr = malloc(sizeof(Elf32_Shdr) * max_entries); @@ -366,16 +199,17 @@ static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res, shdr[count++] = (Elf32_Shdr) {0}; // reltables - for (uint32_t i = 0; i < assembler->sectab.len; i++) { - struct section *sec = &assembler->sectab.sections[i]; - const char *prefix = ".reltab."; - char reltab_name[MAX_LEX_LENGTH + 8]; + for (uint32_t i = 0; i < assembler->section_len; i++) { + struct elf_section *sec = &assembler->sections[i]; + const char *prefix = 
".reltab"; + char reltab_name[MAX_LEX_LENGTH + strlen(prefix)]; - if (sec->reltab.len == 0) + if (sec->reltab_len == 0) continue; strcpy(reltab_name, prefix); - strcat(reltab_name, sec->name); + strncat(reltab_name, sec->data->name.str, + MAX_LEX_LENGTH - strlen(prefix)); if (strtab_write_str(&assembler->shstrtab, reltab_name, &str_off)) { @@ -386,7 +220,7 @@ static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res, sec->reltab_shidx = count; shdr[count++] = (Elf32_Shdr) { .sh_name = B32(str_off), - .sh_type = B32(SHT_RELA), + .sh_type = B32(SHT_REL), .sh_flags = 0, .sh_addr = 0, .sh_offset = 0, @@ -394,41 +228,41 @@ static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res, .sh_link = 0, .sh_info = 0, .sh_addralign = B32(1), - .sh_entsize = B32(sizeof(Elf32_Rela)), + .sh_entsize = B32(sizeof(Elf32_Rel)), }; } // for each section - for (uint32_t i = 0; i < assembler->sectab.len; i++) { - struct section *sec = &assembler->sectab.sections[i]; - char name[MAX_LEX_LENGTH+1] = "."; + for (uint32_t i = 0; i < assembler->section_len; i++) { + struct elf_section *sec = &assembler->sections[i]; + const char *name = sec->data->name.str; - strcat(name, sec->name); if (strtab_write_str(&assembler->shstrtab, name, &str_off)) { free(shdr); return M_ERROR; } sec->shdr_idx = count; - if (sec->reltab.len != 0) + if (sec->reltab_len != 0) shdr[sec->reltab_shidx].sh_info = B32(count); - shdr[count++] = (Elf32_Shdr){ - .sh_name = B32(str_off), - .sh_type = B32(SHT_PROGBITS), - .sh_flags = B32( - (sec->write << 0) | - (sec->execute << 2) | + shdr[count++] = (Elf32_Shdr){ + .sh_name = B32(str_off), + .sh_type = B32(sec->data->execute ? 
+ SHT_PROGBITS : SHT_NOBITS), + .sh_flags = B32( + (sec->data->write << 0) | + (sec->data->execute << 2) | SHF_ALLOC), - .sh_addr = 0, - .sh_offset = 0, - .sh_size = 0, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = B32(sec->alignment), - .sh_entsize = 0, - }; - } + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = B32(SEC_ALIGN), + .sh_entsize = 0, + }; + } // symbol table if (strtab_write_str(&assembler->shstrtab, ".symtab", &str_off)) { @@ -490,9 +324,9 @@ static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res, .sh_entsize = 0, }; - for (uint32_t i = 0; i < assembler->sectab.len; i++) { - struct section *sec = &assembler->sectab.sections[i]; - if (sec->reltab.len == 0) + for (uint32_t i = 0; i < assembler->section_len; i++) { + struct elf_section *sec = &assembler->sections[i]; + if (sec->reltab_len == 0) continue; shdr[sec->reltab_shidx].sh_link = B32(assembler->symtab_shidx); @@ -507,61 +341,53 @@ static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res, static void update_offsets(struct assembler *assembler, Elf32_Ehdr *ehdr) { Elf32_Shdr *shdr = (Elf32_Shdr *) assembler->shdr; - Elf32_Phdr *phdr = (Elf32_Phdr *) assembler->phdr; uint32_t ptr = 0; // we must now correct offets and sizes inside the ehdr, phdr, // and shdr ptr += sizeof(Elf32_Ehdr); - // phdr - ehdr->e_phoff = B32(ptr); - ptr += assembler->phdr_len * sizeof(Elf32_Phdr); - // reltbls - for (uint32_t i = 0; i < assembler->sectab.len; i++) { - struct section *sec = &assembler->sectab.sections[i]; - if (sec->reltab.len == 0) + for (uint32_t i = 0; i < assembler->section_len; i++) { + struct elf_section *sec = &assembler->sections[i]; + if (sec->reltab_len == 0) continue; int idx = sec->reltab_shidx; - int len = sec->reltab.len; + int len = sec->reltab_len; shdr[idx].sh_offset = B32(ptr); - shdr[idx].sh_size = B32(len * sizeof(Elf32_Rela)); - ptr += len * sizeof(Elf32_Rela); - } - - // section padding - { - uint32_t mod 
= ptr % SEC_ALIGN; - if (mod != 0) - assembler->secalign = (SEC_ALIGN - mod); - else - assembler->secalign = 0; - ptr += assembler->secalign; + shdr[idx].sh_size = B32(len * sizeof(Elf32_Rel)); + ptr += len * sizeof(Elf32_Rel); } // sections size_t v_addr = 0; - for (uint32_t i = 0; i < assembler->sectab.len; i++) { - struct section *sec = &assembler->sectab.sections[i]; + for (uint32_t i = 0; i < assembler->section_len; i++) { + + size_t pad = v_addr % SEC_ALIGN; + if (pad) + pad = SEC_ALIGN - pad; + v_addr += pad; + + struct elf_section *sec = &assembler->sections[i]; uint32_t idx = sec->shdr_idx; - uint32_t size = ntohl(phdr[i].p_filesz); - phdr[i].p_offset = B32(ptr); - phdr[i].p_vaddr = B32(v_addr); - phdr[i].p_paddr = B32(v_addr); + uint32_t size = sec->data->len; shdr[idx].sh_offset = B32(ptr); - shdr[idx].sh_size = phdr[i].p_filesz; - shdr[idx].sh_addr = phdr[i].p_vaddr; + shdr[idx].sh_size = B32(size); + shdr[idx].sh_addr = B32(v_addr); v_addr += size; ptr += size; } // symtab - shdr[assembler->symtab_shidx].sh_offset = B32(ptr); - shdr[assembler->symtab_shidx].sh_link = B32(assembler->strtab_shidx); - shdr[assembler->symtab_shidx].sh_size = - B32(assembler->symtab.len * sizeof(Elf32_Sym)); - ptr += assembler->symtab.len * sizeof(Elf32_Sym); + { + uint32_t len = assembler->symtab_len; + uint32_t size = len * sizeof(Elf32_Sym); + shdr[assembler->symtab_shidx].sh_offset = B32(ptr); + shdr[assembler->symtab_shidx].sh_link = + B32(assembler->strtab_shidx); + shdr[assembler->symtab_shidx].sh_size = B32(size); + ptr += size; + } // strtab shdr[assembler->strtab_shidx].sh_offset = B32(ptr); @@ -573,24 +399,10 @@ static void update_offsets(struct assembler *assembler, Elf32_Ehdr *ehdr) shdr[assembler->shstrtab_shidx].sh_size = B32(assembler->shstrtab.size); ptr += assembler->shstrtab.size; - // shdr ehdr->e_shoff = B32(ptr); } -static void update_sym_shindx(struct assembler *assembler) -{ - for (size_t i = 0; i < assembler->symtab.len; i++) { - Elf32_Sym *sym = 
&assembler->symtab.symbols[i]; - ssize_t sec = assembler->symtab.sections[i]; - - if (sec >= 0) { - sym->st_shndx = B16(assembler-> - sectab.sections[sec].shdr_idx); - } - } -} - static int write_file(struct assembler *assembler, Elf32_Ehdr *ehdr, const char *path) { @@ -605,80 +417,70 @@ static int write_file(struct assembler *assembler, Elf32_Ehdr *ehdr, // ehdr fwrite(ehdr, sizeof(Elf32_Ehdr), 1, out); - // phdr - fwrite(assembler->phdr, sizeof(Elf32_Phdr), assembler->phdr_len, out); - // reltbls - for (uint32_t i = 0; i < assembler->sectab.len; i++) { - struct section *sec = &assembler->sectab.sections[i]; - if (sec->reltab.len == 0) + for (uint32_t i = 0; i < assembler->section_len; i++) { + struct elf_section *sec = &assembler->sections[i]; + void *ptr = sec->reltab; + int len = sec->reltab_len; + if (len < 1) continue; - void *ptr = sec->reltab.data; - int len = sec->reltab.len; - fwrite(ptr, sizeof(Elf32_Rela), len, out); - } - - // section padding - for (uint32_t i = 0; i < assembler->secalign; i++) { - uint8_t zero = 0; - fwrite(&zero, 1, 1, out); + fwrite(ptr, sizeof(Elf32_Rel), len, out); } // sections - for (uint32_t i = 0; i < assembler->sectab.len; i++) { - struct section *sec = &assembler->sectab.sections[i]; - for (uint32_t j = 0; j < sec->len; j++) { - struct section_entry *entry = &sec->entries[j]; - size_t size = entry->size; - size_t zeros = size % sec->alignment;; - if (entry->type != ENT_NO_DATA) - fwrite(&entry->data, size, 1, out); - else - zeros += size; - while(zeros) { - fputc(0, out); - zeros--; - } - } + for (uint32_t i = 0; i < assembler->section_len; i++) { + struct elf_section *sec = &assembler->sections[i]; + void *ptr = sec->data->data; + size_t size = sec->data->len; + fwrite(ptr, 1, size, out); } // sym tbl - fwrite(assembler->symtab.symbols, sizeof(Elf32_Sym), - assembler->symtab.len, out); + fwrite(assembler->symbols, sizeof(Elf32_Sym), assembler->symtab_len, + out); // str tbl - fwrite(assembler->strtab.ptr, 
assembler->strtab.size, 1, out); + fwrite(assembler->strtab.ptr, 1, assembler->strtab.size, out); // shstr tbl - fwrite(assembler->shstrtab.ptr, assembler->shstrtab.size, 1, out); + fwrite(assembler->shstrtab.ptr, 1, assembler->shstrtab.size, out); // shdr fwrite(assembler->shdr, sizeof(Elf32_Shdr), assembler->shdr_len, out); + // close fclose(out); return M_SUCCESS; } +static void update_sym_shndx(struct assembler *assembler) +{ + for (uint32_t i = 1; i < assembler->symtab_len; i++) { + Elf32_Sym *esym = &assembler->symbols[i]; + struct symbol *sym = &assembler->gen.symtab.symbols[i - 1]; + + // get shindx + int shindx = 0; + if (sym->secidx != SYM_SEC_STUB) + shindx = assembler->sections[sym->secidx].shdr_idx; + else if (sym->type == SYM_EXTERN) + shindx = 0; + + esym->st_shndx = B16(shindx); + } +} + static int assemble_elf(struct assembler *assembler, const char *out) { - if (assemble_phdr(assembler, (Elf32_Phdr **) &assembler->phdr, - &assembler->phdr_len)) { + if (assemble_shdr(assembler, &assembler->shdr, &assembler->shdr_len)) return M_ERROR; - } - - if (assemble_shdr(assembler, (Elf32_Shdr **) &assembler->shdr, - &assembler->shdr_len)) { - return M_ERROR; - }; Elf32_Ehdr ehdr = MIPS_ELF_EHDR; - ehdr.e_phnum = B16(assembler->phdr_len); ehdr.e_shnum = B16(assembler->shdr_len); ehdr.e_shstrndx = B16(assembler->shstrtab_shidx); - update_offsets(assembler, &ehdr); - update_sym_shindx(assembler); + update_sym_shndx(assembler); if (write_file(assembler, &ehdr, out)) return M_ERROR; @@ -709,10 +511,16 @@ int assemble_file(struct assembler_arguments args) int assembler_init(struct assembler *assembler, const char *path) { - if (lexer_init(path, &assembler->lexer)) - return M_ERROR; + assembler->shdr = NULL; + assembler->symbols = NULL; + assembler->sections = NULL; + assembler->strtab.ptr = NULL; + assembler->shstrtab.ptr = NULL; + assembler->gen.sections = NULL; + assembler->gen.symtab.symbols = NULL; + assembler->section_len = 0; - if 
(parser_init(&assembler->lexer, &assembler->parser)) + if (generator_init(path, &assembler->gen)) return M_ERROR; if (strtab_init(&assembler->shstrtab)) @@ -721,31 +529,22 @@ int assembler_init(struct assembler *assembler, const char *path) if (strtab_init(&assembler->strtab)) return M_ERROR; - if (symtab_init(&assembler->symtab)) - return M_ERROR; - - if (sectab_init(&assembler->sectab)) - return M_ERROR; - - assembler->symtab.strtab = &assembler->strtab; - assembler->phdr = NULL; - assembler->shdr = NULL; - return M_SUCCESS; } void assembler_free(struct assembler *assembler) { - if (assembler->phdr) - free(assembler->phdr); if (assembler->shdr) free(assembler->shdr); + if (assembler->symbols) + free(assembler->symbols); + if (assembler->sections) { + for (uint32_t i = 0; i < assembler->section_len; i++) + elf_section_free(&assembler->sections[i]); + free(assembler->sections); + } - sectab_free(&assembler->sectab); - symtab_free(&assembler->symtab); strtab_free(&assembler->strtab); strtab_free(&assembler->shstrtab); - - parser_free(&assembler->parser); - lexer_free(&assembler->lexer); + generator_free(&assembler->gen); } diff --git a/masm/asm.h b/masm/asm.h index 1162164..fecd335 100644 --- a/masm/asm.h +++ b/masm/asm.h @@ -3,19 +3,15 @@ #ifndef __ASM_H__ #define __ASM_H__ -#include #include -#include -#include "mlimits.h" -#include "parse.h" -#include "lex.h" +#include "gen.h" /// /// ELF string table /// -struct str_table { +struct elf_str_table { // size of the ptr in bytes size_t size; @@ -25,199 +21,60 @@ struct str_table { }; /* initalize a string table */ -int strtab_init(struct str_table *strtab); +int strtab_init(struct elf_str_table *strtab); /* free a string table */ -void strtab_free(struct str_table *strtab); +void strtab_free(struct elf_str_table *strtab); /* get a string form the string table */ -int strtab_get_str(struct str_table *strtab, const char *str, size_t *res); +int strtab_get_str(struct elf_str_table *strtab, const char *str, size_t 
*res); /* get or append a string into the string table */ -int strtab_write_str(struct str_table *strtab, const char *str, size_t *res); - +int strtab_write_str(struct elf_str_table *strtab, const char *str, size_t *res); /// -/// ELF symbol table -/// - -struct symbol_table { - // length in size in sym ammt - size_t len; - size_t size; - - // the Elf symbols - Elf32_Sym *symbols; - - // keeps track of what section each ELF symbol is in - // *!!this is NOT the section header index in the ELF ehdr!!* - ssize_t *sections; - - // symbols reference a string table that acutally - // holds the strings - // - // *weak* ptr, we do not own this!!! - struct str_table *strtab; - -}; - -/* initalize a symbol table */ -int symtab_init(struct symbol_table *symtab); - -/* free the symbol table */ -void symtab_free(struct symbol_table *symtab); - -/* add a symbol to the symbol tbl */ -int symtab_push(struct symbol_table *symtab, const Elf32_Sym sym, - ssize_t sec_idx); - -/* find a symbol by name in the symbol table */ -int symtab_find(struct symbol_table *symtab, Elf32_Sym **sym, size_t *idx, - const char name[MAX_LEX_LENGTH]); - -/// -/// ELF relocation table -/// - -struct relocation_table { - size_t len; - size_t size; - Elf32_Rela *data; -}; - -/* initalize a relocation table */ -int reltab_init(struct relocation_table *reltab); - -/* free the relocation table */ -void reltab_free(struct relocation_table *reltab); - -/* add a entry to the relocation table */ -int reltab_push(struct relocation_table *reltab, const Elf32_Rela rel); - -/// -/// section entry -/// - -enum section_entry_type { - ENT_INS, - ENT_WORD, - ENT_HALF, - ENT_BYTE, - ENT_STR, - ENT_NO_DATA, -}; - -/* holds a entry inside the section, i.e. 
a instruction, raw data, - * special directives */ -struct section_entry { - size_t size; - enum section_entry_type type; - - union { - // to get memory address - char data; - - // data - uint32_t ins; - char str[MAX_LEX_LENGTH]; - int32_t word; - int16_t half; - int8_t byte; - }; -}; - -/// -/// section +/// elf section /// /* holds a section of the asm file (i.e. .text, .bss, .data) */ -struct section { - // length and size of amount of entries - size_t len; - size_t size; - struct section_entry *entries; - - // section name - char name[MAX_LEX_LENGTH]; +struct elf_section { + // section data *weak* pointer + struct section *data; // index of the section in - // all the sections - size_t index; - - // index of the sectio in // the ELF shdr size_t shdr_idx; - // ELF section data - bool read; - bool write; - bool execute; - uint16_t alignment; - - // ELF tables + // relocation table size_t reltab_shidx; - struct relocation_table reltab; + uint32_t reltab_len; + Elf32_Rel *reltab; }; -/* get the size of the section in bytes */ -size_t sec_size(struct section *section); - -/* get the index of a entry in bytes */ -size_t sec_index(struct section *section, size_t index); - -/* add a section entry to the section */ -int sec_push(struct section *section, struct section_entry entry); - -/* holds eachs section */ -struct section_table { - // length and size of amount of sections - size_t len; - size_t size; - struct section *sections; - - // the current section - struct section *current; -}; - -/* initalize the section table */ -int sectab_init(struct section_table *sec_tbl); - -/* free the section table */ -void sectab_free(struct section_table *sec_tbl); - -/* create a new section in the section table */ -int sectab_alloc(struct section_table *sec_tbl, struct section **sec, - const char name[MAX_LEX_LENGTH]); - -/* get a section by name from the section table */ -int sectab_get(struct section_table *sec_tbl, struct section **sec, - const char name[MAX_LEX_LENGTH]); - /// 
/// assembler /// struct assembler { - // the token lexer - struct lexer lexer; - // the expression parser - struct parser parser; + // the code generator + struct generator gen; - /// ELF tables + /// symbol table size_t symtab_shidx; - struct symbol_table symtab; + size_t symtab_len; + Elf32_Sym *symbols; + + // sh string table size_t strtab_shidx; - struct str_table strtab; + struct elf_str_table strtab; + + // string table size_t shstrtab_shidx; - struct str_table shstrtab; + struct elf_str_table shstrtab; - /// Segments - struct section_table sectab; - uint32_t secalign; // align sections to 0x1000 when writing - - /// program header - Elf32_Phdr *phdr; - uint32_t phdr_len; + /// sections + uint32_t section_len; + struct elf_section *sections; /// section header Elf32_Shdr *shdr; diff --git a/masm/gen.c b/masm/gen.c new file mode 100644 index 0000000..13d2848 --- /dev/null +++ b/masm/gen.c @@ -0,0 +1,812 @@ +#include +#include +#include +#include +#include + +#include "tab.h" +#include "gen.h" +#include "parse.h" + +/// +/// section table +/// + +static void section_get_default_perm(struct section *sec, const char *name) +{ + #define __LEN 7 + static const struct perms { + char *name; + bool read; + bool write; + bool execute; + int alignment; + } defaults[__LEN] = { + {".text", true, false, true, 4}, + {".code", true, false, true, 4}, + {".data", true, true, false, 1}, + {".stack", true, true, false, 1}, + {".rodata", true, false, false, 1}, + {".bss", true, true, false, 1}, + {".robss", true, false, false, 1}, + }; + + for (int i = 0; i < __LEN; i++) { + const struct perms *p = &defaults[i]; + if (strcasecmp(name, p->name) != 0) + continue; + sec->read = p->read; + sec->write = p->write; + sec->execute = p->execute; + sec->align = p->alignment; + break; + } + +} + +static int section_get(struct generator *gen, struct section **res, + const struct string *const name) +{ + /// find the section if it exists + for (size_t i = 0; i < gen->sections_len; i++) { + 
struct section *sec = &gen->sections[i]; + if (sec->name.len != name->len) + continue; + if (strcmp(sec->name.str, name->str) != 0) + continue; + *res = sec; + return M_SUCCESS; + } + + /// allocate a new one if it doesnt + size_t size = gen->sections_size ? gen->sections_size * 2 : 8; + void *new = realloc(gen->sections, size * sizeof(struct section)); + if (new == NULL) { + PERROR("cannot realloc"); + return M_ERROR; + } + + gen->sections_size = size; + gen->sections = new; + + struct section *sec = &gen->sections[gen->sections_len++]; + + // alloc reftab + if (reftab_init(&sec->reftab)) + return M_ERROR; + + // copy name + if (string_clone(&sec->name, name)) + return M_ERROR; + + // set defaults + sec->len = 0; + sec->size = 0; + sec->align = 1; + sec->data = NULL; + sec->read = true; + sec->write = true; + sec->execute = false; + section_get_default_perm(sec, name->str); + + *res = sec; + return M_SUCCESS; +} + +static int section_extend(struct section *section, size_t space) +{ + size_t newlen = section->len + space; + if (newlen < section->size) + return M_SUCCESS; + + size_t size = section->size ? 
section->size * 2 + newlen : newlen * 2; + void *new = realloc(section->data, size); + if (new == NULL) { + PERROR("cannot realloc"); + return M_ERROR; + } + section->size = size; + section->data = new; + + return M_SUCCESS; +} + +static int section_push(struct section *section, void *data, size_t len) +{ + size_t newlen = section->len + len; + size_t zeros = newlen % section->align; + if (zeros) + zeros = section->align - zeros; + + if (section_extend(section, len + zeros)) + return M_ERROR; + + memset(section->data + section->len, 0, zeros); + memcpy(section->data + section->len + zeros, data, len); + section->len += len + zeros; + + return M_SUCCESS; +} + +static int section_zero(struct section *section, size_t len) +{ + size_t zeros = section->len % section->align; + if (zeros) + zeros = section->align - zeros; + + if (section_extend(section, len + zeros)) + return M_ERROR; + + memset(section->data + section->len, 0, len + zeros); + section->len += len + zeros; + + return M_SUCCESS; +} + +void section_free(struct section *section) +{ + reftab_free(&section->reftab); + string_free(&section->name); + free(section->data); +} + +/// +/// generation functions +/// + +static void print_curr_line(struct generator *gen, + const struct expr *const expr) +{ + int line = expr->line_no, + len = expr->byte_end - expr->byte_start, + nl = true, + c = EOF; + FILE *file = gen->parser.lexer.file; + + fseek(file, expr->byte_start, SEEK_SET); + + while (len--) { + c = getc(file); + if (c == EOF || c == '\0') + break; + if (nl) { + fprintf(stderr, "\t%d | ", line); + line++; + nl = false; + } + if (c == '\n') + nl = true; + putc(c, stderr); + } + +} + +static int gen_directive_whb(struct generator *gen, const void *data, + uint32_t count, uint32_t len) +{ + // TODO: endianess + for (uint32_t i = 0; i < count; i++) { + void *ptr = (char *) data + (len * i); + if (section_push(gen->current, ptr, len)) + return M_ERROR; + } + + return M_SUCCESS; +} + +static int gen_directive(struct generator 
*gen, + const struct expr *const e) +{ + const struct expr_directive *const expr = &e->directive; + int res = M_SUCCESS; + + switch (expr->type) { + case EXPR_DIRECTIVE_ALIGN: + if (expr->align < 1) { + ERROR("alignment cannot be zero"); + print_curr_line(gen, e); + return M_ERROR; + } + gen->current->align = expr->align; + break; + case EXPR_DIRECTIVE_SPACE: + res = section_zero(gen->current, expr->space); + break; + case EXPR_DIRECTIVE_WORD: + res = gen_directive_whb(gen, expr->words, expr->len, + sizeof(uint32_t)); + break; + case EXPR_DIRECTIVE_HALF: + res = gen_directive_whb(gen, expr->halfs, expr->len, + sizeof(uint16_t)); + break; + case EXPR_DIRECTIVE_BYTE: + res = gen_directive_whb(gen, expr->bytes, expr->len, + sizeof(uint8_t)); + break; + case EXPR_DIRECTIVE_SECTION: + res = section_get(gen, &gen->current, &expr->section); + break; + case EXPR_DIRECTIVE_EXTERN: { + struct symbol *sym; + res = symtab_find_or_stub(&gen->symtab, &sym, &expr->label); + if (res == M_SUCCESS) + sym->type = SYM_EXTERN; + break; + } + case EXPR_DIRECTIVE_GLOBL: { + struct symbol *sym; + res = symtab_find_or_stub(&gen->symtab, &sym, &expr->label); + if (res == M_SUCCESS) + sym->type = SYM_GLOBAL; + break; + } + case EXPR_DIRECTIVE_ASCII: + res = section_push(gen->current, expr->string.str, + expr->string.len - 1); + break; + case EXPR_DIRECTIVE_ASCIIZ: + res = section_push(gen->current, expr->string.str, + expr->string.len); + break; + } + + return res; +} + +static int gen_constant(struct generator *gen, struct expr_const *const expr) +{ + (void) gen; + (void) expr; + + ERROR("constants not yet implemented"); + return M_ERROR; +} + +static enum grammer_type get_gmr_type(const char *name, size_t *len) +{ + #define CHK(part, str) { \ + if (strncasecmp(str, name, strlen(str)) == 0) { \ + *len = strlen(str); \ + return GMR_ ##part; \ + }} \ + + CHK(RD, "rd") + CHK(RS, "rs") + CHK(RT, "rt") + CHK(IMMD, "immd") + CHK(OFFSET_BASE, "offset(base)") + CHK(OFFSET, "offset") + CHK(TARGET, 
"target") + CHK(HI, "hi") + CHK(LO, "lo") + + #undef CHK + + ERROR("!!! BUG: this should never hit !!!"); + exit(1); +} + +static int parse_register(enum mips32_register *reg, struct string *name) +{ + int len = name->len; + int c0 = len > 0 ? name->str[0] : '\0', + c1 = len > 1 ? name->str[1] : '\0', + c2 = len > 2 ? name->str[2] : '\0', + c3 = len > 3 ? name->str[3] : '\0'; + + // $zero + if (c0 == 'z') { + if (c1 == 'e' && c2 == 'r' && c3 == 'o') { + *reg = MIPS32_REG_ZERO; + return M_SUCCESS; + } + } + + // $a0-a3 $at + else if (c0 == 'a') { + if (c1 == 't') { + *reg = MIPS32_REG_AT; + return M_SUCCESS; + } + if (c1 >= '0' && c1 <= '3') { + *reg = MIPS32_REG_A0; + *reg += c1 - '0'; + return M_SUCCESS; + } + } + + // $v0-v1 + else if (c0 == 'v') { + if (c1 >= '0' && c1 <= '1') { + *reg = MIPS32_REG_V0; + *reg += c1 - '0'; + return M_SUCCESS; + } + } + + // $t0-t9 + else if (c0 == 't') { + if (c1 >= '0' && c1 <= '7') { + *reg = MIPS32_REG_T0; + *reg += c1 - '0'; + return M_SUCCESS; + } + // reg T8-T9 are not in order with T0-T7 + if (c1 >= '8' && c1 <= '9') { + *reg = MIPS32_REG_T8; + *reg += c1 - '8'; + return M_SUCCESS; + } + } + + // $s0-s7 $sp + else if (c0 == 's') { + if (c1 >= '0' && c1 <= '7') { + *reg = MIPS32_REG_S0; + *reg += c1 - '0'; + return M_SUCCESS; + } + if (c1 == 'p') { + *reg = MIPS32_REG_SP; + return M_SUCCESS; + } + } + + // $k0-k1 + else if (c0 == 'k') { + if (c1 >= '0' && c1 <= '1') { + *reg = MIPS32_REG_K0; + *reg += c1 - '0'; + return M_SUCCESS; + } + } + + // $gp + else if (c0 == 'g') { + if (c1 == 'p') { + *reg = MIPS32_REG_GP; + return M_SUCCESS; + } + } + + // $fp + else if (c0 == 'f') { + if (c1 == 'p') { + *reg = MIPS32_REG_FP; + return M_SUCCESS; + } + } + + // $rp + else if (c0 == 'r') { + if (c1 == 'a') { + *reg = MIPS32_REG_RA; + return M_SUCCESS; + } + } + + // $0-31 (non aliased register names) + else if (c0 >= '0' && c0 <= '9') { + int i = c0 - '0'; + if (c1 >= '0' && c1 <= '9') { + i *= 10; + i += c1 - '0'; + } + if (i <= 
31) { + *reg = i; + return M_SUCCESS; + } + } + + ERROR("unknown register $%.*s", name->len, name->str); + return M_ERROR; +} + +static int gen_ins_read_state(struct generator *gen, + struct expr *const expr, + struct gen_ins_state *state, + struct mips32_grammer *grammer) +{ + char *ptr = grammer->grammer; + uint32_t argi = 0; + + // read values into state + while (*ptr != '\0') { + + if (argi >= expr->instruction.args_len) { + ERROR("not enough arguments passed"); + print_curr_line(gen, expr); + return M_ERROR; + } + struct expr_ins_arg *arg = &expr->instruction.args[argi++]; + + size_t skip; + switch (get_gmr_type(ptr, &skip)) { + case GMR_RD: + // rd + if (arg->type != EXPR_INS_ARG_REGISTER) { + ERROR("expected a register"); + print_curr_line(gen, expr); + return M_ERROR; + } + if (parse_register(&state->rd, &arg->reg)) { + print_curr_line(gen, expr); + return M_ERROR; + } + break; + case GMR_RS: + // rs + if (arg->type != EXPR_INS_ARG_REGISTER) { + ERROR("expected a register"); + print_curr_line(gen, expr); + return M_ERROR; + } + if (parse_register(&state->rs, &arg->reg)) { + print_curr_line(gen, expr); + return M_ERROR; + } + break; + case GMR_RT: + // rt + if (arg->type != EXPR_INS_ARG_REGISTER) { + ERROR("expected a register"); + print_curr_line(gen, expr); + return M_ERROR; + } + if (parse_register(&state->rt, &arg->reg)) { + print_curr_line(gen, expr); + return M_ERROR; + } + break; + case GMR_IMMD: + // immd + if (arg->type != EXPR_INS_ARG_IMMEDIATE) { + ERROR("expected an immediate"); + print_curr_line(gen, expr); + return M_ERROR; + } + state->immd = arg->immd; + break; + case GMR_OFFSET: + // offset + state->offset = 0; + if (arg->type == EXPR_INS_ARG_IMMEDIATE) + state->offset = arg->immd; + else if (arg->type == EXPR_INS_ARG_LABEL) + state->label = &arg->label; + else { + ERROR("invalid instruction"); + print_curr_line(gen, expr); + return M_ERROR; + } + break; + case GMR_OFFSET_BASE: + // offset(base) + if (arg->type != EXPR_INS_ARG_OFFSET) { + 
ERROR("expected an offset($base)"); + print_curr_line(gen, expr); + return M_ERROR; + } + state->offset = arg->offset.immd; + if (parse_register(&state->base, &arg->offset.reg)) { + print_curr_line(gen, expr); + return M_ERROR; + } + break; + case GMR_TARGET: + // target + state->target = 0; + if (arg->type == EXPR_INS_ARG_IMMEDIATE) + state->target = arg->immd; + else if (arg->type == EXPR_INS_ARG_LABEL) + state->label = &arg->label; + else { + ERROR("invalid instruction"); + print_curr_line(gen, expr); + return M_ERROR; + } + break; + default: + break; + } + + // skip entry + ptr += skip; + + // skip comma + if (*ptr == ',') { + ptr++; + continue; + } else if (*ptr == '\0') { + break; + } else { + ERROR("!! BUG3: invalid splitting char %c !!!", *ptr); + exit(1); + } + + } + + return M_SUCCESS; +} + +static int gen_ins_write_state( + struct generator *gen, + union mips32_instruction ins, // the instruction to modify + struct gen_ins_state *state, // the current read state + char *grammer) // the gramemr to parse +{ + char *ptr = grammer; + enum reference_type reftype = REF_NONE; + + // read values into state + while (*ptr != '\0') { + + // parse next dsl entry + size_t skip; + enum grammer_type gmr = get_gmr_type(ptr, &skip); + + // check for dsl hardcoded register argument + bool hardcoded = false; + enum mips32_register hard_reg; + if (*(ptr + skip) == '=') { + // parse argument + char *rptr = ptr + skip + 2; + hardcoded = true; + struct string regname; + string_bss(®name, rptr); + if (parse_register(&hard_reg, ®name)) { + ERROR("!!! BUG2: this should never hit !!!"); + exit(1); + } + } + + // skip till next comma + for (;*ptr != '\0' && *ptr != ','; ptr++); + if (*ptr == ',') + ptr++; + + switch (gmr) { + case GMR_RD: + ins.rd = hardcoded ? hard_reg : state->rd; + break; + case GMR_RS: + ins.rs = hardcoded ? hard_reg : state->rs; + break; + case GMR_RT: + ins.rt = hardcoded ? 
hard_reg : state->rt; + break; + case GMR_IMMD: + ins.immd = state->immd; + break; + case GMR_OFFSET: + ins.offset = state->offset; + reftype = REF_MIPS_16; + break; + case GMR_OFFSET_BASE: + ins.offset = state->offset; + ins.rs = state->base; + reftype = REF_MIPS_16; + break; + case GMR_TARGET: + ins.target = state->target; + reftype = REF_MIPS_26; + break; + case GMR_HI: + ins.immd = state->target >> 16; + reftype = REF_MIPS_HI16; + break; + case GMR_LO: + ins.immd = state->target & 0x0000FFFF; + reftype = REF_MIPS_LO16; + break; + } + } + + // get offset for reference (if needed) + uint32_t offset = gen->current->len; + size_t zeros = offset % gen->current->align; + if (zeros) + zeros = gen->current->align - zeros; + offset += zeros; + + // write instructon to section + uint32_t raw = B32(ins.raw); + if (section_push(gen->current, &raw, sizeof(uint32_t))) { + return M_ERROR; + } + + // create reference (if needed) + if (reftype != REF_NONE && state->label != NULL) { + struct symbol *sym; + + if (symtab_find_or_stub(&gen->symtab, &sym, state->label)) + return M_ERROR; + + struct reference ref = { + .type = reftype, + .symbol = sym, + .offset = offset + }; + + if (reftab_push(&gen->current->reftab, &ref)) { + return M_ERROR; + } + } + + return M_SUCCESS; +} + +static int gen_ins(struct generator *gen, struct expr *const expr) +{ + struct mips32_grammer *grammer = NULL; + for (uint32_t i = 0; i < gen->grammers_len; i++) { + struct mips32_grammer *temp = &gen->grammers[i]; + if (strcasecmp(temp->name, expr->instruction.name.str) != 0) + continue; + grammer = temp; + break; + } + + if (grammer == NULL) { + ERROR("unknown instruction"); + print_curr_line(gen, expr); + return M_ERROR; + } + + struct gen_ins_state state; + state.label = NULL; + + // read in the values from the parser + if (gen_ins_read_state(gen, expr, &state, grammer)) + return M_ERROR; + + // write the values into the instructions + // ...and then the sections + if (grammer->pseudo_len > 0) { + // 
write pseudo + for (int i = 0; i < grammer->pseudo_len; i++) { + union mips32_instruction ins = gen->instructions[ + grammer->pseudo_grammer[i].enum_index]; + if (gen_ins_write_state(gen, ins, &state, + grammer->pseudo_grammer[i].update)) + return M_ERROR; + } + } else { + // write real + union mips32_instruction ins + = gen->instructions[grammer->enum_index]; + if (gen_ins_write_state(gen, ins, &state, grammer->grammer)) + return M_ERROR; + } + + return M_SUCCESS; +} + +static int gen_label(struct generator *gen, struct string *const label) +{ + uint32_t offset = gen->current->len; + ptrdiff_t secidx = gen->current - gen->sections; + size_t zeros = offset % gen->current->align; + if (zeros) + zeros = gen->current->align - zeros; + offset += zeros; + + struct symbol *sym; + /* update existing symbol (if exists) */ + if (symtab_find(&gen->symtab, &sym, label->str) == M_SUCCESS) { + if (sym->secidx != SYM_SEC_STUB) { + // symbols that are not labeled stub are fully defined, + // it is a error to redefine them + ERROR("redefined symbol '%s'", label->str); + return M_ERROR; + } + sym->secidx = secidx; + sym->offset = offset; + /* create a new symbol */ + } else { + struct symbol new = { + .secidx = secidx, + .offset = offset, + .type = SYM_LOCAL, + }; + if (string_clone(&new.name, label)) + return M_ERROR; + if (symtab_push(&gen->symtab, &new)) { + string_free(&new.name); + return M_ERROR; + } + } + + return M_SUCCESS; +} + +/* run codegen */ +static int generate(struct generator *gen) +{ + struct expr expr; + int res = M_SUCCESS; + + // get the next expression + if ((res = parser_next(&gen->parser, &expr))) + return res; + + // if its not a segment directive + // (and we dont have a section) + // create the default + if (( + expr.type != EXPR_DIRECTIVE || + expr.directive.type != EXPR_DIRECTIVE_SECTION) && + gen->current == NULL) { + // create .data section + struct string temp = { + .str = ".data", + .len = 5, + .size = 5, + .allocated = false + }; + if 
(section_get(gen, &gen->current, &temp)) { + expr_free(&expr); + return M_ERROR; + } + } + + res = M_SUCCESS; + switch (expr.type) { + case EXPR_DIRECTIVE: + res = gen_directive(gen, &expr); + break; + case EXPR_CONSTANT: + res = gen_constant(gen, &expr.constant); + break; + case EXPR_INS: + res = gen_ins(gen, &expr); + break; + case EXPR_LABEL: + res = gen_label(gen, &expr.label); + break; + } + + expr_free(&expr); + return res; +} + +/* run codegen with the mips32r6 specification */ +int generate_mips32r6(struct generator *gen) +{ + gen->instructions_len = __MIPS32R6_INS_LEN; + gen->instructions = mips32r6_instructions; + gen->grammers_len = __MIPS32R6_GRAMMER_LEN; + gen->grammers = mips32r6_grammers; + + int res; + while (res = generate(gen), 1) { + if (res == M_ERROR) + return M_ERROR; + if (res == M_EOF) + break; + } + + return M_SUCCESS; +} + +int generator_init(const char *file, struct generator *gen) +{ + if (parser_init(file, &gen->parser)) + return M_ERROR; + if (symtab_init(&gen->symtab)) + return M_ERROR; + gen->sections = NULL; + gen->sections_len = 0; + gen->sections_size = 0; + return M_SUCCESS; +} + +void generator_free(struct generator *gen) +{ + parser_free(&gen->parser); + symtab_free(&gen->symtab); + for (size_t i = 0; i < gen->sections_len; i++) + section_free(&gen->sections[i]); + free(gen->sections); +} diff --git a/masm/gen.h b/masm/gen.h new file mode 100644 index 0000000..19f575c --- /dev/null +++ b/masm/gen.h @@ -0,0 +1,118 @@ +/* Copyright (c) 2024 Freya Murphy */ + +#ifndef __GEN_H__ +#define __GEN_H__ + +#include +#include +#include + +#include "parse.h" +#include "tab.h" + +// predefine +struct generator; + +/// +/// a section +/// +struct section { + // name + struct string name; + + // alignment + size_t align; + + // data + char *data; + size_t len; + size_t size; + + // permissions + bool read; + bool write; + bool execute; + + /// reference table + struct reference_table reftab; +}; + +void section_free(struct section *section); 
+ +/// +/// instruction generation state +/// + +struct gen_ins_state { + // rd,rst,rt + enum mips32_register rd; + enum mips32_register rs; + enum mips32_register rt; + + // immd + uint16_t immd; + + // offset(base) + uint16_t offset; + enum mips32_register base; + + // target + uint32_t target; + + // current referencd label + struct string *label; +}; + +/// +/// grammer type +/// + +enum grammer_type { + GMR_RD, + GMR_RS, + GMR_RT, + GMR_IMMD, + GMR_OFFSET, + GMR_OFFSET_BASE, + GMR_TARGET, + GMR_HI, + GMR_LO, +}; + +/// +/// generates assembley +/// from a parser stream +/// +struct generator { + struct parser parser; + + // current instruction table + size_t instructions_len; + union mips32_instruction *instructions; + + // current grammer table + size_t grammers_len; + struct mips32_grammer *grammers; + + // segments + size_t sections_len; + size_t sections_size; + struct section *sections; + + // current section + struct section *current; + + // symbol table + struct symbol_table symtab; +}; + +/* generate the input as mips32r6 */ +int generate_mips32r6(struct generator *gen); + +/* initalize a generator */ +int generator_init(const char *file, struct generator *gen); + +/* free a generator */ +void generator_free(struct generator *gen); + +#endif /* __GEN_H__ */ diff --git a/masm/lex.c b/masm/lex.c index a7707d6..b835a7f 100644 --- a/masm/lex.c +++ b/masm/lex.c @@ -2,6 +2,10 @@ #include #include +#include +#include +#include +#include static struct { int x; @@ -46,64 +50,24 @@ static void skip_comment(struct lexer *lexer) } } -/* lexes text until whitespace - * returns error on zero length or too long */ -static int lex_ident(struct lexer *lexer, char text[MAX_LEX_LENGTH]) -{ - int len = 0; - char *ptr = text; - int c; - - while (1) { - c = lex_peek(lexer); - if (!( - (c >= 'a' && c <= 'z') || - (c >= 'A' && c <= 'Z') || - (c >= '0' && c <= '9') || - (c == '_') - )) { - break; - } - - // pop char out of lexer - lex_next(lexer); - - if (len + 1 == 
MAX_LEX_LENGTH) { - ERROR_POS(pos, "ident has max length of %d", - MAX_LEX_LENGTH); - return M_ERROR; - } - - *ptr++ = c; - len++; - } - - if (len == 0) { - ERROR_POS(pos, "attempted to lex empty ident %d", - MAX_LEX_LENGTH); - return M_ERROR; - } - - *ptr = '\0'; - return M_SUCCESS; -} - /* lexes a string until closing quote * returns error if string is too long or hit newline */ -static int lex_string(struct lexer *lexer,char text[MAX_LEX_LENGTH]) +static int lex_string(struct lexer *lexer, struct string *string) { - int len = 0; - char *ptr = text; - int c; + char c; + string_init(string); while (1) { c = lex_next(lexer); + + // stop on ending quote if (c == '"') break; // strings cannot span multiple lines if (c == '\n') { ERROR_POS(pos, "reached newline before end of string"); + string_free(string); return M_ERROR; } @@ -129,20 +93,73 @@ static int lex_string(struct lexer *lexer,char text[MAX_LEX_LENGTH]) } } - if (len + 1 == MAX_LEX_LENGTH) { - ERROR_POS(pos, "string has max length of %d", - MAX_LEX_LENGTH); + // push char into string + if (string_push(string, c)) { + string_free(string); return M_ERROR; } - - *ptr++ = c; - len++; } - *ptr = '\0'; + // null terminate string + if (string_push(string, '\0')) { + free(string->str); + return M_ERROR; + } + return M_SUCCESS; } +/* lexes text until whitespace + * returns error on zero length or too long */ +static int lex_ident(struct lexer *lexer, struct string *string, + char prefix) +{ + char c; + string_init(string); + + if (prefix != '\0' && string_push(string, prefix)) { + string_free(string); + return M_ERROR; + } + + while (1) { + c = lex_peek(lexer); + if (!( + (c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || + (c == '_') + )) { + break; + } + + // pop char out of lexer + lex_next(lexer); + + // push char into string + if (string_push(string, c)) { + free(string->str); + return M_ERROR; + } + } + + // empty idents are not allowed + if (string->len < 1) { + 
string_free(string); + ERROR("empty ident tokens are not allowed"); + return M_ERROR; + } + + // null terminate string + if (string_push(string, '\0')) { + string_free(string); + return M_ERROR; + } + + return M_SUCCESS; +} + + /* lexes a integer number in base 2,8,10, or 16, * uses base 10 by default but chan be changed by 0b, 0o, and 0x */ static int lex_number(struct lexer *lexer, int64_t *n) @@ -221,6 +238,7 @@ int lexer_next(struct lexer *lexer, struct token *token) again: // use label to avoid whitespace recursion token->x = lexer->x; token->y = lexer->y; + token->off = ftell(lexer->file); pos.x = lexer->x; pos.y = lexer->y; token->type = TOK_EOF; @@ -231,54 +249,80 @@ again: // use label to avoid whitespace recursion switch (c) { case EOF: + + // return a EOF token case '\0': token->type = TOK_EOF; break; + + // skip the comment + // .. and return a NL token case ';': case '#': skip_comment(lexer); token->type = TOK_NL; break; + + // skip the whitespace and + // try to parse the next character case ' ': case '\t': // skip white space lex_next(lexer); goto again; + + // return a NL token case '\n': lex_next(lexer); token->type = TOK_NL; break; + + // return a comma token case ',': lex_next(lexer); token->type = TOK_COMMA; break; + + // return a equal token case '=': lex_next(lexer); token->type = TOK_EQUAL; break; + + // return a left paren token case '(': lex_next(lexer); token->type = TOK_LPAREN; break; + + // return a right paren token case ')': token->type = TOK_RPAREN; lex_next(lexer); break; + + // return a register token case '$': token->type = TOK_REG; lex_next(lexer); - res = lex_ident(lexer, token->text); + res = lex_ident(lexer, &token->string, '\0'); break; + + // return a directive token case '.': token->type = TOK_DIRECTIVE; lex_next(lexer); - res = lex_ident(lexer, token->text); + res = lex_ident(lexer, &token->string, '.'); break; + + // return a string token case '"': token->type = TOK_STRING; lex_next(lexer); - res = lex_string(lexer, 
token->text); + res = lex_string(lexer, &token->string); break; + + // return a number token case '-': case '0': case '1': @@ -293,68 +337,78 @@ again: // use label to avoid whitespace recursion token->type = TOK_NUMBER; res = lex_number(lexer, &token->number); break; + + // return a ident or label token depending + // if it ends with a colon default: token->type = TOK_IDENT; - res = lex_ident(lexer, token->text); + res = lex_ident(lexer, &token->string, '\0'); if (lex_peek(lexer) == ':') { lex_next(lexer); token->type = TOK_LABEL; } break; } + return res; } int lexer_init(const char *path, struct lexer *lexer) { - FILE *file = fopen(path, "r"); - if (file == NULL) { - PERROR("cannot read '%s'", path); - return M_ERROR; - } - lexer->file = file; + /// defaults + lexer->file = NULL; lexer->peek = EOF; lexer->x = 1; lexer->y = 1; + + /// load file + lexer->file = fopen(path, "r"); + if (lexer->file == NULL) { + PERROR("cannot read"); + return M_ERROR; + } + return M_SUCCESS; } -int lexer_free(struct lexer *lexer) +void lexer_free(struct lexer *lexer) { - return fclose(lexer->file); + if (lexer->file) + fclose(lexer->file); } char *token_str(enum token_type type) { switch (type) { - case TOK_IDENT: + case TOK_IDENT: return "ident"; - case TOK_REG: + case TOK_REG: return "register"; - case TOK_LABEL: + case TOK_LABEL: return "label"; - case TOK_STRING: + case TOK_STRING: return "string"; - case TOK_COMMA: + case TOK_COMMA: return "comma"; - case TOK_EQUAL: + case TOK_EQUAL: return "equal"; - case TOK_LPAREN: + case TOK_LPAREN: return "left parentheses"; - case TOK_RPAREN: + case TOK_RPAREN: return "right parentheses"; - case TOK_NUMBER: + case TOK_NUMBER: return "number"; - case TOK_EOF: + case TOK_EOF: return "end of file"; - case TOK_NL: + case TOK_NL: return "new line"; - case TOK_DIRECTIVE: + case TOK_DIRECTIVE: return "directive"; - } + } return "unknown"; } +/* save the current state from the lexer */ void lexer_save(struct lexer *lexer, struct lexer_state 
*state) { state->x = lexer->x; @@ -371,3 +425,18 @@ void lexer_load(struct lexer *lexer, const struct lexer_state *state) lexer->peek = state->peek; fseek(lexer->file, state->offset, SEEK_SET); } + +void token_free(struct token *token) +{ + switch (token->type) { + case TOK_REG: + case TOK_IDENT: + case TOK_LABEL: + case TOK_STRING: + case TOK_DIRECTIVE: + if (token->string.str) + free(token->string.str); + break; + default: + } +} diff --git a/masm/lex.h b/masm/lex.h index e08d0a3..8da6558 100644 --- a/masm/lex.h +++ b/masm/lex.h @@ -7,13 +7,86 @@ #include #include -struct lexer { - FILE *file; - int peek; - int x; - int y; +/// represents a non null +/// terminated string +struct string { + char *str; + uint32_t len; + uint32_t size; + bool allocated; }; +/* initalize a string */ +void string_init(struct string *string); +/* free a string */ +void string_free(struct string *string); +/* clone a string, leave the old one */ +int string_clone(struct string *dst, const struct string *const src); +/* move a string, delete the old one */ +void string_move(struct string *dst, struct string *src); +/* pushes a char onto a string */ +int string_push(struct string *string, char c); +/* load a string from the bss (not allocated) */ +void string_bss(struct string *string, char *src); + +enum token_type { + /// has no associated + /// data + TOK_COMMA, + TOK_EQUAL, + TOK_LPAREN, + TOK_RPAREN, + TOK_EOF, + TOK_NL, + + /// uses number + TOK_NUMBER, + + /// uses string + TOK_REG, + TOK_IDENT, + TOK_LABEL, + TOK_STRING, + TOK_DIRECTIVE, +}; + +/// represents a token +/// returned from the lexer +struct token { + /// type + enum token_type type; + + /// position + int x, y; + /// pos in bytes + int off; + + /// data + union { + int64_t number; + struct string string; + }; +}; + +/* frees a token*/ +void token_free(struct token *token); + +/// holds the data +/// for the current lexer +struct lexer { + // the currently + // open file + FILE *file; + + // the last character peeked 
+ int peek; + + // the current position + int x, y; +}; + +/// holds a previous state of a +/// lexer, which allows rebounding struct lexer_state { long offset; int peek; @@ -21,36 +94,11 @@ struct lexer_state { int y; }; -enum token_type { - TOK_IDENT, - TOK_REG, - TOK_LABEL, - TOK_STRING, - TOK_COMMA, - TOK_EQUAL, - TOK_LPAREN, - TOK_RPAREN, - TOK_NUMBER, - TOK_EOF, - TOK_NL, - TOK_DIRECTIVE, -}; - -struct token { - enum token_type type; - union { - int64_t number; - char text[MAX_LEX_LENGTH]; - }; - int x; - int y; -}; - /* initalize a lexer */ int lexer_init(const char *file, struct lexer *lexer); -/* free the lxer */ -int lexer_free(struct lexer *lexer); +/* free the lexer */ +void lexer_free(struct lexer *lexer); /* lexes the next token, returns M_ERROR on error, * and TOK_EOF on EOF */ diff --git a/masm/out.o b/masm/out.o new file mode 100644 index 0000000..ab24e9b Binary files /dev/null and b/masm/out.o differ diff --git a/masm/parse.c b/masm/parse.c index dbe6ade..b36aa1e 100644 --- a/masm/parse.c +++ b/masm/parse.c @@ -1,1326 +1,533 @@ #include -#include #include -#include -#include -#include +#include +#include -#include "parse.h" #include "lex.h" -#include "mips.h" +#include "parse.h" -#define B16(x) (x) -#define B32(x) (x) +/// +/// Token Functions +/// either get a token, peek a token, +/// or assert a token was returned +/// +/* get the next token from the lexer */ static int next_token(struct parser *parser, struct token *tok) { + // return peeked first if (parser->peek.type != TOK_EOF) { if (tok != NULL) *tok = parser->peek; + else + token_free(&parser->peek); + parser->peek.type = TOK_EOF; return M_SUCCESS; } + + // get next token struct token token; - if (lexer_next(parser->lexer, &token)) + if (lexer_next(&parser->lexer, &token)) return M_ERROR; - if (tok != NULL) + + // return value if given pointer + // else free + if (tok != NULL) { *tok = token; + } else { + token_free(&token); + } + return M_SUCCESS; } - +/* peek the next token from the 
lexer */ static int peek_token(struct parser *parser, struct token *tok) { + // if we dont have a saved token + // get the next one if (parser->peek.type == TOK_EOF) { if (next_token(parser, &parser->peek)) return M_ERROR; } + + // return it if we were given + // a pointer if (tok != NULL) *tok = parser->peek; + return M_SUCCESS; } - +/* get the next token from the lexer, and assert its of type */ static int assert_token(struct parser *parser, enum token_type type, struct token *tok) { + // get next token struct token token; if (next_token(parser, &token)) return M_ERROR; + + // assert its of type if (token.type != type) { ERROR_POS(token, "expected a token of type '%s', got '%s'", token_str(type), token_str(token.type)); + token_free(&token); return M_ERROR; } - if (tok != NULL) + + // return value if given pointer + // else free + if (tok != NULL) { *tok = token; + } else { + token_free(&token); + } + return M_SUCCESS; } +/* get the next token from the lexer, and assert its of type NL */ static int assert_eol(struct parser *parser) { struct token token; if (next_token(parser, &token)) return M_ERROR; if (token.type != TOK_NL && token.type != TOK_EOF) { - ERROR_POS(token, "expected a new line or end of file"); + ERROR_POS(token, "expected a new line or end of file, got '%s'", + token_str(token.type)); return M_ERROR; } + token_free(&token); return M_SUCCESS; } -/* each instruction has a given parse format - * internal to the parser */ -enum mips_parse_format { - // register type: rs, rt, td - MIPS_PARSE_R, - // register type: rs, rt - MIPS_PARSE_R2, - // register type: rd - MIPS_PARSE_RD, - // register type: rs - MIPS_PARSE_RS, - // imeediate type: rs, rt, immd - MIPS_PARSE_I, - // jump type: offset - MIPS_PARSE_J, - // offset 16b type: offset - MIPS_PARSE_O16, - // offset 26b type: offset - MIPS_PARSE_O26, - // breanch equal type: rs, rt, offset - MIPS_PARSE_BE, - // branch zero type: rs, offset - MIPS_PARSE_BZ, - // store and load: rt, offset(base) - 
MIPS_PARSE_SL, - // store and load immediate: rt, immediate - MIPS_PARSE_SLI, - // shift: rd, rt, sa - MIPS_PARSE_S, - // shift variable: rd, rt, rs - MIPS_PARSE_SV, - // none: - MIPS_PARSE_NONE, -}; - -#define FORMAT(ins, format) \ - [MIPS_INS_##ins] = MIPS_PARSE_##format, \ - -const enum mips_parse_format mips_parse_formats[] = { - FORMAT(ADD, R) - FORMAT(ADDI, I) - FORMAT(ADDIU, I) - FORMAT(ADDU, R) - FORMAT(AND, R) - FORMAT(ANDI, I) - FORMAT(BAL, O16) - FORMAT(BALC, O26) - FORMAT(BC, O26) - FORMAT(BEQ, BE) - FORMAT(BEQL, BE) - FORMAT(BGEZ, BZ) - FORMAT(BGEZAL, BZ) - FORMAT(BGEZALL, BZ) - FORMAT(BGEZL, BZ) - FORMAT(BGTZ, BZ) - FORMAT(BGTZL, BZ) - FORMAT(BLEZ, BZ) - FORMAT(BLEZL, BZ) - FORMAT(BLTZ, BZ) - FORMAT(BLTZAL, BZ) - FORMAT(BLTZALL, BZ) - FORMAT(BLTZL, BZ) - FORMAT(BNE, BE) - FORMAT(BNEL, BE) - FORMAT(DIV, R) - FORMAT(MOD, R) - FORMAT(DIVU, R) - FORMAT(MODU, R) - FORMAT(J, J) - FORMAT(JAL, J) - FORMAT(JALR, RS) // TODO: handle rd - FORMAT(JALX, J) - FORMAT(JR, RS) - FORMAT(LB, SL) - FORMAT(LBU, SL) - FORMAT(LH, SL) - FORMAT(LHU, SL) - FORMAT(LUI, SLI) - FORMAT(LW, SL) - FORMAT(MFHI, RD) - FORMAT(MFLO, RD) - FORMAT(MTHI, RS) - FORMAT(MTLO, RS) - FORMAT(MUL, R) - FORMAT(MUH, R) - FORMAT(MULU, R) - FORMAT(MUHU, R) - FORMAT(SB, SL) - FORMAT(SH, SL) - FORMAT(SW, SL) - FORMAT(SLL, S) - FORMAT(SLLV, SV) - FORMAT(SLT, R) - FORMAT(SLTI, I) - FORMAT(SLTIU, I) - FORMAT(SLTU, R) - FORMAT(SRA, S) - FORMAT(SRAV, SV) - FORMAT(SRL, S) - FORMAT(SRLV, SV) - FORMAT(SYSCALL, NONE) - FORMAT(OR, R) - FORMAT(ORI, I) - FORMAT(NOR, R) - FORMAT(SUB, R) - FORMAT(SUBU, R) - FORMAT(XOR, R) - FORMAT(XORI, I) -}; - -#undef FORMAT - -#define MAX5 (1 << 5) -#define MAX16 (1 << 16) -#define MAX26 (1 << 25) -#define MAX32 (1 << 31) - -static int get_reference(struct parser *parser, uint64_t *offset, - struct reference *ref, unsigned char type) +/* peek the next token and return SUCCESS on eol */ +static int peek_eol(struct parser *parser) { struct token token; + if (peek_token(parser, 
&token)) + return M_ERROR; + int res = (token.type == TOK_NL || token.type == TOK_EOF) ? + M_SUCCESS : M_ERROR; + return res; +} - if (next_token(parser, &token)) +/// +/// PARSER FUNCTIONS +/// parses each type of expression +/// + +static int parse_directive_whb(struct parser *parser, void *data, uint32_t *res, + size_t length, size_t max_size) +{ + struct token token; + int len = 0; + + while (1) { + if (peek_eol(parser) == M_SUCCESS) + break; + + if (assert_token(parser, TOK_NUMBER, &token)) + return M_ERROR; + + if ((uint64_t)token.number > max_size) { + ERROR_POS(token, "number cannot exceed max size of %zu", + max_size); + return M_ERROR; + } + + if (len >= MAX_ARG_LENGTH) { + ERROR_POS(token, "exceeded max argument length for " + "directives"); + return M_ERROR; + } + + // BUG: does this only work on little endian??? + memcpy((uint8_t *) data + (len++ * length), &token.number, + max_size); + } + + *res = len; + return M_SUCCESS; +} + +static int parse_immd(struct parser *parser, uint16_t *num) +{ + struct token token; + if (assert_token(parser, TOK_NUMBER, &token)) return M_ERROR; - if (token.type == TOK_NUMBER) { + // TOK_NUMBER does not need to be freed + *num = token.number; + return M_SUCCESS; +} - *offset = token.number; +static int parse_ident(struct parser *parser, struct string *ident) +{ + struct token token; + if (assert_token(parser, TOK_IDENT, &token)) + return M_ERROR; + string_move(ident, &token.string); + return M_SUCCESS; +} + +static int parse_string(struct parser *parser, struct string *string) +{ + struct token token; + if (assert_token(parser, TOK_STRING, &token)) + return M_ERROR; + string_move(string, &token.string); + return M_SUCCESS; +} + +/* parses a directive */ +static int parse_directive(struct parser *parser, struct string *name, + struct expr_directive *expr) +{ + #define CHK(n) if (strcmp(name->str, #n) == 0) + + CHK(.align) { + expr->type = EXPR_DIRECTIVE_ALIGN; + return parse_immd(parser, &expr->align); + } else CHK(.space) 
{ + expr->type = EXPR_DIRECTIVE_SPACE; + return parse_immd(parser, &expr->space); + } else CHK(.word) { + expr->type = EXPR_DIRECTIVE_WORD; + return parse_directive_whb(parser, expr->words, &expr->len, + sizeof(uint32_t), UINT32_MAX); + } else CHK(.half) { + expr->type = EXPR_DIRECTIVE_HALF; + return parse_directive_whb(parser, expr->halfs, &expr->len, + sizeof(uint16_t), UINT16_MAX); + } else CHK(.byte) { + expr->type = EXPR_DIRECTIVE_BYTE; + return parse_directive_whb(parser, expr->bytes, &expr->len, + sizeof(uint8_t), UINT8_MAX); + } else CHK(.extern) { + expr->type = EXPR_DIRECTIVE_EXTERN; + return parse_ident(parser, &expr->label); + } else CHK(.globl) { + expr->type = EXPR_DIRECTIVE_GLOBL; + return parse_ident(parser, &expr->label); + } else CHK(.ascii) { + expr->type = EXPR_DIRECTIVE_ASCII; + return parse_string(parser, &expr->string); + } else CHK(.asciiz) { + expr->type = EXPR_DIRECTIVE_ASCIIZ; + return parse_string(parser, &expr->string); + } else { + expr->type = EXPR_DIRECTIVE_SECTION; + string_move(&expr->section, name); return M_SUCCESS; } - if (token.type != TOK_IDENT) { - ERROR_POS(token, "unexpected token of type '%s'", - token_str(token.type)); + #undef CHK +} + +static int parse_constant(struct parser *parser, struct string *name, + struct expr_const *constant) +{ + if (assert_token(parser, TOK_EQUAL, NULL)) return M_ERROR; - } - strcpy(ref->name, token.text); - ref->type = type; - ref->addend = 0; + struct token token; + if (assert_token(parser, TOK_NUMBER, &token)) + return M_ERROR; - // return zero for now - *offset = 0; + string_move(&constant->name, name); + constant->num = token.number; return M_SUCCESS; } -static int get_offset(struct parser *parser, int32_t *offset, - struct reference *ref) +static int parse_offset(struct parser *parser, + struct expr_ins_arg *arg, + uint64_t immd) { - uint64_t off; - if (get_reference(parser, &off, ref, R_MIPS_PC16)) + // the immediate has already been parsed + // now parse (REG) + + if 
(assert_token(parser, TOK_LPAREN, NULL)) return M_ERROR; - if (off % 4) { - ERROR_POS((*parser->lexer), "cannot use offset of '%ld', must " - "be divisble by four", off); - return M_ERROR; - } - - if (off > MAX16) { - ERROR("offset '%d' cannot be larger than 16 bits", off); - return M_ERROR; - } - - *offset = off; - return M_SUCCESS; -} - -static int get_offset_26(struct parser *parser, int32_t *offset, - struct reference *ref) -{ - uint64_t off; - if (get_reference(parser, &off, ref, R_MIPS_PC26_S2)) - return M_ERROR; - - if (off % 4) { - ERROR_POS((*parser->lexer), "cannot use offset of '%ld', must " - "be divisble by four", off); - return M_ERROR; - } - - if (off > MAX26) { - ERROR("offset '%d' cannot be larger than 26 bits", off); - return M_ERROR; - } - - *offset = off; - return M_SUCCESS; -} - -static int get_target(struct parser *parser, uint32_t *offset, - struct reference *ref) -{ - uint64_t off; - if (get_reference(parser, &off, ref, R_MIPS_26)) - return M_ERROR; - - if (off > MAX26) { - ERROR("target '%d' cannot be larger than 26 bits", off); - return M_ERROR; - } - - *offset = off; - return M_SUCCESS; -} - -static int get_instruction(const char *ident, struct mips_instruction *res) -{ - for (int i = 0; i < __MIPS_INS_LEN; i++) { - struct mips_instruction ins = - mips_instructions[i]; - if (strcasecmp(ident, ins.name) == 0) { - if (res != NULL) - *res = ins; - return M_SUCCESS; - } - } - return M_ERROR; -} - -static int parse_register(struct parser *parser, enum mips_register *reg) -{ struct token token; if (assert_token(parser, TOK_REG, &token)) return M_ERROR; - int len = strlen(token.text); - int c0 = len > 0 ? token.text[0] : '\0', - c1 = len > 1 ? token.text[1] : '\0', - c2 = len > 2 ? token.text[2] : '\0', - c3 = len > 3 ? 
token.text[3] : '\0'; + // set values + string_move(&arg->offset.reg, &token.string); + arg->offset.immd = immd; - // $zero - if (c0 == 'z') { - if (c1 == 'e' && c2 == 'r' && c3 == 'o') { - *reg = MIPS_REG_ZERO; - return M_SUCCESS; - } + if (assert_token(parser, TOK_RPAREN, NULL)) { + string_free(&arg->offset.reg); + return M_ERROR; } - // $a0-a3 $at - else if (c0 == 'a') { - if (c1 == 't') { - *reg = MIPS_REG_AT; - return M_SUCCESS; - } - if (c1 >= '0' && c1 <= '3') { - *reg = MIPS_REG_A0; - *reg += c1 - '0'; - return M_SUCCESS; - } - } - - // $v0-v1 - else if (c0 == 'v') { - if (c1 >= '0' && c1 <= '1') { - *reg = MIPS_REG_V0; - *reg += c1 - '0'; - return M_SUCCESS; - } - } - - // $t0-t9 - else if (c0 == 't') { - if (c1 >= '0' && c1 <= '7') { - *reg = MIPS_REG_T0; - *reg += c1 - '0'; - return M_SUCCESS; - } - // reg T8-T9 are not in order with T0-T7 - if (c1 >= '8' && c1 <= '9') { - *reg = MIPS_REG_T8; - *reg += c1 - '8'; - return M_SUCCESS; - } - } - - // $s0-s7 $sp - else if (c0 == 's') { - if (c1 >= '0' && c1 <= '7') { - *reg = MIPS_REG_S0; - *reg += c1 - '0'; - return M_SUCCESS; - } - if (c1 == 'p') { - *reg = MIPS_REG_SP; - return M_SUCCESS; - } - } - - // $k0-k1 - else if (c0 == 'k') { - if (c1 >= '0' && c1 <= '1') { - *reg = MIPS_REG_K0; - *reg += c1 - '0'; - return M_SUCCESS; - } - } - - // $gp - else if (c0 == 'g') { - if (c1 == 'p') { - *reg = MIPS_REG_GP; - return M_SUCCESS; - } - } - - // $fp - else if (c0 == 'f') { - if (c1 == 'p') { - *reg = MIPS_REG_FP; - return M_SUCCESS; - } - } - - // $rp - else if (c0 == 'r') { - if (c1 == 'a') { - *reg = MIPS_REG_RA; - return M_SUCCESS; - } - } - - // $0-31 (non aliased register names) - else if (c0 >= '0' && c0 <= '9') { - int i = c0 - '0'; - if (c1 >= '0' && c1 <= '9') { - i *= 10; - i += c1 - '0'; - } - if (i <= 31) { - *reg = i; - return M_SUCCESS; - } - } - - ERROR_POS(token, "unknown register $%s", token.text); - return M_ERROR; + return M_SUCCESS; } -static int get_reg_offset(struct parser *parser, - 
struct ins_expr *expr) +static int parse_instruction_arg(struct parser *parser, + struct expr_ins_arg *arg) { + // allowed token matches: + // + // register: + // REG + // + // label: + // IDENT + // + // immediate: + // IMMD + // + // offset: + // (REG) + // IMMD(REG) + struct token token; - enum mips_register reg; - - struct mips_instruction *fi = &expr->ins[0]; - struct mips_instruction *si = &expr->ins[1]; // possibly pseudo - struct reference *fr = &expr->ref[0]; - struct reference *sr = &expr->ref[1]; - - expr->ins_len = 1; - fr->type = R_MIPS_NONE; - -// ============================================= - - // defaults - fi->data.rs = MIPS_REG_ZERO; - fi->data.immd = 0; - if (peek_token(parser, &token)) return M_ERROR; - if (token.type == TOK_IDENT) - goto label; - else if (token.type == TOK_LPAREN) - goto reg; - else - goto off; - -// ============================================= - -label: - - next_token(parser, &token); - - expr->ins_len = 2; - - // move over first instruction to add in a LUI - *si = *fi; - si->data.rs = MIPS_REG_AT; - si->data.offset = 0; - - // update LUI - *fi = mips_instructions[MIPS_INS_LUI]; - fi->data.rt = MIPS_REG_AT; - fi->data.immd = 0; - - // update references - strcpy(fr->name, token.text); - fr->type = R_MIPS_HI16; - fr->addend = 0; - strcpy(sr->name, token.text); - sr->type = R_MIPS_LO16; - sr->addend = 0; - - goto end; - -// ============================================= - -off: - - if (assert_token(parser, TOK_NUMBER, &token)) - return M_ERROR; - fi->data.immd = B16(token.number); - - if (peek_token(parser, &token)) - return M_ERROR; - - if (token.type == TOK_LPAREN) - goto reg; - else - goto end; - -// ============================================= - -reg: - if (assert_token(parser, TOK_LPAREN, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - fi->data.rs = reg; - - if (assert_token(parser, TOK_RPAREN, NULL)) - return M_ERROR; - -// ============================================= -end: - if 
(peek_token(parser, &token)) - return M_ERROR; - - return M_SUCCESS; -} - -static int parse_number(struct parser *parser, uint32_t *n, uint32_t max) -{ - struct token token; - if (assert_token(parser, TOK_NUMBER, &token)) - return M_ERROR; - if (max && token.number > max) { - ERROR_POS(token, "number cannot be larger than '%d'", max); - return M_ERROR; - } - *n = token.number; - return M_SUCCESS; -} - -static int parse_instruction_r(struct parser *parser, - struct mips_instruction *ins) -{ - // format: rs, rt, rd - enum mips_register reg; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rd = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rs = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rt = reg; - - return M_SUCCESS; -} - -static int parse_instruction_r2(struct parser *parser, - struct mips_instruction *ins) -{ - // format: rs, rt - enum mips_register reg; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rs = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rt = reg; - - return M_SUCCESS; -} - -static int parse_instruction_rs(struct parser *parser, - struct mips_instruction *ins) -{ - // format: rs - enum mips_register reg; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rs = reg; - - return M_SUCCESS; -} - -static int parse_instruction_rd(struct parser *parser, - struct mips_instruction *ins) -{ - // format: rd - enum mips_register reg; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rd = reg; - - return M_SUCCESS; -} - -static int parse_instruction_i(struct parser *parser, - struct mips_instruction *ins) -{ - // format: rs, rt, immd - enum mips_register reg; - struct token token; - - if (parse_register(parser, ®)) - return M_ERROR; - 
ins->data.rt = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rs = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (assert_token(parser, TOK_NUMBER, &token)) - return M_ERROR; - - if (token.number >= MAX16) - return M_ERROR; - ins->data.immd = B16(token.number); - - return M_SUCCESS; -} - -static int parse_instruction_offset(struct parser *parser, - uint32_t max, - struct mips_instruction *ins, - struct reference *ref) -{ - int32_t n; - - switch (max) { - case MAX26: - if (get_offset_26(parser, &n, ref)) - return M_ERROR; - ins->data.offs26 = B32(n); - break; - case MAX16: - if (get_offset(parser, &n, ref)) - return M_ERROR; - ins->data.offset = B16(n); - break; - default: - return M_ERROR; + // if its a left paren, were parsing + // an offset + if (token.type == TOK_LPAREN) { + arg->type = EXPR_INS_ARG_OFFSET; + return parse_offset(parser, arg, 0); } - return M_SUCCESS; -} - -static int parse_instruction_j(struct parser *parser, - struct mips_instruction *ins, - struct reference *ref) -{ - uint32_t n; - if (get_target(parser, &n, ref) || n > MAX26) - return M_ERROR; - ins->data.target = n; - - return M_SUCCESS; -} - -static int parse_instruction_branch_equal(struct parser *parser, - struct mips_instruction *ins, - struct reference *ref) -{ - enum mips_register reg; - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rs = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rt = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - int32_t off; - if (get_offset(parser, &off, ref)) - return M_ERROR; - ins->data.offset = B16(off); - - return M_SUCCESS; -} - -static int parse_instruction_branch(struct parser *parser, - struct mips_instruction *ins, - struct reference *ref) -{ - enum mips_register reg; - int32_t n; - - if 
(parse_register(parser, ®)) - return M_ERROR; - ins->data.rs = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (get_offset(parser, &n, ref)) - return M_ERROR; - ins->data.offset = B16(n); - - return M_SUCCESS; -} - -static int parse_instruction_sl(struct parser *parser, - struct ins_expr *expr) -{ - enum mips_register reg; - struct mips_instruction *ins = &expr->ins[0]; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rt = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (get_reg_offset(parser, expr)) - return M_ERROR; - - return M_SUCCESS; -} - -static int parse_instruction_sli(struct parser *parser, - struct mips_instruction *ins) -{ - enum mips_register reg; - struct token token; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rt = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX16) - return M_ERROR; - ins->data.immd = B16(token.number); - - return M_SUCCESS; -} - -static int parse_instruction_s(struct parser *parser, - struct mips_instruction *ins) -{ - enum mips_register reg; - struct token token; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rd = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rt = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX5) - return M_ERROR; - ins->data.shamt = token.number; - - return M_SUCCESS; -} - -static int parse_instruction_sv(struct parser *parser, - struct mips_instruction *ins) -{ - enum mips_register reg; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rd = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rt = reg; - - if (assert_token(parser, 
TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, ®)) - return M_ERROR; - ins->data.rs = reg; - - return M_SUCCESS; -} - -static int parse_pseudo_li(struct parser *parser, struct ins_expr *expr) -{ - enum mips_register reg; - uint32_t immd; - - if (parse_register(parser, ®)) - return M_ERROR; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_number(parser, &immd, MAX16)) - return M_ERROR; - - expr->ins_len = 1; - expr->ins[0] = mips_instructions[MIPS_INS_ORI]; - expr->ins[0].data.rt = reg; - expr->ins[0].data.rs = MIPS_REG_ZERO; - expr->ins[0].data.immd = B16(immd); - expr->ref[0].type = R_MIPS_NONE; - - return M_SUCCESS; -} - -static int parse_pseudo_la(struct parser *parser, struct ins_expr *expr) -{ - enum mips_register reg; - struct token token; - - uint16_t hi = 0, lo = 0; - - if (parse_register(parser, ®)) - return M_ERROR; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - + // token must now be either a number (immediate) + // register, or label,,, + // ... take ownership of the next token if (next_token(parser, &token)) return M_ERROR; + // if its a register... return + // there are no other pathways + if (token.type == TOK_REG) { + arg->type = EXPR_INS_ARG_REGISTER; + string_move(&arg->reg, &token.string); + return M_SUCCESS; + } + + // if it is a label... return + // therea are no other pathways if (token.type == TOK_IDENT) { - expr->ref[0].type = R_MIPS_HI16; - expr->ref[0].addend = 0; - strcpy(expr->ref[0].name, token.text); - expr->ref[1].type = R_MIPS_LO16; - expr->ref[1].addend = 0; - strcpy(expr->ref[1].name, token.text); - } else if (token.type == TOK_NUMBER && token.number > MAX32) { - hi = token.number >> 16; - lo = token.number & 0x0000ffff; - expr->ref[0].type = R_MIPS_NONE; - expr->ref[1].type = R_MIPS_NONE; + arg->type = EXPR_INS_ARG_LABEL; + string_move(&arg->label, &token.string); + return M_SUCCESS; + } + + // now it must be a number... 
+ // throw a error if its now + if (token.type != TOK_NUMBER) { + ERROR_POS(token, "expected number, got %s", + token_str(token.type)); + token_free(&token); + return M_ERROR; + } + + uint64_t immd = (uint64_t)token.number; + // now if the next token is a lparen + // parse offset, else return immd + if (peek_token(parser, &token)) + return M_ERROR; + + if (token.type == TOK_LPAREN) { + arg->type = EXPR_INS_ARG_OFFSET; + return parse_offset(parser, arg, immd); } else { - return M_ERROR; + arg->type = EXPR_INS_ARG_IMMEDIATE; + arg->immd = immd; + return M_SUCCESS; } - expr->ins_len = 2; - expr->ins[0] = mips_instructions[MIPS_INS_LUI]; - expr->ins[0].data.rt = reg; - expr->ins[0].data.immd = B16(hi); - expr->ins[1] = mips_instructions[MIPS_INS_ADDI]; - expr->ins[1].data.rt = reg; - expr->ins[1].data.rs = reg; - expr->ins[1].data.immd = B16(lo); - - return M_SUCCESS; } -static int parse_pseudo_move(struct parser *parser, struct ins_expr *expr) +static int parse_instruction(struct parser *parser, struct string *name, + struct expr_ins *ins) { - enum mips_register rd, rs; + int len = 0; - if (parse_register(parser, &rd)) - return M_ERROR; + if (peek_eol(parser) == M_SUCCESS) + goto skip_args; - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; + while (1) { + if (len >= MAX_ARG_LENGTH) { + ERROR_POS(parser->lexer, + "reached max argument length"); + return M_ERROR; + } - if (parse_register(parser, &rs)) - return M_ERROR; - expr->ins_len = 1; - expr->ins[0] = mips_instructions[MIPS_INS_OR]; - expr->ins[0].data.rs = rs; - expr->ins[0].data.rt = MIPS_REG_ZERO; - expr->ins[0].data.rd = rd; - expr->ref[0].type = R_MIPS_NONE; + if (parse_instruction_arg(parser, &ins->args[len++])) + return M_ERROR; - return M_SUCCESS; -} + if (peek_eol(parser) == M_SUCCESS) + break; -static int parse_pseudo_nop(struct parser *parser, struct ins_expr *expr) -{ - (void) parser; - - expr->ins_len = 1; - expr->ins[0] = mips_instructions[MIPS_INS_SLL]; - expr->ref[0].type = R_MIPS_NONE; 
- - return M_SUCCESS; -} - -static int parse_pseudo_instruction(struct parser *parser, - struct ins_expr *expr, - struct token ident) -{ - // disablle logging in the logging - // module - extern int log_disabled; - log_disabled = 1; - - int res = M_ERROR; - - struct lexer_state state; - lexer_save(parser->lexer, &state); - - #define CHK(name) if (strcmp(ident.text, #name) == 0) - - CHK(li) - res = parse_pseudo_li(parser, expr); - else CHK(la) - res = parse_pseudo_la(parser, expr); - else CHK(move) - res = parse_pseudo_move(parser, expr); - else CHK(nop) - res = parse_pseudo_nop(parser, expr); - - #undef CHK - - if (res) { - // reset on fail - lexer_load(parser->lexer, &state); - expr->ins[0].data.raw = 0; - expr->ins[1].data.raw = 0; - expr->ref[0] = (struct reference) {0}; - expr->ref[1] = (struct reference) {0}; + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; } - log_disabled = 0; - return res; +skip_args: + + string_move(&ins->name, name); + ins->args_len = len; + return M_SUCCESS; } -static int parse_instruction(struct parser *parser, - struct ins_expr *expr, - struct token ident) +/* gets the next value from the parser */ +int parser_next(struct parser *parser, struct expr *expr) { - struct mips_instruction instruction; - enum mips_parse_format format; + // the next token being looked at + struct token token = { + .type = TOK_NL + }; + + // the result to return int res = M_SUCCESS; - if (parse_pseudo_instruction(parser, expr, ident) == M_SUCCESS) - return M_SUCCESS; - - if (get_instruction(ident.text, &instruction)) { - ERROR_POS(ident, "unknown instruction '%s'", ident.text); - return M_ERROR; + // skip all new lines + while (1) { + if (next_token(parser, &token)) + return M_ERROR; + if (token.type != TOK_NL) + break; + token_free(&token); } - struct mips_instruction *ins = &expr->ins[0]; - struct reference *ref = &expr->ref[0]; + expr->line_no = parser->lexer.y; + expr->byte_start = token.off; + expr->byte_end = token.off; - // this will only 
ever generate one instruction - expr->ins_len = 1; - *ins = instruction; - ref->type = R_MIPS_NONE; + // if EOF, return M_EOF + if (token.type == TOK_EOF) + return M_EOF; - format = mips_parse_formats[instruction.type]; - switch (format) { - case MIPS_PARSE_R: - res = parse_instruction_r(parser, ins); - break; - case MIPS_PARSE_R2: - res = parse_instruction_r2(parser, ins); - break; - case MIPS_PARSE_RS: - res = parse_instruction_rs(parser, ins); - break; - case MIPS_PARSE_RD: - res = parse_instruction_rd(parser, ins); - break; - case MIPS_PARSE_I: - res = parse_instruction_i(parser, ins); - break; - case MIPS_PARSE_J: - res = parse_instruction_j(parser, ins, ref); - break; - case MIPS_PARSE_O16: - res = parse_instruction_offset(parser, MAX16, ins, ref); - break; - case MIPS_PARSE_O26: - res = parse_instruction_offset(parser, MAX26, ins, ref); - break; - case MIPS_PARSE_BE: - res = parse_instruction_branch_equal(parser, ins, ref); - break; - case MIPS_PARSE_BZ: - res = parse_instruction_branch(parser, ins, ref); - break; - case MIPS_PARSE_SL: - res = parse_instruction_sl(parser, expr); - break; - case MIPS_PARSE_SLI: - res = parse_instruction_sli(parser, ins); - break; - case MIPS_PARSE_S: - res = parse_instruction_s(parser, ins); - break; - case MIPS_PARSE_SV: - res = parse_instruction_sv(parser, ins); - break; - case MIPS_PARSE_NONE: - res = M_SUCCESS; - break; + // when a ident ends with a colon + // parse a lebel + else if (token.type == TOK_LABEL) { + expr->type = EXPR_LABEL; + // label now owns string + string_move(&expr->label, &token.string); } + // when a ident starts with a dot + // parse a directive + else if (token.type == TOK_DIRECTIVE) { + expr->type = EXPR_DIRECTIVE; + res = parse_directive(parser, &token.string, &expr->directive); + } + + // peek the next token: + // 1. = means parse constant + // 2. 
else parse instruction + else { + if (token.type != TOK_IDENT) { + ERROR_POS(token, "expected ident, got %s", + token_str(token.type)); + token_free(&token); + return M_ERROR; + } + + struct token peek; + if (peek_token(parser, &peek)) { + token_free(&token); + return M_ERROR; + } + + if (peek.type == TOK_EQUAL) { + expr->type = EXPR_CONSTANT; + res = parse_constant(parser, &token.string, + &expr->constant); + } else { + expr->type = EXPR_INS; + res = parse_instruction(parser, &token.string, + &expr->instruction); + } + } + + // update byte end for expr + expr->byte_end = ftell(parser->lexer.file); + + // free tokens + token_free(&token); + + // everything must end in a new line if (res == M_SUCCESS && assert_eol(parser)) return M_ERROR; return res; } - -static int parse_directive_align(struct parser *parser, - struct mips_directive *directive) +int parser_init(const char *file, struct parser *parser) { - struct token token; - if (assert_token(parser, TOK_NUMBER, &token)) + parser->peek.type = TOK_EOF; + if (lexer_init(file, &parser->lexer)) return M_ERROR; - - if (token.number < 0) { - ERROR_POS(token, "cannot align negative"); - return M_ERROR; - } - - if (token.number > MAX16) { - ERROR_POS(token, "cannot align more than 65kb"); - return M_ERROR; - } - - directive->type = MIPS_DIRECTIVE_ALIGN; - directive->align = token.number; - - return M_SUCCESS; -} - -static int parse_directive_space(struct parser *parser, - struct mips_directive *directive) -{ - struct token token; - if (assert_token(parser, TOK_NUMBER, &token)) - return M_ERROR; - - if (token.number < 0) { - ERROR_POS(token, "cannot reserve negative"); - return M_ERROR; - } - - if (token.number > MAX16) { - ERROR_POS(token, "cannot reserve more than 65kb"); - return M_ERROR; - } - - directive->type = MIPS_DIRECTIVE_SPACE; - directive->space = token.number; - - return M_SUCCESS; -} - -static int parse_directive_whb(struct parser *parser, - struct mips_directive *directive, - enum mips_directive_type type) 
-{ - struct token token; - uint32_t size = 0; - uint32_t len = 0; - - switch (type) { - case MIPS_DIRECTIVE_WORD: - size = UINT32_MAX; - break; - case MIPS_DIRECTIVE_HALF: - size = UINT16_MAX; - break; - case MIPS_DIRECTIVE_BYTE: - size = UINT8_MAX; - break; - default: - } - - directive->type = type; - - while (1) { - if (assert_token(parser, TOK_NUMBER, &token)) - return M_ERROR; - - if (len >= MAX_ARG_LENGTH) { - ERROR_POS(token, "directives cannot be longer than " - "%d arguments", MAX_ARG_LENGTH); - return M_ERROR; - } - - if (token.number > size) { - ERROR_POS(token, "number cannot execede max size of: " - "%d", size); - return M_ERROR; - } - - switch (type) { - case MIPS_DIRECTIVE_WORD: - directive->words[len++] = token.number; - - break; - case MIPS_DIRECTIVE_HALF: - directive->halfs[len++] = token.number; - break; - case MIPS_DIRECTIVE_BYTE: - directive->bytes[len++] = token.number; - break; - default: - } - - if (peek_token(parser, &token)) - return M_ERROR; - - if (token.type == TOK_COMMA) { - next_token(parser, NULL); - continue; - } - - break; - } - - directive->len = len; - - return M_SUCCESS; -} - -static int parse_directive_extern(struct parser *parser, - struct mips_directive *directive) -{ - struct token token; - if (assert_token(parser, TOK_IDENT, &token)) - return M_ERROR; - - directive->type = MIPS_DIRECTIVE_EXTERN; - strcpy(directive->name, token.text); - - return M_SUCCESS; -} - -static int parse_directive_globl(struct parser *parser, - struct mips_directive *directive) -{ - struct token token; - if (assert_token(parser, TOK_IDENT, &token)) - return M_ERROR; - - directive->type = MIPS_DIRECTIVE_GLOBL; - strcpy(directive->name, token.text); - - return M_SUCCESS; -} - -static int parse_directive_ascii(struct parser *parser, - struct mips_directive *directive, - enum mips_directive_type type) -{ - struct token token; - if (assert_token(parser, TOK_STRING, &token)) - return M_ERROR; - - directive->type = type; - strcpy(directive->name, 
token.text); - - return M_SUCCESS; -} - -static int parse_section(struct mips_directive *directive, - char name[MAX_LEX_LENGTH]) -{ - directive->type = MIPS_DIRECTIVE_SECTION; - strcpy(directive->name, name); - - return M_SUCCESS; -} - -static int parse_directive(struct parser *parser, - struct mips_directive *directive) -{ - struct token token; - if (assert_token(parser, TOK_DIRECTIVE, &token)) - return M_ERROR; - - // .align n - if (strcmp(token.text, "align") == 0) - return parse_directive_align(parser, directive); - else if (strcmp(token.text, "space") == 0) - return parse_directive_space(parser, directive); - else if (strcmp(token.text, "word") == 0) - return parse_directive_whb(parser, directive, - MIPS_DIRECTIVE_WORD); - else if (strcmp(token.text, "half") == 0) - return parse_directive_whb(parser, directive, - MIPS_DIRECTIVE_HALF); - else if (strcmp(token.text, "byte") == 0) - return parse_directive_whb(parser, directive, - MIPS_DIRECTIVE_BYTE); - else if (strcmp(token.text, "extern") == 0) - return parse_directive_extern(parser, directive); - else if (strcmp(token.text, "globl") == 0) - return parse_directive_globl(parser, directive); - else if (strcmp(token.text, "ascii") == 0) - return parse_directive_ascii(parser, directive, - MIPS_DIRECTIVE_ASCII); - else if (strcmp(token.text, "asciiz") == 0) - return parse_directive_ascii(parser, directive, - MIPS_DIRECTIVE_ASCIIZ); - else - return parse_section(directive, token.text); -} - -static int parse_constant(struct parser *parser, struct const_expr *expr, - struct token ident) -{ - struct token number; - - if (assert_token(parser, TOK_EQUAL, NULL)) - return M_ERROR; - - if (assert_token(parser, TOK_NUMBER, &number)) - return M_ERROR; - - strcpy(expr->name,ident.text); - expr->value = number.number; - - return M_SUCCESS; -} - -static int parser_handle_ident(struct parser *parser, struct expr *expr) -{ - struct token ident; - struct token peek; - - if (assert_token(parser, TOK_IDENT, &ident)) - return M_ERROR; 
- - if (peek_token(parser, &peek)) - return M_ERROR; - - if (peek.type == TOK_EQUAL) { - expr->type = EXPR_CONSTANT; - return parse_constant(parser, &expr->constant, ident); - } else { - expr->type = EXPR_INS; - return parse_instruction(parser, &expr->ins, ident); - } -} - - -static int parse_label(struct parser *parser, - struct expr *expr) -{ - struct token token; - - if (assert_token(parser, TOK_LABEL, &token)) - return M_ERROR; - strcpy(expr->label, token.text); - - return M_SUCCESS; -} - - -int parser_next(struct parser *parser, struct expr *expr) -{ - struct token token; - int res = M_SUCCESS; - -again: - if (peek_token(parser, &token)) - return M_ERROR; - - switch (token.type) { - case TOK_NL: - next_token(parser, NULL); - goto again; - - case TOK_EOF: - res = M_EOF; - break; - - case TOK_LABEL: - expr->type = EXPR_LABEL; - res = parse_label(parser, expr); - break; - - case TOK_DIRECTIVE: - expr->type = EXPR_DIRECTIVE; - res = parse_directive(parser, &expr->directive); - break; - - case TOK_IDENT: - res = parser_handle_ident(parser, expr); - break; - - default: - ERROR_POS(token, "unexpected token '%s'", - token_str(token.type)); - return M_ERROR; - - } - - return res; -} - -int parser_init(struct lexer *lexer, struct parser *parser) -{ - parser->lexer = lexer; - parser->peek.type = TOK_EOF; return M_SUCCESS; } void parser_free(struct parser *parser) { - (void) parser; + token_free(&parser->peek); + lexer_free(&parser->lexer); +} + +static inline void expr_directive_free(struct expr_directive *dir) +{ + switch (dir->type) { + case EXPR_DIRECTIVE_SECTION: + string_free(&dir->section); + break; + case EXPR_DIRECTIVE_EXTERN: + case EXPR_DIRECTIVE_GLOBL: + string_free(&dir->label); + break; + case EXPR_DIRECTIVE_ASCII: + case EXPR_DIRECTIVE_ASCIIZ: + string_free(&dir->string); + break; + default: + } +} + +static inline void expr_ins_arg_free(struct expr_ins_arg *arg) +{ + switch (arg->type) { + case EXPR_INS_ARG_REGISTER: + string_free(&arg->reg); + break; + 
case EXPR_INS_ARG_IMMEDIATE: + break; + case EXPR_INS_ARG_LABEL: + string_free(&arg->label); + break; + case EXPR_INS_ARG_OFFSET: + string_free(&arg->offset.reg); + break; + } +} + +void expr_free(struct expr *expr) +{ + switch (expr->type) { + case EXPR_DIRECTIVE: + expr_directive_free(&expr->directive); + break; + case EXPR_CONSTANT: + string_free(&expr->constant.name); + break; + case EXPR_INS: + string_free(&expr->instruction.name); + for (uint32_t i = 0; i < expr->instruction.args_len; i++) + expr_ins_arg_free(&expr->instruction.args[i]); + break; + case EXPR_LABEL: + string_free(&expr->label); + break; + } } diff --git a/masm/parse.h b/masm/parse.h index 9e0e928..61036cd 100644 --- a/masm/parse.h +++ b/masm/parse.h @@ -6,89 +6,147 @@ #include "lex.h" #include -#include #include -/* mips directive types */ -enum mips_directive_type { - MIPS_DIRECTIVE_ALIGN, - MIPS_DIRECTIVE_SPACE, - MIPS_DIRECTIVE_WORD, - MIPS_DIRECTIVE_HALF, - MIPS_DIRECTIVE_BYTE, - MIPS_DIRECTIVE_SECTION, - MIPS_DIRECTIVE_EXTERN, - MIPS_DIRECTIVE_GLOBL, - MIPS_DIRECTIVE_ASCII, - MIPS_DIRECTIVE_ASCIIZ, +/// the type to a direcive +enum expr_directive_type { + EXPR_DIRECTIVE_ALIGN, + EXPR_DIRECTIVE_SPACE, + EXPR_DIRECTIVE_WORD, + EXPR_DIRECTIVE_HALF, + EXPR_DIRECTIVE_BYTE, + EXPR_DIRECTIVE_SECTION, + EXPR_DIRECTIVE_EXTERN, + EXPR_DIRECTIVE_GLOBL, + EXPR_DIRECTIVE_ASCII, + EXPR_DIRECTIVE_ASCIIZ, }; -/* mip32 directive */ -struct mips_directive { - enum mips_directive_type type; - uint32_t len; // used for words, halfs, bytes +/// holds a directive +struct expr_directive { + // the type of the directive + enum expr_directive_type type; + + // lengh of .word, .half, or .byte directive + uint32_t len; + + // directive data union { + // e.g. align 2 uint16_t align; + // e.g. space 4096 uint16_t space; + // e.g. .word 0x1 0x2 uint32_t words[MAX_ARG_LENGTH]; uint16_t halfs[MAX_ARG_LENGTH]; - uint8_t bytes[MAX_ARG_LENGTH]; - char name[MAX_ARG_LENGTH]; + uint8_t bytes[MAX_ARG_LENGTH]; + // e.g. 
.ascii "hello world!" + struct string string; + // e.g. .globl main + struct string label; + // e.g. .text + struct string section; }; }; -struct reference { - // ELF relocate type - unsigned char type; +/// holds a constant expression +struct expr_const { + // the name of the constant + struct string name; - /// symbol name - char name[MAX_LEX_LENGTH]; - - /// integer addend - int64_t addend; + // the value of the constant + uint32_t num; }; -struct const_expr { - char name[MAX_LEX_LENGTH]; - uint32_t value; +/// the type to a right +/// hand side argument to an +/// instruction +enum expr_ins_arg_type { + // e.g. $ra + EXPR_INS_ARG_REGISTER, + + // e.g. 0x80 + EXPR_INS_ARG_IMMEDIATE, + + // e.g. main + EXPR_INS_ARG_LABEL, + + // e.g. 4($sp) + EXPR_INS_ARG_OFFSET, }; -struct ins_expr { +/// a right hand argument +/// to an instruction +struct expr_ins_arg { + enum expr_ins_arg_type type; + + union { + // register + struct string reg; + + // immediate + uint64_t immd; + + // label + struct string label; + + // offset + struct expr_ins_offset { + // immediate + uint64_t immd; + // register + struct string reg; + } offset; + }; +}; + +/// holds a instruction +struct expr_ins { /// pesudo instructions can return /// more than one instruction - size_t ins_len; - struct mips_instruction ins[2]; + struct string name; - /// instructions can reference symbols. - /// instruction `n` will be paried with reference `n` - struct reference ref[2]; + // the arguments of the instruction + uint32_t args_len; + struct expr_ins_arg args[MAX_ARG_LENGTH]; }; enum expr_type { + // e.g. .align 2 EXPR_DIRECTIVE, + // e.g. SIZE = 8 EXPR_CONSTANT, + // e.g. li $t0, 17 EXPR_INS, + // e.g. 
_start: EXPR_LABEL, }; struct expr { enum expr_type type; + + uint32_t line_no; + uint32_t byte_start; + uint32_t byte_end; + union { // directive - struct mips_directive directive; + struct expr_directive directive; // constant - struct const_expr constant; + struct expr_const constant; // instruction - struct ins_expr ins; + struct expr_ins instruction; // label - char label[MAX_LEX_LENGTH]; + struct string label; }; }; +void expr_free(struct expr *expr); + struct parser { // the lexer // *weak* ponter, we do not own this - struct lexer *lexer; + struct lexer lexer; // the last token peeked struct token peek; }; @@ -97,7 +155,7 @@ struct parser { int parser_next(struct parser *parser, struct expr *expr); /* initalize the base parser */ -int parser_init(struct lexer *lexer, struct parser *parser); +int parser_init(const char *file, struct parser *parser); /* free the base parser */ void parser_free(struct parser *parser); diff --git a/masm/reftab.c b/masm/reftab.c new file mode 100644 index 0000000..f8793e1 --- /dev/null +++ b/masm/reftab.c @@ -0,0 +1,43 @@ +#include +#include + +#include "tab.h" + +#define REFTAB_INIT_LEN 8 + +int reftab_init(struct reference_table *reftab) +{ + reftab->size = REFTAB_INIT_LEN; + reftab->len = 0; + reftab->references = malloc(sizeof(struct reference) + * REFTAB_INIT_LEN); + + if (reftab->references == NULL) { + PERROR("cannot alloc"); + return M_ERROR; + } + + return M_SUCCESS; +} + +void reftab_free(struct reference_table *reftab) +{ + free(reftab->references); +} + +int reftab_push(struct reference_table *reftab, struct reference *ref) +{ + if (reftab->len >= reftab->size) { + reftab->size *= 2; + reftab->references = realloc(reftab->references, + sizeof(struct reference) * reftab->size); + + if (reftab->references == NULL) { + PERROR("cannot realloc"); + return M_ERROR; + } + } + + reftab->references[reftab->len++] = *ref; + return M_SUCCESS; +} diff --git a/masm/reltab.c b/masm/reltab.c deleted file mode 100644 index 
afbd5e7..0000000 --- a/masm/reltab.c +++ /dev/null @@ -1,43 +0,0 @@ -#include -#include -#include - -#include "asm.h" - -#define RELTAB_INIT_LEN 8 - -int reltab_init(struct relocation_table *reltab) -{ - reltab->size = RELTAB_INIT_LEN; - reltab->len = 0; - reltab->data = malloc(sizeof(Elf32_Rela) * RELTAB_INIT_LEN); - - if (reltab->data == NULL) { - PERROR("cannot alloc"); - return M_ERROR; - } - - return M_SUCCESS; -} - -void reltab_free(struct relocation_table *reltab) -{ - free(reltab->data); -} - -int reltab_push(struct relocation_table *reltab, const Elf32_Rela rel) -{ - if (reltab->len >= reltab->size) { - reltab->size *= 2; - reltab->data = realloc(reltab->data, sizeof(Elf32_Rela) - * reltab->size); - - if (reltab->data == NULL) { - PERROR("cannot realloc"); - return M_ERROR; - } - } - - reltab->data[reltab->len++] = rel; - return M_SUCCESS; -} diff --git a/masm/sectab.c b/masm/sectab.c deleted file mode 100644 index caf34dd..0000000 --- a/masm/sectab.c +++ /dev/null @@ -1,166 +0,0 @@ -#include -#include -#include -#include -#include - -#include "asm.h" - -#define SECTBL_INIT_LEN 8 -static const char inital_section[MAX_LEX_LENGTH] = "data"; - -int sectab_init(struct section_table *sectab) -{ - sectab->size = SECTBL_INIT_LEN; - sectab->len = 0; - sectab->sections = malloc(sizeof(struct section) * SECTBL_INIT_LEN); - - if (sectab->sections == NULL) { - PERROR("cannot alloc"); - return M_ERROR; - } - - if (sectab_alloc(sectab, §ab->current, inital_section)) - return M_ERROR; - - return M_SUCCESS; -} - -void sectab_free(struct section_table *sectab) -{ - for (size_t i = 0; i < sectab->len; i++) { - reltab_free(§ab->sections[i].reltab); - free(sectab->sections[i].entries); - } - free(sectab->sections); -} - -struct section_settings { - const char *name; - bool read; - bool write; - bool execute; - size_t align; -}; - -static struct section_settings default_section_settings[] = { - {"data", true, true, false, 1}, - {"bss", true, true, false, 1}, - {"rodata", true, 
false, false, 1}, - {"text", true, false, true, 4}, -}; - -int sectab_alloc(struct section_table *sectab, struct section **res, - const char name[MAX_LEX_LENGTH]) -{ - if (sectab->len >= sectab->size) { - sectab->size *= 2; - sectab->sections = realloc(sectab->sections, - sizeof(struct section) * sectab->size); - - if (sectab->sections == NULL) { - PERROR("cannot realloc"); - return M_ERROR; - } - } - - /* set the sectio defaults */ - struct section *sec; - sec = §ab->sections[sectab->len]; - strcpy(sec->name,name); - sec->len = 0; - sec->size = SECTBL_INIT_LEN; - sec->alignment = 1; - sec->read = true; - sec->write = true; - sec->execute = false; - sec->index = sectab->len; - sec->entries = malloc(sizeof(struct section_entry) * SECTBL_INIT_LEN); - - if (reltab_init(&sec->reltab)) - return M_ERROR; - - /* overwrite the default if the given name has their own - * defaults */ - for (int i = 0; i < 4; i++) { - struct section_settings *set = &default_section_settings[i]; - if (strcmp(set->name, name) == 0) { - sec->read = set->read; - sec->write = set->write; - sec->execute = set->execute; - sec->alignment = set->align; - break; - } - } - - if (sec->entries == NULL) { - PERROR("cannot alloc"); - return M_ERROR; - } - - sectab->len++; - - *res = sec; - return M_SUCCESS; -} - -int sectab_get(struct section_table *sectab, struct section **sec, - const char name[MAX_LEX_LENGTH]) -{ - for (size_t i = 0; i < sectab->len; i++) { - struct section *temp = §ab->sections[i]; - if (strcmp(name, temp->name) == 0) { - if (sec != NULL) - *sec = temp; - return M_SUCCESS; - } - } - - return M_ERROR; -} - -int sec_push(struct section *section, struct section_entry entry) -{ - if (section->len >= section->size) { - section->size *= 2; - void *new = realloc(section->entries, - sizeof(struct section_entry) * section->size); - - if (new == NULL) { - PERROR("cannot realloc"); - return M_ERROR; - } - - section->entries = new; - } - - section->entries[section->len++] = entry; - - return 
M_SUCCESS; -} - -size_t sec_size(struct section *sec) -{ - size_t n = 0; - for (size_t i = 0; i < sec->len; i++) { - size_t t = sec->entries[i].size; - size_t m = t % sec->alignment; - if (m) - t += sec->alignment - m; - n += t; - } - return n; -} - -size_t sec_index(struct section *sec, size_t idx) -{ - size_t n = 0; - for (size_t i = 0; i < idx; i++) { - size_t t = sec->entries[i].size; - size_t m = t % sec->alignment; - if (m) - t += sec->alignment - m; - n += t; - } - return n; -} diff --git a/masm/string.c b/masm/string.c new file mode 100644 index 0000000..c05e182 --- /dev/null +++ b/masm/string.c @@ -0,0 +1,81 @@ +#include +#include + +#include "lex.h" + +/* init a empty string buffer */ +inline void string_init(struct string *string) +{ + string->len = 0; + string->size = 0; + string->allocated = true; + string->str = NULL; +} + +/* free a string buffer */ +inline void string_free(struct string *string) +{ + if (string->allocated && string->str) + free(string->str); +} + +/* clone a string buffer */ +inline int string_clone(struct string *dst, const struct string *const src) +{ + dst->len = src->len; + dst->size = src->len; + dst->allocated = src->allocated; + + /// bss strings do not need to be + /// malloced or copied + if (src->allocated == false) { + dst->str = src->str; + return M_SUCCESS; + } + + dst->str = malloc(sizeof(char) * src->len); + if (dst->str == NULL) { + PERROR("cannot alloc"); + return M_ERROR; + } + memcpy(dst->str, src->str, sizeof(char) * src->len); + return M_SUCCESS; +} + +/* moves a string */ +inline void string_move(struct string *dst, struct string *src) +{ + dst->len = src->len; + dst->size = src->len; + dst->allocated = src->allocated; + dst->str = src->str; + + // delete ptr in src + src->str = NULL; +} + +/* pushes a char onto a string */ +int string_push(struct string *string, char c) +{ + if (string->len >= string->size) { + int len = string->size ? 
string->size * 2 : 8; + char *new = realloc(string->str, sizeof(char) + len); + if (new == NULL) { + PERROR("cannot realloc"); + return M_ERROR; + } + string->size = len; + string->str = new; + } + string->str[string->len++] = c; + return M_SUCCESS; +} + +void string_bss(struct string *string, char *src) +{ + int len = strlen(src); + string->str = src; + string->len = len; + string->size = len; + string->allocated = false; +} diff --git a/masm/strtab.c b/masm/strtab.c index 404ea73..bd914b0 100644 --- a/masm/strtab.c +++ b/masm/strtab.c @@ -4,7 +4,7 @@ #include "asm.h" -int strtab_get_str(struct str_table *strtab, const char *str, size_t *res) +int strtab_get_str(struct elf_str_table *strtab, const char *str, size_t *res) { for (size_t i = 0; i < strtab->size; i ++) { if (strcmp(strtab->ptr + i, str) == 0) { @@ -17,7 +17,7 @@ int strtab_get_str(struct str_table *strtab, const char *str, size_t *res) return M_ERROR; } -int strtab_write_str(struct str_table *strtab, const char *str, size_t *res) +int strtab_write_str(struct elf_str_table *strtab, const char *str, size_t *res) { if (strtab_get_str(strtab, str, res) == M_SUCCESS) return M_SUCCESS; @@ -36,7 +36,7 @@ int strtab_write_str(struct str_table *strtab, const char *str, size_t *res) return M_SUCCESS; } -int strtab_init(struct str_table *strtab) +int strtab_init(struct elf_str_table *strtab) { strtab->size = 1; strtab->ptr = malloc(1); @@ -48,7 +48,7 @@ int strtab_init(struct str_table *strtab) return M_SUCCESS; } -void strtab_free(struct str_table *strtab) +void strtab_free(struct elf_str_table *strtab) { free(strtab->ptr); } diff --git a/masm/symtab.c b/masm/symtab.c index 652bd42..990be46 100644 --- a/masm/symtab.c +++ b/masm/symtab.c @@ -1,4 +1,3 @@ -#include #include #include #include @@ -6,70 +5,85 @@ #include #include -#include "asm.h" +#include "lex.h" +#include "tab.h" #define SYMTBL_INIT_LEN 24 int symtab_init(struct symbol_table *symtab) { - symtab->size = SYMTBL_INIT_LEN; - symtab->len = 0; - 
symtab->symbols = malloc(sizeof(Elf32_Sym) * SYMTBL_INIT_LEN); - symtab->sections = malloc(sizeof(ssize_t) * SYMTBL_INIT_LEN); + symtab->size = SYMTBL_INIT_LEN; + symtab->len = 0; + symtab->symbols = malloc(sizeof(struct symbol) * SYMTBL_INIT_LEN); - if (symtab->symbols == NULL || symtab->sections == NULL) { - PERROR("cannot alloc"); - return M_ERROR; - } + if (symtab->symbols == NULL) { + PERROR("cannot alloc"); + return M_ERROR; + } - Elf32_Sym null = {0}; - if (symtab_push(symtab, null, -1)) - return M_ERROR; - - return M_SUCCESS; + return M_SUCCESS; } void symtab_free(struct symbol_table *symtab) { - free(symtab->symbols); - free(symtab->sections); + for (uint32_t i = 0; i < symtab->len; i++) + string_free(&symtab->symbols[i].name); + free(symtab->symbols); } -int symtab_push(struct symbol_table *symtab, Elf32_Sym sym, ssize_t sec_idx) +int symtab_push(struct symbol_table *symtab, struct symbol *sym) { - if (symtab->len >= symtab->size) { - symtab->size *= 2; - symtab->symbols = realloc(symtab->symbols, - sizeof(Elf32_Sym) * symtab->size); - symtab->sections = realloc(symtab->sections, - sizeof(ssize_t) * symtab->size); - if (symtab->symbols == NULL || symtab->sections == NULL) { - PERROR("cannot realloc"); - return M_ERROR; - } - } + if (symtab->len >= symtab->size) { + symtab->size *= 2; + symtab->symbols = realloc(symtab->symbols, + sizeof(struct symbol) * symtab->size); + if (symtab->symbols == NULL) { + PERROR("cannot realloc"); + return M_ERROR; + } + } - symtab->symbols[symtab->len] = sym; - symtab->sections[symtab->len++] = sec_idx; - return M_SUCCESS; + sym->tabidx = symtab->len; + symtab->symbols[symtab->len++] = *sym; + return M_SUCCESS; } -int symtab_find(struct symbol_table *symtab, Elf32_Sym **ptr, - size_t *idx, const char name[MAX_LEX_LENGTH]) +int symtab_find(struct symbol_table *symtab, struct symbol **res, + const char *name) { - for (uint32_t i = 0; i < symtab->len; i++) { - Elf32_Sym *sym = &symtab->symbols[i]; - const char *str = 
&symtab->strtab->ptr[ntohl(sym->st_name)]; - if (strcmp(str, name) == 0) { - if (ptr != NULL) - *ptr = sym; + for (uint32_t i = 0; i < symtab->len; i++) { + struct symbol *sym = &symtab->symbols[i]; + if (strcmp(sym->name.str, name) == 0) { + if (res != NULL) + *res = sym; + return M_SUCCESS; + } + } - ptrdiff_t diff = sym - symtab->symbols; - if (idx != NULL) - *idx = diff; - - return M_SUCCESS; - } - } - return M_ERROR; + return M_ERROR; +} + +int symtab_find_or_stub(struct symbol_table *symtab, struct symbol **res, + const struct string *const name) +{ + if (symtab_find(symtab, res, name->str) == M_SUCCESS) + return M_SUCCESS; + + struct symbol temp = { + .offset = 0, + .secidx = SYM_SEC_STUB, + .type = SYM_LOCAL, + }; + if (string_clone(&temp.name, name)) + return M_ERROR; + + if (symtab_push(symtab, &temp)) { + string_free(&temp.name); + return M_ERROR; + } + + if (res != NULL) + *res = &symtab->symbols[symtab->len - 1]; + + return M_SUCCESS; } diff --git a/masm/tab.h b/masm/tab.h new file mode 100644 index 0000000..c9e66c5 --- /dev/null +++ b/masm/tab.h @@ -0,0 +1,98 @@ +/* Copyright (c) 2024 Freya Murphy */ + +#ifndef __TAB_H__ +#define __TAB_H__ + +#include +#include + +#include "lex.h" + +/// +/// Symbol table +/// + +#define SYM_SEC_STUB (UINT32_MAX) + +enum symbol_type { + SYM_LOCAL, + SYM_GLOBAL, + SYM_EXTERN, +}; + +struct symbol { + // the offset of the symbol in a section + uint32_t offset; + // the index of section the symbol is in + uint32_t secidx; + // index into this table + uint32_t tabidx; + // the name of the symbol + struct string name; + // type + enum symbol_type type; +}; + +struct symbol_table { + // length in size in sym ammt + size_t len; + size_t size; + + // symbols + struct symbol *symbols; +}; + +/* initalize a symbol table */ +int symtab_init(struct symbol_table *symtab); + +/* free the symbol table */ +void symtab_free(struct symbol_table *symtab); + +/* add a symbol to the symbol tbl */ +int symtab_push(struct symbol_table 
*symtab, struct symbol *sym); + +/* find a symbol by name in the symbol table */ +int symtab_find(struct symbol_table *symtab, struct symbol **sym, + const char *name); +/* find an existing symbol with a name or stub a temp one */ +int symtab_find_or_stub(struct symbol_table *symtab, struct symbol **sym, + const struct string *const name); + +/// +/// Reference table +/// + +enum reference_type { + REF_NONE, + REF_MIPS_16, + REF_MIPS_26, + REF_MIPS_PC16, + REF_MIPS_LO16, + REF_MIPS_HI16, +}; + +struct reference { + enum reference_type type; + struct symbol *symbol; + uint32_t offset; +}; + +struct reference_table { + // size + size_t len; + size_t size; + + // references + struct reference *references; +}; + +/* initalize a reference table */ +int reftab_init(struct reference_table *reftab); + +/* free the reference table */ +void reftab_free(struct reference_table *reftab); + +/* add a reference to the reference tbl */ +int reftab_push(struct reference_table *reftab, struct reference *ref); + +#endif /* __TAB_H__ */