diff --git a/include/mips.h b/include/mips.h index ba33893..8d76d12 100644 --- a/include/mips.h +++ b/include/mips.h @@ -3,18 +3,502 @@ #ifndef __MIPS_H__ #define __MIPS_H__ -#include +#include +#include -enum mips_isa { - ISA_MIPS32 +/* all mips registers $0-$31 */ +enum mips_register { + MIPS_REG_ZERO = 0, + MIPS_REG_AT = 1, + MIPS_REG_V0 = 2, + MIPS_REG_V1 = 3, + MIPS_REG_A0 = 4, + MIPS_REG_A1 = 5, + MIPS_REG_A2 = 6, + MIPS_REG_A3 = 7, + MIPS_REG_T0 = 8, + MIPS_REG_T1 = 9, + MIPS_REG_T2 = 10, + MIPS_REG_T3 = 11, + MIPS_REG_T4 = 12, + MIPS_REG_T5 = 13, + MIPS_REG_T6 = 14, + MIPS_REG_T7 = 15, + MIPS_REG_S0 = 16, + MIPS_REG_S1 = 17, + MIPS_REG_S2 = 18, + MIPS_REG_S3 = 19, + MIPS_REG_S4 = 20, + MIPS_REG_S5 = 21, + MIPS_REG_S6 = 22, + MIPS_REG_S7 = 23, + MIPS_REG_T8 = 24, + MIPS_REG_T9 = 25, + MIPS_REG_K0 = 26, + MIPS_REG_K1 = 27, + MIPS_REG_GP = 28, + MIPS_REG_SP = 29, + MIPS_REG_FP = 30, + MIPS_REG_RA = 31, }; -union mips_instruction { - struct mips32_instruction mips32; +/* mips instruction format */ +enum mips_instruction_format { + MIPS_FORMAT_R, + MIPS_FORMAT_I, + MIPS_FORMAT_J, + MIPS_FORMAT_B, }; -union mips_directive { - struct mips32_directive mips32; +/* mips instructions */ +enum mips_instruction_type { + MIPS_INS_ADD, + MIPS_INS_ADDI, + MIPS_INS_ADDIU, + MIPS_INS_ADDU, + MIPS_INS_AND, + MIPS_INS_ANDI, + MIPS_INS_BAL, + MIPS_INS_BALC, + MIPS_INS_BC, + MIPS_INS_BEQ, + MIPS_INS_BEQL, + MIPS_INS_BGEZ, + MIPS_INS_BGEZAL, + MIPS_INS_BGEZALL, + MIPS_INS_BGEZL, + MIPS_INS_BGTZ, + MIPS_INS_BGTZL, + MIPS_INS_BLEZ, + MIPS_INS_BLEZL, + MIPS_INS_BLTZ, + MIPS_INS_BLTZAL, + MIPS_INS_BLTZALL, + MIPS_INS_BLTZL, + MIPS_INS_BNE, + MIPS_INS_BNEL, + MIPS_INS_DDIV, + MIPS_INS_DDIVU, + MIPS_INS_DIV, + MIPS_INS_DIVU, + MIPS_INS_J, + MIPS_INS_JAL, + MIPS_INS_JALR, + MIPS_INS_JALX, + MIPS_INS_JR, + MIPS_INS_LB, + MIPS_INS_LBU, + MIPS_INS_LH, + MIPS_INS_LHU, + MIPS_INS_LUI, + MIPS_INS_LW, + MIPS_INS_LWL, + MIPS_INS_LWR, + MIPS_INS_MFHI, + MIPS_INS_MFLO, + MIPS_INS_MTHI, + 
MIPS_INS_MTLO, + MIPS_INS_MULT, + MIPS_INS_MULTU, + MIPS_INS_SB, + MIPS_INS_SH, + MIPS_INS_SW, + MIPS_INS_SWL, + MIPS_INS_SWR, + MIPS_INS_SLL, + MIPS_INS_SLLV, + MIPS_INS_SLT, + MIPS_INS_SLTI, + MIPS_INS_SLTIU, + MIPS_INS_SLTU, + MIPS_INS_SRA, + MIPS_INS_SRAV, + MIPS_INS_SRL, + MIPS_INS_SRLV, + MIPS_INS_SUB, + MIPS_INS_SUBU, + MIPS_INS_SYSCALL, + MIPS_INS_OR, + MIPS_INS_ORI, + MIPS_INS_NOR, + MIPS_INS_XOR, + MIPS_INS_XORI, + // gets the size of the enum + __MIPS_INS_LEN, }; -#endif /* __MIPS_H */ +/* mips instruction R TYPE */ +struct mips_instruction_r_data { + uint32_t funct : 6; + uint32_t shamt : 5; + uint32_t rd : 5; + uint32_t rt : 5; + uint32_t rs : 5; + uint32_t op : 6; +} __attribute__((packed)); + +/* mips instruction I TYPE */ +struct mips_instruction_i_data { + uint32_t immd : 16; + uint32_t rt : 5; + uint32_t rs : 5; + uint32_t op : 6; +} __attribute__((packed)); + +/* mips instruction J TYPE */ +struct mips_instruction_j_data { + uint32_t target : 26; + uint32_t op : 6; +} __attribute__((packed)); + +/* mips instruction BRANCH TYPE */ +struct mips_instruction_branch_data { + int32_t offset : 16; + uint32_t funct : 5; + uint32_t rs : 5; + uint32_t op : 6; +} __attribute__((packed)); + +/* mips instruction information */ +struct mips_instruction { + // metadata + enum mips_instruction_type type; + enum mips_instruction_format format; + const char *name; + + // data + union { + uint32_t data; + struct mips_instruction_r_data R_data; + struct mips_instruction_i_data I_data; + struct mips_instruction_j_data J_data; + struct mips_instruction_branch_data B_data; + } __attribute__((packed)); +}; + + +#define MIPS_INS(ins, format, ...) 
\ + [MIPS_INS_ ##ins] = { \ + MIPS_INS_ ##ins, \ + MIPS_FORMAT_ ##format, \ + #ins, \ + .format##_data = { __VA_ARGS__ } \ + }, \ + +static const struct mips_instruction mips_instructions[] = { +/* ADD - add */ +#define MIPS_OP_SPECIAL 0b000000 +#define MIPS_FUNCT_ADD 0b100000 +MIPS_INS(ADD, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_ADD) + +/* ADDI - add immediate */ +#define MIPS_OP_ADDI 0b001000 +MIPS_INS(ADDI, I, .op = MIPS_OP_ADDI) + +/* ADDIU - add immediate unsigned */ +#define MIPS_OP_ADDIU 0b001001 +MIPS_INS(ADDIU, I, .op = MIPS_OP_ADDIU) + +/* ADDU - add unsigned */ +#define MIPS_FUNCT_ADDU 0b100001 +MIPS_INS(ADDU, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_ADDU) + +/* AND - and */ +#define MIPS_FUNCT_AND 0b100100 +MIPS_INS(AND, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_AND) + +/* ANDI - and immediate */ +#define MIPS_OP_ANDI 0b001100 +MIPS_INS(ANDI, I, .op = MIPS_OP_ANDI) + +/* BAL - branch and link */ +#define MIPS_OP_REGIMM 0b000001 +#define MIPS_FUNCT_BAL 0b10001 +MIPS_INS(BAL, B, .op = MIPS_OP_REGIMM, .funct = MIPS_FUNCT_BAL) + +/* BALC - branch and link, compact */ +#define MIPS_OP_BALC 0b111010 +MIPS_INS(BALC, J, .op = MIPS_OP_BALC) + +/* BC - branch, compact */ +#define MIPS_OP_BC 0b110010 +MIPS_INS(BC, J, .op = MIPS_OP_BC) + +/* BEQ - branch on equal */ +#define MIPS_OP_BEQ 0b000100 +MIPS_INS(BEQ, I, .op = MIPS_OP_BEQ) + +/* BEQL - branch on equal likely */ +#define MIPS_OP_BEQL 0b010100 +MIPS_INS(BEQL, I, .op = MIPS_OP_BEQL) + +/* BGEZ - branch on greater than or equal to zero */ +#define MIPS_FUNCT_BGEZ 0b00001 +MIPS_INS(BGEZ, B, .op = MIPS_OP_REGIMM, .funct = MIPS_FUNCT_BGEZ) + +/* BGEZAL - branch on greater than or equal to zero and link */ +#define MIPS_FUNCT_BGEZAL 0b10001 +MIPS_INS(BGEZAL, B, .op = MIPS_OP_REGIMM, .funct = MIPS_FUNCT_BGEZAL) + +/* BGEZALL - branch on greater than or equal to zero and link likely */ +#define MIPS_FUNCT_BGEZALL 0b10011 +MIPS_INS(BGEZALL, B, .op = MIPS_OP_REGIMM, .funct = MIPS_FUNCT_BGEZALL) + +/* 
BGEZL - branch on greater than or equal to zero likely */ +#define MIPS_FUNCT_BGEZL 0b00011 +MIPS_INS(BGEZL, B, .op = MIPS_OP_REGIMM, .funct = MIPS_FUNCT_BGEZL) + +/* BGTZ - branch on greater than zero */ +#define MIPS_OP_BGTZ 0b000111 +MIPS_INS(BGTZ, I, .op = MIPS_OP_BGTZ) + +/* BGTZL - branch on greater than zero likely */ +#define MIPS_OP_BGTZL 0b010111 +MIPS_INS(BGTZL, I, .op = MIPS_OP_BGTZL) + +/* BLEZ - branch on less than or equal to zero */ +#define MIPS_OP_BLEZ 0b000110 +MIPS_INS(BLEZ, I, .op = MIPS_OP_BLEZ) + +/* BLEZL - branch on less than or equal to zero likely */ +#define MIPS_OP_BLEZL 0b010110 +MIPS_INS(BLEZL, I, .op = MIPS_OP_BLEZL) + +/* BLTZ - branch on less than zero */ +#define MIPS_FUNCT_BLTZ 0b00000 +MIPS_INS(BLTZ, B, .op = MIPS_OP_REGIMM, .funct = MIPS_FUNCT_BLTZ) + +/* BLTZAL - branch on less than zero and link */ +#define MIPS_FUNCT_BLTZAL 0b10000 +MIPS_INS(BLTZAL, B, .op = MIPS_OP_REGIMM, .funct = MIPS_FUNCT_BLTZAL) + +/* BLTZALL - branch on less than zero and link likely */ +#define MIPS_FUNCT_BLTZALL 0b10010 +MIPS_INS(BLTZALL, B, .op = MIPS_OP_REGIMM, .funct = MIPS_FUNCT_BLTZALL) + +/* BLTZL - branch on less than zero likely */ +#define MIPS_FUNCT_BLTZL 0b00010 +MIPS_INS(BLTZL, B, .op = MIPS_OP_REGIMM, .funct = MIPS_FUNCT_BLTZL) + +/* BNE - branch on not equal */ +#define MIPS_OP_BNE 0b000101 +MIPS_INS(BNE, I, .op = MIPS_OP_BNE) + +/* BNEL - branch on not equal likely */ +#define MIPS_OP_BNEL 0b010101 +MIPS_INS(BNEL, I, .op = MIPS_OP_BNEL) + +/* DDIV - doubleword divide */ +#define MIPS_FUNCT_DDIV 0b011110 +MIPS_INS(DDIV, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_DDIV) + +/* DDIVU - doubleword divide unsigned */ +#define MIPS_FUNCT_DDIVU 0b011111 +MIPS_INS(DDIVU, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_DDIVU) + +/* DIV - divide */ +#define MIPS_FUNCT_DIV 0b011010 +MIPS_INS(DIV, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_DIV) + +/* DIVU - divide unsigned */ +#define MIPS_FUNCT_DIVU 0b011011 +MIPS_INS(DIVU, R, .op = 
MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_DIVU) + +/* J - jump */ +#define MIPS_OP_J 0b000010 +MIPS_INS(J, J, .op = MIPS_OP_J) + +/* JAL - jump and link */ +#define MIPS_OP_JAL 0b000011 +MIPS_INS(JAL, J, .op = MIPS_OP_JAL) + +/* JALR - jump and link register */ +#define MIPS_FUNCT_JALR 0b001001 +MIPS_INS(JALR, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_JALR) + +/* JALX - jump and link exchange */ +#define MIPS_OP_JALX 0b011101 +MIPS_INS(JALX, J, .op = MIPS_OP_JALX) + +/* JR - jump register */ +#define MIPS_FUNCT_JR 0b001000 +MIPS_INS(JR, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_JR) + +/* LB - load byte */ +#define MIPS_OP_LB 0b100000 +MIPS_INS(LB, I, .op = MIPS_OP_LB) + +/* LBU - load byte unsigned */ +#define MIPS_OP_LBU 0b100100 +MIPS_INS(LBU, I, .op = MIPS_OP_LBU) + +/* LH - load half */ +#define MIPS_OP_LH 0b100001 +MIPS_INS(LH, I, .op = MIPS_OP_LH) + +/* LHU - load half unsigned */ +#define MIPS_OP_LHU 0b100101 +MIPS_INS(LHU, I, .op = MIPS_OP_LHU) + +/* LUI - load upper immediate */ +#define MIPS_OP_LUI 0b001111 +MIPS_INS(LUI, I, .op = MIPS_OP_LUI) + +/* LW - load word */ +#define MIPS_OP_LW 0b100011 +MIPS_INS(LW, I, .op = MIPS_OP_LW) + +/* LWL - load word left */ +#define MIPS_OP_LWL 0b100010 +MIPS_INS(LWL, I, .op = MIPS_OP_LWL) + +/* LWR - load word right */ +#define MIPS_OP_LWR 0b100110 +MIPS_INS(LWR, I, .op = MIPS_OP_LWR) + +/* MFHI - move from hi */ +#define MIPS_FUNCT_MFHI 0b010000 +MIPS_INS(MFHI, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_MFHI) + +/* MFLO - move from lo */ +#define MIPS_FUNCT_MFLO 0b010010 +MIPS_INS(MFLO, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_MFLO) + +/* MTHI - move to hi */ +#define MIPS_FUNCT_MTHI 0b010001 +MIPS_INS(MTHI, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_MTHI) + +/* MTLO - move to lo */ +#define MIPS_FUNCT_MTLO 0b010011 +MIPS_INS(MTLO, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_MTLO) + +/* MULT - multiply */ +#define MIPS_FUNCT_MULT 0b011000 +MIPS_INS(MULT, R, .op = MIPS_OP_SPECIAL, .funct = 
MIPS_FUNCT_MULT) + +/* MULTU - multiply unsigned */ +#define MIPS_FUNCT_MULTU 0b011001 +MIPS_INS(MULTU, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_MULTU) + +/* SB - store byte */ +#define MIPS_OP_SB 0b101000 +MIPS_INS(SB, I, .op = MIPS_OP_SB) + +/* SH - store half */ +#define MIPS_OP_SH 0b101001 +MIPS_INS(SH, I, .op = MIPS_OP_SH) + +/* SW - store word */ +#define MIPS_OP_SW 0b101011 +MIPS_INS(SW, I, .op = MIPS_OP_SW) + +/* SWL - store word left */ +#define MIPS_OP_SWL 0b101010 +MIPS_INS(SWL, I, .op = MIPS_OP_SWL) + +/* SWR - store word right */ +#define MIPS_OP_SWR 0b101110 +MIPS_INS(SWR, I, .op = MIPS_OP_SWR) + +/* SLL - shift left logical */ +#define MIPS_FUNCT_SLL 0b000000 +MIPS_INS(SLL, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SLL) + +/* SLLV - shift left logical variable */ +#define MIPS_FUNCT_SLLV 0b000100 +MIPS_INS(SLLV, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SLLV) + +/* SLT - set less than */ +#define MIPS_FUNCT_SLT 0b101010 +MIPS_INS(SLT, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SLT) + +/* SLTI - set less than immediate */ +#define MIPS_OP_SLTI 0b001010 +MIPS_INS(SLTI, I, .op = MIPS_OP_SLTI) + +/* SLTIU - set less than immediate unsigned */ +#define MIPS_OP_SLTIU 0b001011 +MIPS_INS(SLTIU, I, .op = MIPS_OP_SLTIU) + +/* SLTU - set less than unsigned */ +#define MIPS_FUNCT_SLTU 0b101011 +MIPS_INS(SLTU, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SLTU) + +/* SRA - shift right arithmetic */ +#define MIPS_FUNCT_SRA 0b000011 +MIPS_INS(SRA, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SRA) + +/* SRAV - shift right arithmetic variable */ +#define MIPS_FUNCT_SRAV 0b000111 +MIPS_INS(SRAV, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SRAV) + +/* SRL - shift right logical */ +#define MIPS_FUNCT_SRL 0b000010 +MIPS_INS(SRL, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SRL) + +/* SRLV - shift right logical variable */ +#define MIPS_FUNCT_SRLV 0b000110 +MIPS_INS(SRLV, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SRLV) + +/* SUB - subtract */ 
+#define MIPS_FUNCT_SUB 0b100010 +MIPS_INS(SUB, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SUB) + +/* SUBU - subtract unsigned */ +#define MIPS_FUNCT_SUBU 0b100011 +MIPS_INS(SUBU, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SUBU) + +/* SYSCALL - syscall */ +#define MIPS_FUNCT_SYSCALL 0b001100 +MIPS_INS(SYSCALL, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SYSCALL) + +/* OR - or */ +#define MIPS_FUNCT_OR 0b100101 +MIPS_INS(OR, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_OR) + +/* ORI - or immediate */ +#define MIPS_OP_ORI 0b001101 +MIPS_INS(ORI, I, .op = MIPS_OP_ORI) + +/* NOR - not or */ +#define MIPS_FUNCT_NOR 0b100111 +MIPS_INS(NOR, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_NOR) + +/* XOR - exclusive or */ +#define MIPS_FUNCT_XOR 0b100110 +MIPS_INS(XOR, R, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_XOR) + +/* XORI - exclusive or immediate */ +#define MIPS_OP_XORI 0b001110 +MIPS_INS(XORI, I, .op = MIPS_OP_XORI) +}; + +#undef MIPS_INS + +/* mips directive types */ +enum mips_directive_type { + MIPS_DIRECTIVE_ALIGN, + MIPS_DIRECTIVE_SPACE, + MIPS_DIRECTIVE_WORD, + MIPS_DIRECTIVE_HALF, + MIPS_DIRECTIVE_BYTE, + MIPS_DIRECTIVE_SECTION, + MIPS_DIRECTIVE_EXTERN, + MIPS_DIRECTIVE_GLOBL, +}; + +/* mips directive */ +struct mips_directive { + enum mips_directive_type type; + uint32_t len; // used for words, halfs, bytes + union { + uint16_t align; + uint16_t space; + uint32_t words[MAX_ARG_LENGTH]; + uint16_t halfs[MAX_ARG_LENGTH]; + uint8_t bytes[MAX_ARG_LENGTH]; + char name[MAX_ARG_LENGTH]; + }; +}; + +#endif /* __MIPS_H__ */ diff --git a/include/mips32.h b/include/mips32.h deleted file mode 100644 index 228e213..0000000 --- a/include/mips32.h +++ /dev/null @@ -1,504 +0,0 @@ -/* Copyright (c) 2024 Freya Murphy */ - -#ifndef __MIPS32_H__ -#define __MIPS32_H__ - -#include -#include - -/* all mips registers $0-$31 */ -enum mips32_register { - MIPS32_REG_ZERO = 0, - MIPS32_REG_AT = 1, - MIPS32_REG_V0 = 2, - MIPS32_REG_V1 = 3, - MIPS32_REG_A0 = 4, - MIPS32_REG_A1 
= 5, - MIPS32_REG_A2 = 6, - MIPS32_REG_A3 = 7, - MIPS32_REG_T0 = 8, - MIPS32_REG_T1 = 9, - MIPS32_REG_T2 = 10, - MIPS32_REG_T3 = 11, - MIPS32_REG_T4 = 12, - MIPS32_REG_T5 = 13, - MIPS32_REG_T6 = 14, - MIPS32_REG_T7 = 15, - MIPS32_REG_S0 = 16, - MIPS32_REG_S1 = 17, - MIPS32_REG_S2 = 18, - MIPS32_REG_S3 = 19, - MIPS32_REG_S4 = 20, - MIPS32_REG_S5 = 21, - MIPS32_REG_S6 = 22, - MIPS32_REG_S7 = 23, - MIPS32_REG_T8 = 24, - MIPS32_REG_T9 = 25, - MIPS32_REG_K0 = 26, - MIPS32_REG_K1 = 27, - MIPS32_REG_GP = 28, - MIPS32_REG_SP = 29, - MIPS32_REG_FP = 30, - MIPS32_REG_RA = 31, -}; - -/* mips instruction format */ -enum mips32_instruction_format { - MIPS32_FORMAT_R, - MIPS32_FORMAT_I, - MIPS32_FORMAT_J, - MIPS32_FORMAT_B, -}; - -/* mips instructions */ -enum mips32_instruction_type { - MIPS32_INS_ADD, - MIPS32_INS_ADDI, - MIPS32_INS_ADDIU, - MIPS32_INS_ADDU, - MIPS32_INS_AND, - MIPS32_INS_ANDI, - MIPS32_INS_BAL, - MIPS32_INS_BALC, - MIPS32_INS_BC, - MIPS32_INS_BEQ, - MIPS32_INS_BEQL, - MIPS32_INS_BGEZ, - MIPS32_INS_BGEZAL, - MIPS32_INS_BGEZALL, - MIPS32_INS_BGEZL, - MIPS32_INS_BGTZ, - MIPS32_INS_BGTZL, - MIPS32_INS_BLEZ, - MIPS32_INS_BLEZL, - MIPS32_INS_BLTZ, - MIPS32_INS_BLTZAL, - MIPS32_INS_BLTZALL, - MIPS32_INS_BLTZL, - MIPS32_INS_BNE, - MIPS32_INS_BNEL, - MIPS32_INS_DDIV, - MIPS32_INS_DDIVU, - MIPS32_INS_DIV, - MIPS32_INS_DIVU, - MIPS32_INS_J, - MIPS32_INS_JAL, - MIPS32_INS_JALR, - MIPS32_INS_JALX, - MIPS32_INS_JR, - MIPS32_INS_LB, - MIPS32_INS_LBU, - MIPS32_INS_LH, - MIPS32_INS_LHU, - MIPS32_INS_LUI, - MIPS32_INS_LW, - MIPS32_INS_LWL, - MIPS32_INS_LWR, - MIPS32_INS_MFHI, - MIPS32_INS_MFLO, - MIPS32_INS_MTHI, - MIPS32_INS_MTLO, - MIPS32_INS_MULT, - MIPS32_INS_MULTU, - MIPS32_INS_SB, - MIPS32_INS_SH, - MIPS32_INS_SW, - MIPS32_INS_SWL, - MIPS32_INS_SWR, - MIPS32_INS_SLL, - MIPS32_INS_SLLV, - MIPS32_INS_SLT, - MIPS32_INS_SLTI, - MIPS32_INS_SLTIU, - MIPS32_INS_SLTU, - MIPS32_INS_SRA, - MIPS32_INS_SRAV, - MIPS32_INS_SRL, - MIPS32_INS_SRLV, - MIPS32_INS_SUB, - MIPS32_INS_SUBU, - 
MIPS32_INS_SYSCALL, - MIPS32_INS_OR, - MIPS32_INS_ORI, - MIPS32_INS_NOR, - MIPS32_INS_XOR, - MIPS32_INS_XORI, - // gets the size of the enum - __MIPS32_INS_LEN, -}; - -/* mips instruction R TYPE */ -struct mips32_instruction_r_data { - uint32_t funct : 6; - uint32_t shamt : 5; - uint32_t rd : 5; - uint32_t rt : 5; - uint32_t rs : 5; - uint32_t op : 6; -} __attribute__((packed)); - -/* mips instruction I TYPE */ -struct mips32_instruction_i_data { - uint32_t immd : 16; - uint32_t rt : 5; - uint32_t rs : 5; - uint32_t op : 6; -} __attribute__((packed)); - -/* mips instruction J TYPE */ -struct mips32_instruction_j_data { - uint32_t target : 26; - uint32_t op : 6; -} __attribute__((packed)); - -/* mips instruction BRANCH TYPE */ -struct mips32_instruction_branch_data { - int32_t offset : 16; - uint32_t funct : 5; - uint32_t rs : 5; - uint32_t op : 6; -} __attribute__((packed)); - -/* mips instruction information */ -struct mips32_instruction { - // metadata - enum mips32_instruction_type type; - enum mips32_instruction_format format; - const char *name; - - // data - union { - uint32_t data; - struct mips32_instruction_r_data R_data; - struct mips32_instruction_i_data I_data; - struct mips32_instruction_j_data J_data; - struct mips32_instruction_branch_data B_data; - } __attribute__((packed)); -}; - - -#define MIPS32_INS(ins, format, ...) 
\ - [MIPS32_INS_ ##ins] = { \ - MIPS32_INS_ ##ins, \ - MIPS32_FORMAT_ ##format, \ - #ins, \ - .format##_data = { __VA_ARGS__ } \ - }, \ - -static const struct mips32_instruction mips32_instructions[] = { -/* ADD - add */ -#define MIPS32_OP_SPECIAL 0b000000 -#define MIPS32_FUNCT_ADD 0b100000 -MIPS32_INS(ADD, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_ADD) - -/* ADDI - add immediate */ -#define MIPS32_OP_ADDI 0b001000 -MIPS32_INS(ADDI, I, .op = MIPS32_OP_ADDI) - -/* ADDIU - add immediate unsigned */ -#define MIPS32_OP_ADDIU 0b001001 -MIPS32_INS(ADDIU, I, .op = MIPS32_OP_ADDIU) - -/* ADDU - add unsigned */ -#define MIPS32_FUNCT_ADDU 0b100001 -MIPS32_INS(ADDU, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_ADDU) - -/* AND - and */ -#define MIPS32_FUNCT_AND 0b100100 -MIPS32_INS(AND, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_AND) - -/* ANDI - and immediate */ -#define MIPS32_OP_ANDI 0b001100 -MIPS32_INS(ANDI, I, .op = MIPS32_OP_ANDI) - -/* BAL - branch and link */ -#define MIPS32_OP_REGIMM 0b000001 -#define MIPS32_FUNCT_BAL 0b10001 -MIPS32_INS(BAL, B, .op = MIPS32_OP_REGIMM, .funct = MIPS32_FUNCT_BAL) - -/* BALC - branch and link, compact */ -#define MIPS32_OP_BALC 0b111010 -MIPS32_INS(BALC, J, .op = MIPS32_OP_BALC) - -/* BC - branch, compact */ -#define MIPS32_OP_BC 0b110010 -MIPS32_INS(BC, J, .op = MIPS32_OP_BC) - -/* BEQ - branch on equal */ -#define MIPS32_OP_BEQ 0b000100 -MIPS32_INS(BEQ, I, .op = MIPS32_OP_BEQ) - -/* BEQL - branch on equal likely */ -#define MIPS32_OP_BEQL 0b010100 -MIPS32_INS(BEQL, I, .op = MIPS32_OP_BEQL) - -/* BGEZ - branch on greater than or equal to zero */ -#define MIPS32_FUNCT_BGEZ 0b00001 -MIPS32_INS(BGEZ, B, .op = MIPS32_OP_REGIMM, .funct = MIPS32_FUNCT_BGEZ) - -/* BGEZAL - branch on greater than or equal to zero and link */ -#define MIPS32_FUNCT_BGEZAL 0b10001 -MIPS32_INS(BGEZAL, B, .op = MIPS32_OP_REGIMM, .funct = MIPS32_FUNCT_BGEZAL) - -/* BGEZAL - branch on greater than or equal to zero and link likely */ -#define 
MIPS32_FUNCT_BGEZALL 0b10011 -MIPS32_INS(BGEZALL, B, .op = MIPS32_OP_REGIMM, .funct = MIPS32_FUNCT_BGEZALL) - -/* BGEZL - branch on greater than or equal to zero likely */ -#define MIPS32_FUNCT_BGEZL 0b00011 -MIPS32_INS(BGEZL, B, .op = MIPS32_OP_REGIMM, .funct = MIPS32_FUNCT_BGEZL) - -/* BGTZ - branch on greater than zero */ -#define MIPS32_OP_BGTZ 0b000111 -MIPS32_INS(BGTZ, I, .op = MIPS32_OP_BGTZ) - -/* BGTZL - branch on greater than zero likely */ -#define MIPS32_OP_BGTZL 0b010111 -MIPS32_INS(BGTZL, I, .op = MIPS32_OP_BGTZL) - -/* BLEZ - branch on less than or equal to zero */ -#define MIPS32_OP_BLEZ 0b000110 -MIPS32_INS(BLEZ, I, .op = MIPS32_OP_BLEZ) - -/* BLEZL - branch on less than or equal to zero likely */ -#define MIPS32_OP_BLEZL 0b010110 -MIPS32_INS(BLEZL, I, .op = MIPS32_OP_BLEZL) - -/* BLTZ - branch on less than zero */ -#define MIPS32_FUNCT_BLTZ 0b00000 -MIPS32_INS(BLTZ, B, .op = MIPS32_OP_REGIMM, .funct = MIPS32_FUNCT_BLTZ) - -/* BLTZAL - branch on less than zero and link */ -#define MIPS32_FUNCT_BLTZAL 0b10000 -MIPS32_INS(BLTZAL, B, .op = MIPS32_OP_REGIMM, .funct = MIPS32_FUNCT_BLTZAL) - -/* BLTZALL - branch on less than zero and link likely */ -#define MIPS32_FUNCT_BLTZALL 0b10010 -MIPS32_INS(BLTZALL, B, .op = MIPS32_OP_REGIMM, .funct = MIPS32_FUNCT_BLTZALL) - -/* BLTZL - branch on less than zero likely */ -#define MIPS32_FUNCT_BLTZL 0b00010 -MIPS32_INS(BLTZL, B, .op = MIPS32_OP_REGIMM, .funct = MIPS32_FUNCT_BLTZL) - -/* BNE - branch on not equal */ -#define MIPS32_OP_BNE 0b000101 -MIPS32_INS(BNE, I, .op = MIPS32_OP_BNE) - -/* BNEL - branch on not equal likely */ -#define MIPS32_OP_BNEL 0b010101 -MIPS32_INS(BNEL, I, .op = MIPS32_OP_BNEL) - -/* DDIV - doubleword divide */ -#define MIPS32_FUNCT_DDIV 0b011110 -MIPS32_INS(DDIV, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_DDIV) - -/* DDIVU - doubleword divide unsigned */ -#define MIPS32_FUNCT_DDIVU 0b011111 -MIPS32_INS(DDIVU, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_DDIVU) - -/* DIV - 
divide */ -#define MIPS32_FUNCT_DIV 0b011010 -MIPS32_INS(DIV, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_DIV) - -/* DIVU - divide unsigned */ -#define MIPS32_FUNCT_DIVU 0b011011 -MIPS32_INS(DIVU, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_DIVU) - -/* J - jump */ -#define MIPS32_OP_J 0b000010 -MIPS32_INS(J, J, .op = MIPS32_OP_J) - -/* JAL - jump and link */ -#define MIPS32_OP_JAL 0b000011 -MIPS32_INS(JAL, J, .op = MIPS32_OP_JAL) - -/* JALR - jump and link register */ -#define MIPS32_FUNCT_JALR 0b001001 -MIPS32_INS(JALR, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_JALR) - -/* JALX - jump and link exchange */ -#define MIPS32_OP_JALX 0b011101 -MIPS32_INS(JALX, J, .op = MIPS32_OP_JALX) - -/* JR - jump register */ -#define MIPS32_FUNCT_JR 0b001000 -MIPS32_INS(JR, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_JR) - -/* LB - load byte */ -#define MIPS32_OP_LB 0b100000 -MIPS32_INS(LB, I, .op = MIPS32_OP_LB) - -/* LBU - load byte unsigned */ -#define MIPS32_OP_LBU 0b100100 -MIPS32_INS(LBU, I, .op = MIPS32_OP_LBU) - -/* LH - load half */ -#define MIPS32_OP_LH 0b100001 -MIPS32_INS(LH, I, .op = MIPS32_OP_LH) - -/* LHU - load half unsigned */ -#define MIPS32_OP_LHU 0b100101 -MIPS32_INS(LHU, I, .op = MIPS32_OP_LHU) - -/* LUI - load upper immediate */ -#define MIPS32_OP_LUI 0b001111 -MIPS32_INS(LUI, I, .op = MIPS32_OP_LUI) - -/* LW - load word */ -#define MIPS32_OP_LW 0b100011 -MIPS32_INS(LW, I, .op = MIPS32_OP_LW) - -/* LWL - load word left */ -#define MIPS32_OP_LWL 0b100010 -MIPS32_INS(LWL, I, .op = MIPS32_OP_LWL) - -/* LWR - load word right */ -#define MIPS32_OP_LWR 0b100110 -MIPS32_INS(LWR, I, .op = MIPS32_OP_LWR) - -/* MFHI - move from hi */ -#define MIPS32_FUNCT_MFHI 0b010000 -MIPS32_INS(MFHI, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_MFHI) - -/* MFLO - move from hi */ -#define MIPS32_FUNCT_MFLO 0b010010 -MIPS32_INS(MFLO, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_MFLO) - -/* MTHI - move from hi */ -#define MIPS32_FUNCT_MTHI 0b010001 
-MIPS32_INS(MTHI, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_MTHI) - -/* MTLO - move from hi */ -#define MIPS32_FUNCT_MTLO 0b010011 -MIPS32_INS(MTLO, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_MTLO) - -/* MULT - multiply */ -#define MIPS32_FUNCT_MULT 0b011000 -MIPS32_INS(MULT, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_MULT) - -/* MULTU - multiply unsigned */ -#define MIPS32_FUNCT_MULTU 0b011001 -MIPS32_INS(MULTU, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_MULTU) - -/* SB - store byte */ -#define MIPS32_OP_SB 0b101000 -MIPS32_INS(SB, I, .op = MIPS32_OP_SB) - -/* SH - store half */ -#define MIPS32_OP_SH 0b101001 -MIPS32_INS(SH, I, .op = MIPS32_OP_SH) - -/* SW - store word */ -#define MIPS32_OP_SW 0b101011 -MIPS32_INS(SW, I, .op = MIPS32_OP_SW) - -/* SWL - store word left */ -#define MIPS32_OP_SWL 0b101010 -MIPS32_INS(SWL, I, .op = MIPS32_OP_SWL) - -/* SWR - store word right */ -#define MIPS32_OP_SWR 0b101110 -MIPS32_INS(SWR, I, .op = MIPS32_OP_SWR) - -/* SLL - shift left logical */ -#define MIPS32_FUNCT_SLL 0b000000 -MIPS32_INS(SLL, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_SLL) - -/* SLLV - shift left logical variable */ -#define MIPS32_FUNCT_SLLV 0b000100 -MIPS32_INS(SLLV, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_SLLV) - -/* SLT - set less then */ -#define MIPS32_FUNCT_SLT 0b101010 -MIPS32_INS(SLT, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_SLT) - -/* SLTI - set less then immediate */ -#define MIPS32_OP_SLTI 0b001010 -MIPS32_INS(SLTI, I, .op = MIPS32_OP_SLTI) - -/* SLTIU - set less then imemdiate unsigned */ -#define MIPS32_OP_SLTIU 0b001011 -MIPS32_INS(SLTIU, I, .op = MIPS32_OP_SLTIU) - -/* SLTU - set less than unsigned */ -#define MIPS32_FUNCT_SLTU 0b101011 -MIPS32_INS(SLTU, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_SLTU) - -/* SRA - shift right arithmetic */ -#define MIPS32_FUNCT_SRA 0b000011 -MIPS32_INS(SRA, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_SRA) - -/* SRAV - shift right arithmetic 
variable */ -#define MIPS32_FUNCT_SRAV 0b000111 -MIPS32_INS(SRAV, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_SRAV) - -/* SRL - shift right logical */ -#define MIPS32_FUNCT_SRL 0b000010 -MIPS32_INS(SRL, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_SRL) - -/* SRLV - shift right logical variable */ -#define MIPS32_FUNCT_SRLV 0b000110 -MIPS32_INS(SRLV, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_SRLV) - -/* SUB - subtract */ -#define MIPS32_FUNCT_SUB 0b100010 -MIPS32_INS(SUB, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_SUB) - -/* SUBU - subtract unsigned */ -#define MIPS32_FUNCT_SUBU 0b100011 -MIPS32_INS(SUBU, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_SUBU) - -/* SYSCALL - syscall */ -#define MIPS32_FUNCT_SYSCALL 0b001100 -MIPS32_INS(SYSCALL, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_SYSCALL) - -/* OR - or */ -#define MIPS32_FUNCT_OR 0b100101 -MIPS32_INS(OR, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_OR) - -/* ORI - or imemdiate */ -#define MIPS32_OP_ORI 0b001101 -MIPS32_INS(ORI, I, .op = MIPS32_OP_ORI) - -/* NOR - not or */ -#define MIPS32_FUNCT_NOR 0b100111 -MIPS32_INS(NOR, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_NOR) - -/* XOR - exclusive or */ -#define MIPS32_FUNCT_XOR 0b100110 -MIPS32_INS(XOR, R, .op = MIPS32_OP_SPECIAL, .funct = MIPS32_FUNCT_XOR) - -/* XORI - exclusive or immediate */ -#define MIPS32_OP_XORI 0b001110 -MIPS32_INS(XORI, I, .op = MIPS32_OP_XORI) -}; - -#undef MIPS32_INS - -/* mips32 directive types */ -enum mips32_directive_type { - MIPS32_DIRECTIVE_ALIGN, - MIPS32_DIRECTIVE_SPACE, - MIPS32_DIRECTIVE_WORD, - MIPS32_DIRECTIVE_HALF, - MIPS32_DIRECTIVE_BYTE, - MIPS32_DIRECTIVE_SECTION, - MIPS32_DIRECTIVE_EXTERN, - MIPS32_DIRECTIVE_GLOBL, -}; - -/* mip32 directive */ -struct mips32_directive { - enum mips32_directive_type type; - uint32_t len; // used for words, halfs, bytes - union { - uint16_t align; - uint16_t space; - uint32_t words[MAX_ARG_LENGTH]; - uint16_t halfs[MAX_ARG_LENGTH]; - uint8_t 
bytes[MAX_ARG_LENGTH]; - char name[MAX_ARG_LENGTH]; - }; -}; - -#endif /* __MIPS32_H__ */ diff --git a/masm/asm.c b/masm/asm.c index 099bfe6..54d766b 100644 --- a/masm/asm.c +++ b/masm/asm.c @@ -1,77 +1,717 @@ #include +#include +#include #include +#include +#include +#include #include "asm.h" +#include "mlimits.h" +#include "parse.h" + +extern char *current_file; + +#define SYMSEC_STUB -1 +#define SYMSEC_EXTERN -1 + +static int create_symbol(struct assembler *assembler, + const char name[MAX_LEX_LENGTH], + ssize_t section_idx, + size_t section_offset, + unsigned char bind) +{ + size_t str_off; + if (strtab_write_str(&assembler->strtab, name, &str_off)) + return M_ERROR; + + Elf32_Sym symbol = { + .st_name = str_off, + .st_value = section_offset, + .st_size = 0, + .st_info = ELF32_ST_INFO(bind, STT_NOTYPE), + .st_other = ELF32_ST_VISIBILITY(STV_DEFAULT), + .st_shndx = section_idx, + }; + + // dont put magic flag values inside symbol, only real indexes + if (section_idx < 0) + symbol.st_shndx = 0; + + if (symtab_push(&assembler->symtab, symbol, section_idx)) + return M_ERROR; + + return M_SUCCESS; +} + +static int find_symbol_or_stub(struct assembler *assembler, + const char name[MAX_LEX_LENGTH], + Elf32_Sym **res, + size_t *res2) +{ + if (symtab_find(&assembler->symtab, res, res2, name) == M_SUCCESS) + return M_SUCCESS; + + if (create_symbol(assembler, name, SYMSEC_STUB, 0, STB_LOCAL)) + return M_ERROR; + + size_t idx = assembler->symtab.len - 1; + + if (res != NULL) + *res = &assembler->symtab.symbols[idx]; + if (res2 != NULL) + *res2 = idx; + + return M_SUCCESS; +} + +static int handle_directive(struct assembler *assembler, + struct mips_directive *directive) +{ + switch (directive->type) { + case MIPS_DIRECTIVE_SECTION: { + struct section_table *sec_tbl = &assembler->sectab; + struct section *sec; + if (sectab_get(sec_tbl, &sec, directive->name) + == M_SUCCESS) { + sec_tbl->current = sec; + break; + } + + if (sectab_alloc(sec_tbl, &sec, directive->name)) + 
return M_ERROR; + + sec_tbl->current = sec; + break; + } + + case MIPS_DIRECTIVE_ALIGN: { + assembler->sectab.current->alignment = + 1 << directive->align; + break; + } + + case MIPS_DIRECTIVE_SPACE: { + struct section_entry entry; + entry.type = ENT_NO_DATA; + entry.size = directive->space; + if (sec_push(assembler->sectab.current, entry)) + return M_ERROR; + break; + } + + case MIPS_DIRECTIVE_WORD: { + for (uint32_t i = 0; i < directive->len; i++) { + struct section_entry entry; + entry.type = ENT_WORD; + entry.word = directive->words[i]; + if (sec_push(assembler->sectab.current, + entry)) + return M_ERROR; + } + break; + } + + case MIPS_DIRECTIVE_HALF: { + for (uint32_t i = 0; i < directive->len; i++) { + struct section_entry entry; + entry.type = ENT_HALF; + entry.half = directive->halfs[i]; + if (sec_push(assembler->sectab.current, + entry)) + return M_ERROR; + } + break; + } + + case MIPS_DIRECTIVE_BYTE: { + for (uint32_t i = 0; i < directive->len; i++) { + struct section_entry entry; + entry.type = ENT_BYTE; + entry.byte = directive->bytes[i]; + if (sec_push(assembler->sectab.current, + entry)) + return M_ERROR; + } + break; + } + + case MIPS_DIRECTIVE_EXTERN: { + if (symtab_find(&assembler->symtab, NULL, NULL, + directive->name) == M_SUCCESS) { + ERROR("cannot extern local symbol '%s'", + directive->name); + return M_ERROR; + } + + if (create_symbol(assembler, directive->name, SYMSEC_EXTERN, 0, + STB_GLOBAL)) + return M_ERROR; + + break; + } + + case MIPS_DIRECTIVE_GLOBL: { + Elf32_Sym *sym; + if (symtab_find(&assembler->symtab, &sym, NULL, + directive->name) == M_SUCCESS) { + sym->st_info = ELF32_ST_INFO(STB_GLOBAL, STT_NOTYPE); + break; + } + + if (create_symbol(assembler, directive->name, SYMSEC_STUB, 0, + STB_GLOBAL)) + return M_ERROR; + + break; + } + } + + return M_SUCCESS; +} + +static int handle_label(struct assembler *assembler, + const char name[MAX_LEX_LENGTH]) +{ + struct section *cur = assembler->sectab.current; + + Elf32_Sym *ref; + size_t 
symidx; + + if (symtab_find(&assembler->symtab, &ref, &symidx, name) == M_SUCCESS) { + ssize_t *sec = &assembler->symtab.sections[symidx]; + + // check if the symbol is actually just a stub, if so + // we need to update it + if (*sec == SYMSEC_STUB) { + *sec = cur->index; + ref->st_value = sec_size(cur); + return M_SUCCESS; + } + + ERROR("redefined symbol '%s'", name); + return M_ERROR; + } + + if (create_symbol(assembler, name, cur->index, sec_size(cur), + STB_LOCAL)) + return M_ERROR; + + return M_SUCCESS; +} + +static int handle_ins(struct assembler *assembler, + struct ins_expr *expr) +{ + struct section *sec = assembler->sectab.current; + size_t secidx = sec->len; + + for (size_t i = 0; i < expr->ins_len; i++) { + struct mips_instruction *ins = + &expr->ins[i]; + struct reference *ref = + &expr->ref[i]; + struct section_entry entry; + + entry.type = ENT_INS; + entry.size = sizeof(struct mips_instruction); + entry.ins = *ins; + + if (sec_push(sec, entry)) + return M_ERROR; + + unsigned char type = 0; + switch (ref->type) { + case REF_NONE: + continue; + case REF_OFFESET: + type = R_MIPS_PC16; + break; + case REF_TARGET: + type = R_MIPS_26; + break; + } + + size_t symidx; + if (find_symbol_or_stub(assembler, ref->name, NULL, &symidx)) + return M_ERROR; + + Elf32_Rela rel = { + .r_info = ELF32_R_INFO(symidx, type), + .r_addend = ref->addend, + .r_offset = sec_index(sec, secidx + i), + }; + + if (reltab_push(&sec->reltab, rel)) + return M_ERROR; + + break; // NOTE(review): this break leaves the for loop, so expr->ins entries after the first one that carries a reference are never pushed - confirm intended + } + + return M_SUCCESS; +} + +static int parse_file(struct assembler *assembler) +{ + struct parser *parser = &assembler->parser; + + while (1) { + struct expr expr; + int res = parser_next(parser, &expr); + + if (res == M_ERROR) + return M_ERROR; + + if (res == M_EOF) + return M_SUCCESS; + + switch (expr.type) { + case EXPR_INS: + if (handle_ins(assembler, &expr.ins)) + return M_ERROR; + break; + case EXPR_DIRECTIVE: + if (handle_directive(assembler, + &expr.directive)) + return M_ERROR; + break; + + case 
EXPR_LABEL: + if (handle_label(assembler, expr.label)) + return M_ERROR; + break; + + case EXPR_CONSTANT: + break; + } + } + + return M_SUCCESS; +} + +static int assemble_phdr(struct assembler *assembler, Elf32_Phdr **res, + uint32_t *res2) +{ + Elf32_Phdr *phdr = malloc(sizeof(Elf32_Phdr) * + assembler->sectab.len); + if (phdr == NULL) { + ERROR("cannot alloc"); + return M_ERROR;; + } + + for (uint32_t i = 0; i < assembler->sectab.len; i++) { + Elf32_Phdr *hdr = &phdr[i]; + struct section *sec = &assembler->sectab.sections[i]; + size_t size = sec_size(sec); + hdr->p_type = PT_LOAD; + hdr->p_flags = (sec->execute << 0) | + (sec->write << 1) | + (sec->read << 2); + hdr->p_offset = 0; + hdr->p_vaddr = 0; + hdr->p_paddr = 0; + hdr->p_filesz = size; + hdr->p_memsz = size; + hdr->p_align = sec->alignment; + } + + *res = phdr; + *res2 = assembler->sectab.len; + return M_SUCCESS; +} + +static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res, + uint32_t *res2) +{ + uint32_t max_entries = 4; // symtab, strtab, shstrtab + max_entries += assembler->sectab.len; // sections + max_entries += assembler->sectab.len; // reltabs per section + + Elf32_Shdr *shdr = malloc(sizeof(Elf32_Shdr) * max_entries); + + size_t str_off; + uint32_t count = 0; + + // eeltables + for (uint32_t i = 0; i < assembler->sectab.len; i++) { + struct section *sec = &assembler->sectab.sections[i]; + const char *prefix = ".reltab."; + char reltab_name[MAX_LEX_LENGTH + 8]; + + if (sec->reltab.len == 0) + continue; + + strcpy(reltab_name, prefix); + strcat(reltab_name, sec->name); + + if (strtab_write_str(&assembler->shstrtab, + reltab_name, &str_off)) { + free(shdr); + return M_ERROR; + } + + sec->reltab_shidx = count; + shdr[count++] = (Elf32_Shdr) { + .sh_name = str_off, + .sh_type = SHT_RELA, + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = 1, + .sh_entsize = sizeof(Elf32_Rela), + }; + } + + // for each section + for 
(uint32_t i = 0; i < assembler->sectab.len; i++) { + struct section *sec = &assembler->sectab.sections[i]; + char name[MAX_LEX_LENGTH+1] = "."; + + strcat(name, sec->name); + if (strtab_write_str(&assembler->shstrtab, name, &str_off)) { + free(shdr); + return M_ERROR; + } + + sec->shdr_idx = count; + if (sec->reltab.len != 0) + shdr[sec->reltab_shidx].sh_info = count; + + shdr[count++] = (Elf32_Shdr){ + .sh_name = str_off, + .sh_type = SHT_PROGBITS, + .sh_flags = (sec->write << 0) | (sec->execute << 2) | + SHF_ALLOC, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = sec->alignment, + .sh_entsize = sizeof(struct mips_instruction), + }; + } + + // symbol table + if (strtab_write_str(&assembler->shstrtab, ".symtab", &str_off)) { + free(shdr); + return M_ERROR; + } + + assembler->symtab_shidx = count; + shdr[count++] = (Elf32_Shdr) { + .sh_name = str_off, + .sh_type = SHT_SYMTAB, + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 1, + .sh_info = 0, + .sh_addralign = 1, + .sh_entsize = sizeof(Elf32_Sym), + }; + + // string table + if (strtab_write_str(&assembler->shstrtab, ".strtab", &str_off)) { + free(shdr); + return M_ERROR; + } + + assembler->strtab_shidx = count; + shdr[count++] = (Elf32_Shdr) { + .sh_name = str_off, + .sh_type = SHT_STRTAB, + .sh_flags = SHF_STRINGS, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = 1, + .sh_entsize = 0, + }; + + // sh string table + if (strtab_write_str(&assembler->shstrtab, ".shstrtab", &str_off)) { + free(shdr); + return M_ERROR; + } + + assembler->shstrtab_shidx = count; + shdr[count++] = (Elf32_Shdr) { + .sh_name = str_off, + .sh_type = SHT_STRTAB, + .sh_flags = SHF_STRINGS, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = 1, + .sh_entsize = 0, + }; + + for (uint32_t i = 0; i < assembler->sectab.len; i++) { + struct section *sec = 
&assembler->sectab.sections[i]; + if (sec->reltab.len == 0) + continue; + shdr[sec->reltab_shidx].sh_link = + assembler->symtab_shidx; + } + + *res = shdr; + *res2 = count; + + return M_SUCCESS; +} + +static void update_offsets(struct assembler *assembler, Elf32_Ehdr *ehdr) +{ + Elf32_Shdr *shdr = (Elf32_Shdr *) assembler->shdr; + Elf32_Phdr *phdr = (Elf32_Phdr *) assembler->phdr; + uint32_t ptr = 0; + + // we must now correct offets and sizes inside the ehdr, phdr, + // and shdr + ptr += sizeof(Elf32_Ehdr); + + // phdr + ehdr->e_phoff = ptr; + ptr += assembler->phdr_len * sizeof(Elf32_Phdr); + + // reltbls + for (uint32_t i = 0; i < assembler->sectab.len; i++) { + struct section *sec = &assembler->sectab.sections[i]; + if (sec->reltab.len == 0) + continue; + int idx = sec->reltab_shidx; + int len = sec->reltab.len; + shdr[idx].sh_offset = ptr; + shdr[idx].sh_size = len * sizeof(Elf32_Rela); + ptr += len * sizeof(Elf32_Rela); + } + + // sections + for (uint32_t i = 0; i < assembler->sectab.len; i++) { + struct section *sec = &assembler->sectab.sections[i]; + int idx = sec->shdr_idx; + phdr[i].p_offset = ptr; + phdr[i].p_vaddr = ptr; + phdr[i].p_paddr = ptr; + shdr[idx].sh_offset = ptr; + shdr[idx].sh_size = phdr[i].p_filesz; + shdr[idx].sh_addr = phdr[i].p_vaddr; + shdr[idx].sh_addralign = phdr[i].p_align; + ptr += phdr[i].p_filesz; + } + + // symtab + shdr[assembler->symtab_shidx].sh_offset = ptr; + shdr[assembler->symtab_shidx].sh_link = assembler->strtab_shidx; + shdr[assembler->symtab_shidx].sh_size = + assembler->symtab.len * sizeof(Elf32_Sym); + ptr += assembler->symtab.len * sizeof(Elf32_Sym); + + // strtab + shdr[assembler->strtab_shidx].sh_offset = ptr; + shdr[assembler->strtab_shidx].sh_size = assembler->strtab.size; + ptr += assembler->strtab.size; + + // shstrtab + shdr[assembler->shstrtab_shidx].sh_offset = ptr; + shdr[assembler->shstrtab_shidx].sh_size = assembler->shstrtab.size; + ptr += assembler->shstrtab.size; + + // shdr + ehdr->e_shoff = ptr; +} 
+ +static void update_sym_shindx(struct assembler *assembler) +{ + for (size_t i = 0; i < assembler->symtab.len; i++) + { + Elf32_Sym *sym = &assembler->symtab.symbols[i]; + ssize_t sec = assembler->symtab.sections[i]; + + if (sec >= 0) { + sym->st_shndx = assembler-> + sectab.sections[sec].shdr_idx; + } + } +} + +static int write_file(struct assembler *assembler, Elf32_Ehdr *ehdr, + const char *path) +{ + FILE *out = fopen(path, "w"); + + if (out == NULL) { + ERROR("cannot write '%s'", path); + return M_ERROR; + } + + // ehdr + fwrite(ehdr, sizeof(Elf32_Ehdr), 1, out); + + // phdr + fwrite(assembler->phdr, sizeof(Elf32_Phdr), assembler->phdr_len, out); + + // reltbls + for (uint32_t i = 0; i < assembler->sectab.len; i++) { + struct section *sec = &assembler->sectab.sections[i]; + if (sec->reltab.len == 0) + continue; + void *ptr = sec->reltab.data; + int len = sec->reltab.len; + fwrite(ptr, sizeof(Elf32_Rela), len, out); + } + + // sections + for (uint32_t i = 0; i < assembler->sectab.len; i++) { + struct section *sec = &assembler->sectab.sections[i]; + for (uint32_t j = 0; j < sec->len; j++) { + struct section_entry *entry = &sec->entries[j]; + size_t size = entry->size; + fwrite(&entry->data, size, 1, out); + while(size % sec->alignment) { + uint8_t zero = 0; + fwrite(&zero, 1, 1, out); + size++; + } + } + } + + // sym tbl + fwrite(assembler->symtab.symbols, sizeof(Elf32_Sym), + assembler->symtab.len, out); + + // str tbl + fwrite(assembler->strtab.ptr, assembler->strtab.size, 1, out); + + // shstr tbl + fwrite(assembler->shstrtab.ptr, assembler->shstrtab.size, 1, out); + + // shdr + fwrite(assembler->shdr, sizeof(Elf32_Shdr), assembler->shdr_len, out); + + fclose(out); + + return M_SUCCESS; +} + +static int assemble_elf(struct assembler *assembler, const char *out) +{ + if (assemble_phdr(assembler, (Elf32_Phdr **) &assembler->phdr, + &assembler->phdr_len)) { + return M_ERROR; + } + + if (assemble_shdr(assembler, (Elf32_Shdr **) &assembler->shdr, + 
&assembler->shdr_len)) { + return M_ERROR; + }; + + Elf32_Ehdr ehdr = { + .e_ident = { + [EI_MAG0] = ELFMAG0, + [EI_MAG1] = ELFMAG1, + [EI_MAG2] = ELFMAG2, + [EI_MAG3] = ELFMAG3, + [EI_CLASS] = ELFCLASS32, + [EI_DATA] = ELFDATA2LSB, + [EI_VERSION] = EV_CURRENT, + [EI_OSABI] = ELFOSABI_NONE, + [EI_ABIVERSION] = 0x00, + [EI_PAD] = 0x00, + }, + .e_type = ET_REL, + .e_machine = EM_MIPS, + .e_version = EV_CURRENT, + .e_entry = 0x00, + .e_phoff = 0x00, + .e_shoff = 0x00, + .e_flags = EF_MIPS_ARCH_32R6, + .e_ehsize = sizeof(Elf32_Ehdr), + .e_phentsize = sizeof(Elf32_Phdr), + .e_phnum = assembler->phdr_len, + .e_shentsize = sizeof(Elf32_Shdr), + .e_shnum = assembler->shdr_len, + .e_shstrndx = assembler->shstrtab_shidx, + }; + + update_offsets(assembler, &ehdr); + update_sym_shindx(assembler); + + if (write_file(assembler, &ehdr, out)) + return M_ERROR; + + return M_SUCCESS; +} + +int assemble_file(struct assembler_arguments args) +{ + struct assembler assembler; + int res = M_SUCCESS; + + current_file = args.in_file; + + if (assembler_init(&assembler, args.in_file)) + return M_ERROR; + + if (res == M_SUCCESS) + res = parse_file(&assembler); + + if (res == M_SUCCESS) + res = assemble_elf(&assembler, args.out_file); + + assembler_free(&assembler); + + return res; +} int assembler_init(struct assembler *assembler, const char *path) { if (lexer_init(path, &assembler->lexer)) return M_ERROR; - if (parser_init(&assembler->lexer, &assembler->parser)) { - lexer_free(&assembler->lexer); + if (parser_init(&assembler->lexer, &assembler->parser)) return M_ERROR; - } - if (strtbl_init(&assembler->shstr_tbl)) { - parser_free(&assembler->parser); - lexer_free(&assembler->lexer); + if (strtab_init(&assembler->shstrtab)) return M_ERROR; - } - if (strtbl_init(&assembler->str_tbl)) { - strtbl_free(&assembler->shstr_tbl); - parser_free(&assembler->parser); - lexer_free(&assembler->lexer); + if (strtab_init(&assembler->strtab)) return M_ERROR; - } - if (symtbl_init(&assembler->sym_tbl)) { - 
strtbl_free(&assembler->str_tbl); - strtbl_free(&assembler->shstr_tbl); - parser_free(&assembler->parser); - lexer_free(&assembler->lexer); + if (symtab_init(&assembler->symtab)) return M_ERROR; - } - assembler->meta = NULL; + if (sectab_init(&assembler->sectab)) + return M_ERROR; + + assembler->symtab.strtab = &assembler->strtab; assembler->phdr = NULL; assembler->shdr = NULL; - assembler->symtab = NULL; return M_SUCCESS; } void assembler_free(struct assembler *assembler) { - if (assembler->meta) { - for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { - if (assembler->meta[i].reltbl != NULL) { - free(assembler->meta[i].reltbl); - } - } - free(assembler->meta); - } - if (assembler->phdr) free(assembler->phdr); if (assembler->shdr) free(assembler->shdr); - if (assembler->symtab) - free(assembler->symtab); - symtbl_free(&assembler->sym_tbl); - strtbl_free(&assembler->str_tbl); - strtbl_free(&assembler->shstr_tbl); + sectab_free(&assembler->sectab); + symtab_free(&assembler->symtab); + strtab_free(&assembler->strtab); + strtab_free(&assembler->shstrtab); parser_free(&assembler->parser); lexer_free(&assembler->lexer); } - -int assemble_file(struct assembler_arguments args) { - switch (args.isa) { - case ISA_MIPS32: - return assemble_file_mips32(args); - } - return M_ERROR; -} diff --git a/masm/asm.h b/masm/asm.h index 86f6b9a..c8a6394 100644 --- a/masm/asm.h +++ b/masm/asm.h @@ -4,101 +4,235 @@ #define __ASM_H__ #include +#include +#include -#include "lex.h" #include "parse.h" +#include "lex.h" -enum symbol_flag { - SYM_LOCAL, - SYM_GLOBAL, - SYM_EXTERNAL, -}; - -struct symbol { - char name[MAX_LEX_LENGTH]; - uint32_t index; - struct section *sec; - enum symbol_flag flag; - -}; - -struct symbol_table { - uint32_t count; - uint32_t len; - struct symbol *symbols; -}; - -int symtbl_init(struct symbol_table *sym_tbl); -void symtbl_free(struct symbol_table *sym_tbl); - -int symtbl_push(struct symbol_table *sym_tbl, struct symbol sym); -int symtbl_find(struct 
symbol_table *sym_tbl, struct symbol **sym, - const char name[MAX_LEX_LENGTH]); - +/// +/// ELF string table +/// struct str_table { - char *ptr; + // size of the ptr in bytes size_t size; + + // pointer that contains + // the strings + char *ptr; }; /* initalize a string table */ -int strtbl_init(struct str_table *str_tbl); +int strtab_init(struct str_table *strtab); /* free a string table */ -void strtbl_free(struct str_table *str_tbl); +void strtab_free(struct str_table *strtab); /* get a string form the string table */ -int strtbl_get_str(struct str_table *str_tbl, const char *str, size_t *res); +int strtab_get_str(struct str_table *strtab, const char *str, size_t *res); /* get or append a string into the string table */ -int strtbl_write_str(struct str_table *str_tbl, const char *str, size_t *res); +int strtab_write_str(struct str_table *strtab, const char *str, size_t *res); + + +/// +/// ELF symbol table +/// + +struct symbol_table { + // length in size in sym ammt + size_t len; + size_t size; + + // the Elf symbols + Elf32_Sym *symbols; + + // keeps track of what section each ELF symbol is in + // *!!this is NOT the section header index in the ELF ehdr!!* + ssize_t *sections; + + // symbols reference a string table that acutally + // holds the strings + // + // *weak* ptr, we do not own this!!! 
+ struct str_table *strtab; -struct section_meta { - void *reltbl; - uint32_t reltbl_len; - uint32_t reltbl_idx; // reltbl idx in shdr - uint32_t shdr_idx; // sec idx in shdr - uint32_t v_addr; }; +/* initalize a symbol table */ +int symtab_init(struct symbol_table *symtab); + +/* free the symbol table */ +void symtab_free(struct symbol_table *symtab); + +/* add a symbol to the symbol tbl */ +int symtab_push(struct symbol_table *symtab, const Elf32_Sym sym, + ssize_t sec_idx); + +/* find a symbol by name in the symbol table */ +int symtab_find(struct symbol_table *symtab, Elf32_Sym **sym, size_t *idx, + const char name[MAX_LEX_LENGTH]); + +/// +/// ELF relocation table +/// + +struct relocation_table { + size_t len; + size_t size; + Elf32_Rela *data; +}; + +/* initalize a relocation table */ +int reltab_init(struct relocation_table *reltab); + +/* free the relocation table */ +void reltab_free(struct relocation_table *reltab); + +/* add a entry to the relocation table */ +int reltab_push(struct relocation_table *reltab, const Elf32_Rela rel); + +/// +/// section entry +/// + +enum section_entry_type { + ENT_INS, + ENT_WORD, + ENT_HALF, + ENT_BYTE, + ENT_NO_DATA, +}; + +/* holds a entry inside the section, i.e. a instruction, raw data, + * special directives */ +struct section_entry { + size_t size; + enum section_entry_type type; + + union { + // to get memory address + char data; + + // data + struct mips_instruction ins; + int32_t word; + int16_t half; + int8_t byte; + }; +}; + +/// +/// section +/// + +/* holds a section of the asm file (i.e. 
.text, .bss, .data) */ +struct section { + // length and size of amount of entries + size_t len; + size_t size; + struct section_entry *entries; + + // section name + char name[MAX_LEX_LENGTH]; + + // index of the section in + // all the sections + size_t index; + + // index of the sectio in + // the ELF shdr + size_t shdr_idx; + + // ELF section data + bool read; + bool write; + bool execute; + uint16_t alignment; + + // ELF tables + size_t reltab_shidx; + struct relocation_table reltab; +}; + +/* get the size of the section in bytes */ +size_t sec_size(struct section *section); + +/* get the index of a entry in bytes */ +size_t sec_index(struct section *section, size_t index); + +/* add a section entry to the section */ +int sec_push(struct section *section, struct section_entry entry); + +/* holds eachs section */ +struct section_table { + // length and size of amount of sections + size_t len; + size_t size; + struct section *sections; + + // the current section + struct section *current; +}; + +/* initalize the section table */ +int sectab_init(struct section_table *sec_tbl); + +/* free the section table */ +void sectab_free(struct section_table *sec_tbl); + +/* create a new section in the section table */ +int sectab_alloc(struct section_table *sec_tbl, struct section **sec, + const char name[MAX_LEX_LENGTH]); + +/* get a section by name from the section table */ +int sectab_get(struct section_table *sec_tbl, struct section **sec, + const char name[MAX_LEX_LENGTH]); + +/// +/// assembler +/// + struct assembler { // the token lexer struct lexer lexer; // the expression parser struct parser parser; - // shdr indexes - struct section_meta *meta; - size_t shstrtbl_idx; - size_t strtbl_idx; - size_t symtab_idx; + /// ELF tables + size_t symtab_shidx; + struct symbol_table symtab; + size_t strtab_shidx; + struct str_table strtab; + size_t shstrtab_shidx; + struct str_table shstrtab; - // symbols and strings - struct symbol_table sym_tbl; - struct str_table 
shstr_tbl; - struct str_table str_tbl; + /// Segments + struct section_table sectab; - // elf data - void *phdr; // void* since could be Elf32 or Elf64 - void *shdr; - void *symtab; + /// program header + Elf32_Phdr *phdr; uint32_t phdr_len; + + /// section header + Elf32_Shdr *shdr; uint32_t shdr_len; - uint32_t symtab_len; }; +/* defines arguments to the assembler */ struct assembler_arguments { char *in_file; char *out_file; - enum mips_isa isa; }; +/* initalize the assembler */ int assembler_init(struct assembler *assembler, const char *path); + +/* free the assembler */ void assembler_free(struct assembler *assembler); +/* assemble a file */ int assemble_file(struct assembler_arguments args); -/* assemble a mips32 file*/ -int assemble_file_mips32(struct assembler_arguments args); - #endif /* __ASM_H__ */ diff --git a/masm/asm_mips32.c b/masm/asm_mips32.c deleted file mode 100644 index 7716f4d..0000000 --- a/masm/asm_mips32.c +++ /dev/null @@ -1,746 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "asm.h" -#include "mlimits.h" -#include "parse.h" -#include "parse_mips32.h" - -extern char *current_file; - -static int handle_directive(struct assembler *assembler, - struct mips32_directive *directive) -{ - switch (directive->type) { - case MIPS32_DIRECTIVE_SECTION: { - struct section_table *sec_tbl = &assembler->parser.sec_tbl; - struct section *sec; - if (sectbl_get(sec_tbl, &sec, directive->name) - == M_SUCCESS) { - sec_tbl->current = sec; - break; - } - - if (sectbl_alloc(sec_tbl, &sec, directive->name)) - return M_ERROR; - - sec_tbl->current = sec; - break; - } - - case MIPS32_DIRECTIVE_ALIGN: { - assembler->parser.sec_tbl.current->alignment = - 1 << directive->align; - break; - } - - case MIPS32_DIRECTIVE_SPACE: { - struct section_entry entry; - entry.type = ENT_NO_DATA; - entry.size = directive->space; - if (sec_push(assembler->parser.sec_tbl.current, entry)) - return M_ERROR; - break; - } - - case 
MIPS32_DIRECTIVE_WORD: { - for (uint32_t i = 0; i < directive->len; i++) { - struct section_entry entry; - entry.type = ENT_WORD; - entry.word = directive->words[i]; - if (sec_push(assembler->parser.sec_tbl.current, - entry)) - return M_ERROR; - } - break; - } - - case MIPS32_DIRECTIVE_HALF: { - for (uint32_t i = 0; i < directive->len; i++) { - struct section_entry entry; - entry.type = ENT_HALF; - entry.half = directive->halfs[i]; - if (sec_push(assembler->parser.sec_tbl.current, - entry)) - return M_ERROR; - } - break; - } - - case MIPS32_DIRECTIVE_BYTE: { - for (uint32_t i = 0; i < directive->len; i++) { - struct section_entry entry; - entry.type = ENT_BYTE; - entry.byte = directive->bytes[i]; - if (sec_push(assembler->parser.sec_tbl.current, - entry)) - return M_ERROR; - } - break; - } - - case MIPS32_DIRECTIVE_EXTERN: { - struct symbol symbol; - if (symtbl_find(&assembler->sym_tbl, NULL, directive->name) - == M_SUCCESS) { - ERROR("cannot extern local symbol '%s'", - directive->name); - return M_ERROR; - } - - symbol = (struct symbol) { - .name = "", - .sec = NULL, - .index = 0, - .flag = SYM_EXTERNAL, - }; - strcpy(symbol.name, directive->name); - - if (symtbl_push(&assembler->sym_tbl, symbol)) - return M_ERROR; - - break; - } - - case MIPS32_DIRECTIVE_GLOBL: { - struct symbol symbol; - if (symtbl_find(&assembler->sym_tbl, NULL, directive->name) - == M_SUCCESS) { - symbol.flag = SYM_GLOBAL; - break; - } - - symbol = (struct symbol) { - .name = "", - .sec = NULL, - .index = 0, - .flag = SYM_GLOBAL, - }; - strcpy(symbol.name, directive->name); - - if (symtbl_push(&assembler->sym_tbl, symbol)) - return M_ERROR; - - break; - } - } - - return M_SUCCESS; -} - -static int handle_label(struct assembler *assembler, - const char name[MAX_LEX_LENGTH]) -{ - struct symbol *ref; - if (symtbl_find(&assembler->sym_tbl, &ref, name) == M_SUCCESS) { - if (ref->flag == SYM_GLOBAL && ref->sec == NULL) { - ref->sec = assembler->parser.sec_tbl.current; - ref->index = 
assembler->parser.sec_tbl.current->count; - return M_SUCCESS; - } - ERROR("redefined symbol '%s'", name); - return M_ERROR; - } - - struct symbol symbol; - symbol = (struct symbol) { - .name = "", - .sec = assembler->parser.sec_tbl.current, - .index = assembler->parser.sec_tbl.current->count, - .flag = SYM_LOCAL, - }; - strcpy(symbol.name, name); - - if (symtbl_push(&assembler->sym_tbl, symbol)) - return M_ERROR; - - return M_SUCCESS; -} - -static int parse_file(struct assembler *assembler) -{ - struct parser *parser = &assembler->parser; - - while (1) { - struct expr expr; - if (parser_next(parser, &expr)) - return M_ERROR; - - switch (expr.type) { - case EXPR_INS: - struct section_entry entry; - entry.type = ENT_INS; - entry.size = sizeof(struct mips32_instruction); - entry.ins = expr.ins; - if (sec_push(parser->sec_tbl.current, entry)) - return M_ERROR; - break; - - case EXPR_DIRECTIVE: - if (handle_directive(assembler, - &expr.directive.mips32)) - return M_ERROR; - break; - - case EXPR_LABEL: - if (handle_label(assembler, expr.text)) - return M_ERROR; - break; - - case EXPR_CONSTANT: - break; - } - } - - struct section_meta *meta = malloc(sizeof(struct section_meta) * - parser->sec_tbl.count); - if (meta == NULL) { - ERROR("cannot alloc"); - return M_ERROR; - } - - assembler->meta = meta; - - size_t ptr = 0; - for (uint32_t i = 0; i < parser->sec_tbl.count; i++) { - struct section *sec = &parser->sec_tbl.sections[i]; - meta[i].v_addr = ptr; - ptr += sec_size(sec); - } - - return M_SUCCESS; -} - -static int assemble_phdr(struct assembler *assembler, Elf32_Phdr **res, - uint32_t *res2) -{ - struct parser *parser = &assembler->parser; - Elf32_Phdr *phdr = malloc(sizeof(Elf32_Phdr) * - parser->sec_tbl.count); - if (phdr == NULL) { - ERROR("cannot alloc"); - return M_ERROR;; - } - - for (uint32_t i = 0; i < parser->sec_tbl.count; i++) { - Elf32_Phdr *hdr = &phdr[i]; - struct section *sec = &parser->sec_tbl.sections[i]; - size_t size = sec_size(sec); - hdr->p_type = 
PT_LOAD; - hdr->p_flags = (sec->execute << 0) | - (sec->write << 1) | - (sec->read << 2); - hdr->p_offset = 0; - hdr->p_vaddr = 0; - hdr->p_paddr = 0; - hdr->p_filesz = size; - hdr->p_memsz = size; - hdr->p_align = sec->alignment; - } - - *res = phdr; - *res2 = parser->sec_tbl.count; - return M_SUCCESS; -} - -static int assemble_symtab(struct assembler *assembler, Elf32_Sym **res, - uint32_t *res2) -{ - Elf32_Sym *stbl = malloc(sizeof(Elf32_Sym) * assembler->sym_tbl - .count); - size_t size = 0; - - if (stbl == NULL) - return M_ERROR; - - for (uint32_t i = 0; i < assembler->sym_tbl.count; i++) { - struct symbol *sym = &assembler->sym_tbl.symbols[i]; - size_t str_off; - unsigned char bind; - unsigned char type = STT_NOTYPE; - - if (strtbl_write_str(&assembler->str_tbl, - sym->name, &str_off)) { - free(stbl); - return M_ERROR; - } - - if (sym->flag == SYM_GLOBAL && sym->sec == NULL) { - ERROR("never defined global symbol '%s'", sym->name); - return M_ERROR; - } - - if (sym->flag == SYM_LOCAL) - bind = STB_LOCAL; - else - bind = STB_GLOBAL; - - stbl[i] = (Elf32_Sym) { - .st_name = str_off, - .st_value = sym->index, - .st_size = 0, - .st_info = ELF32_ST_INFO(bind, type), - .st_other = ELF32_ST_VISIBILITY(STV_DEFAULT), - .st_shndx = 0, - }; - size = i + 1; - }; - - *res = stbl; - *res2 = size; - - return M_SUCCESS; -} - -static void assemble_symtab_shndx(struct assembler *assembler, Elf32_Sym *tbl) -{ - for (uint32_t i = 0; i < assembler->sym_tbl.count; i++) { - struct symbol *sym = &assembler->sym_tbl.symbols[i]; - if (sym->sec != NULL) - tbl[i].st_shndx = - assembler->meta[sym->sec->index].shdr_idx; - } -} - -static int assemble_reltbl_sec(struct assembler *assembler, Elf32_Sym *symtab, - uint32_t symtab_len, struct section *sec) -{ - uint32_t len = 0; - - for (uint32_t i = 0; i < assembler->parser.ref_tbl.count; i++) { - struct reference *ref = - &assembler->parser.ref_tbl.references[i]; - if (ref->section->index == sec->index) { - len++; - } - } - - if (len == 0) { 
- assembler->meta[sec->index].reltbl = NULL; - return M_SUCCESS; - } - - Elf32_Rela *reltbl = malloc(sizeof(Elf32_Rela) * len); - - if (reltbl == NULL) { - ERROR("cannot alloc"); - return M_ERROR; - } - - for (uint32_t i = 0; i < assembler->parser.ref_tbl.count; i++) { - struct reference *ref = - &assembler->parser.ref_tbl.references[i]; - struct mips32_instruction *ins = &ref->section-> - entries[ref->index].ins.mips32; - - if (ref->section->index != sec->index) { - continue; - } - - int32_t addend = 0; - unsigned char type = 0; - switch (ref->type) { - case REF_OFFESET: - addend = ins->B_data.offset; - type = R_MIPS_PC16; - break; - case REF_TARGET: - addend = ins->J_data.target; - type = R_MIPS_26; - break; - } - - int32_t symidx = -1; - - for (uint32_t i = 0; i < symtab_len; i++) { - Elf32_Sym *sym = &symtab[i]; - const char *str = - &assembler->str_tbl.ptr[sym->st_name]; - if (strcmp(ref->name, str) == 0) { - symidx = i; - break; - } - } - - if (symidx == -1) { - ERROR("undefined symbol '%s'", ref->name); - free(reltbl); - return M_ERROR; - } - - reltbl[i] = (Elf32_Rela) { - .r_info = ELF32_R_INFO(symidx, type), - .r_addend = addend, - .r_offset = sec_index(ref->section, ref->index), - }; - }; - - assembler->meta[sec->index].reltbl_len = len; - assembler->meta[sec->index].reltbl = reltbl; - - return M_SUCCESS; -} - -static int assemble_reltbl(struct assembler *assembler, Elf32_Sym *symtab, - uint32_t symtab_len) -{ - for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { - struct section *sec = &assembler->parser.sec_tbl.sections[i]; - if (assemble_reltbl_sec(assembler, symtab, symtab_len, sec)) - return M_ERROR; - } - - return M_SUCCESS; -} - -static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res, - uint32_t *res2) -{ - uint32_t max_entries = 4; // symtab, strtab, shstrtab - max_entries += assembler->parser.sec_tbl.count; // sections - max_entries += assembler->parser.sec_tbl.count; // reltabs per section - - Elf32_Shdr *shdr = 
malloc(sizeof(Elf32_Shdr) * max_entries); - - size_t str_off; - uint32_t count = 0; - - // eeltables - for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { - - if (assembler->meta[i].reltbl == NULL) - continue; - - struct section *sec = &assembler->parser.sec_tbl.sections[i]; - const char *prefix = ".reltab."; - char reltab_name[MAX_LEX_LENGTH + 8]; - - strcpy(reltab_name, prefix); - strcat(reltab_name, sec->name); - - if (strtbl_write_str(&assembler->shstr_tbl, - reltab_name, &str_off)) { - free(shdr); - return M_ERROR; - } - - assembler->meta[i].reltbl_idx = count; - shdr[count++] = (Elf32_Shdr) { - .sh_name = str_off, - .sh_type = SHT_RELA, - .sh_flags = 0, - .sh_addr = 0, - .sh_offset = 0, - .sh_size = 0, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = 1, - .sh_entsize = sizeof(Elf32_Rela), - }; - } - - // for each section - for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { - struct section *sec = &assembler->parser.sec_tbl.sections[i]; - char name[MAX_LEX_LENGTH+1] = "."; - strcat(name, sec->name); - if (strtbl_write_str(&assembler->shstr_tbl, name, &str_off)) { - free(shdr); - return M_ERROR; - } - assembler->meta[i].shdr_idx = count; - if (assembler->meta[i].reltbl != NULL) - shdr[assembler->meta[i].reltbl_idx].sh_info = count; - shdr[count++] = (Elf32_Shdr){ - .sh_name = str_off, - .sh_type = SHT_PROGBITS, - .sh_flags = (sec->write << 0) | (sec->execute << 2) | - SHF_ALLOC, - .sh_addr = 0, - .sh_offset = 0, - .sh_size = 0, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = sec->alignment, - .sh_entsize = sizeof(struct mips32_instruction), - }; - } - - // symbol table - if (strtbl_write_str(&assembler->shstr_tbl, ".symtab", &str_off)) { - free(shdr); - return M_ERROR; - } - - assembler->symtab_idx = count; - shdr[count++] = (Elf32_Shdr) { - .sh_name = str_off, - .sh_type = SHT_SYMTAB, - .sh_flags = 0, - .sh_addr = 0, - .sh_offset = 0, - .sh_size = 0, - .sh_link = 1, - .sh_info = 0, - .sh_addralign = 1, - .sh_entsize = 
sizeof(Elf32_Sym), - }; - - // string table - if (strtbl_write_str(&assembler->shstr_tbl, ".strtab", &str_off)) { - free(shdr); - return M_ERROR; - } - - assembler->strtbl_idx = count; - shdr[count++] = (Elf32_Shdr) { - .sh_name = str_off, - .sh_type = SHT_STRTAB, - .sh_flags = SHF_STRINGS, - .sh_addr = 0, - .sh_offset = 0, - .sh_size = 0, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = 1, - .sh_entsize = 0, - }; - - // sh string table - if (strtbl_write_str(&assembler->shstr_tbl, ".shstrtab", &str_off)) { - free(shdr); - return M_ERROR; - } - - assembler->shstrtbl_idx = count; - shdr[count++] = (Elf32_Shdr) { - .sh_name = str_off, - .sh_type = SHT_STRTAB, - .sh_flags = SHF_STRINGS, - .sh_addr = 0, - .sh_offset = 0, - .sh_size = 0, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = 1, - .sh_entsize = 0, - }; - - for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { - if (assembler->meta[i].reltbl == NULL) - continue; - shdr[assembler->meta[i].reltbl_idx].sh_link = - assembler->symtab_idx; - } - - *res = shdr; - *res2 = count; - - return M_SUCCESS; -} - -static void update_offsets(struct assembler *assembler, Elf32_Ehdr *ehdr) -{ - Elf32_Shdr *shdr = (Elf32_Shdr *) assembler->shdr; - Elf32_Phdr *phdr = (Elf32_Phdr *) assembler->phdr; - uint32_t ptr = 0; - - // we must now correct offets and sizes inside the ehdr, phdr, - // and shdr - ptr += sizeof(Elf32_Ehdr); - - // phdr - ehdr->e_phoff = ptr; - ptr += assembler->phdr_len * sizeof(Elf32_Phdr); - - // reltbls - for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { - if (assembler->meta[i].reltbl == NULL) - continue; - int idx = assembler->meta[i].reltbl_idx; - int len = assembler->meta[i].reltbl_len; - shdr[idx].sh_offset = ptr; - shdr[idx].sh_size = len * sizeof(Elf32_Rela); - ptr += len * sizeof(Elf32_Rela); - } - - // sections - for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { - int idx = assembler->meta[i].shdr_idx; - phdr[i].p_offset = ptr; - phdr[i].p_vaddr = ptr; - 
phdr[i].p_paddr = ptr; - shdr[idx].sh_offset = ptr; - shdr[idx].sh_size = phdr[i].p_filesz; - shdr[idx].sh_addr = phdr[i].p_vaddr; - shdr[idx].sh_addralign = phdr[i].p_align; - ptr += phdr[i].p_filesz; - } - - // symtab - shdr[assembler->symtab_idx].sh_offset = ptr; - shdr[assembler->symtab_idx].sh_link = assembler->strtbl_idx; - shdr[assembler->symtab_idx].sh_size = - assembler->symtab_len * sizeof(Elf32_Sym); - ptr += assembler->symtab_len * sizeof(Elf32_Sym); - - // strtbl - shdr[assembler->strtbl_idx].sh_offset = ptr; - shdr[assembler->strtbl_idx].sh_size = assembler->str_tbl.size; - ptr += assembler->str_tbl.size; - - // shstrtbl - shdr[assembler->shstrtbl_idx].sh_offset = ptr; - shdr[assembler->shstrtbl_idx].sh_size = assembler->shstr_tbl.size; - ptr += assembler->shstr_tbl.size; - - // shdr - ehdr->e_shoff = ptr; -} - -static int write_file(struct assembler *assembler, Elf32_Ehdr *ehdr, - const char *path) -{ - FILE *out = fopen(path, "w"); - - if (out == NULL) { - ERROR("cannot write '%s'", path); - return M_ERROR; - } - - // ehdr - fwrite(ehdr, sizeof(Elf32_Ehdr), 1, out); - - // phdr - fwrite(assembler->phdr, sizeof(Elf32_Phdr), assembler->phdr_len, out); - - // reltbls - for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { - if (assembler->meta[i].reltbl == NULL) - continue; - void *ptr = assembler->meta[i].reltbl; - int len = assembler->meta[i].reltbl_len; - fwrite(ptr, sizeof(Elf32_Rela), len, out); - } - - // sections - for (uint32_t i = 0; i < assembler->parser.sec_tbl.count; i++) { - struct section *sec = &assembler->parser.sec_tbl.sections[i]; - for (uint32_t j = 0; j < sec->count; j++) { - struct section_entry *entry = &sec->entries[j]; - size_t size = entry->size; - fwrite(&entry->data, size, 1, out); - while(size % sec->alignment) { - uint8_t zero = 0; - fwrite(&zero, 1, 1, out); - size++; - } - } - } - - // sym tbl - fwrite(assembler->symtab, sizeof(Elf32_Sym), - assembler->symtab_len, out); - - // str tbl - 
fwrite(assembler->str_tbl.ptr, assembler->str_tbl.size, 1, out); - - // shstr tbl - fwrite(assembler->shstr_tbl.ptr, assembler->shstr_tbl.size, 1, out); - - // shdr - fwrite(assembler->shdr, sizeof(Elf32_Shdr), assembler->shdr_len, out); - - fclose(out); - - return M_SUCCESS; -} - -static int assemble_elf(struct assembler *assembler, const char *out) -{ - if (assemble_symtab(assembler, (Elf32_Sym **) &assembler->symtab, - &assembler->symtab_len)) - return M_ERROR; - - if (assemble_reltbl(assembler, assembler->symtab, - assembler->symtab_len)) { - return M_ERROR; - }; - - if (assemble_phdr(assembler, (Elf32_Phdr **) &assembler->phdr, - &assembler->phdr_len)) { - return M_ERROR; - } - - if (assemble_shdr(assembler, (Elf32_Shdr **) &assembler->shdr, - &assembler->shdr_len)) { - return M_ERROR; - }; - - // update the symbol tables with their given section - assemble_symtab_shndx(assembler, assembler->symtab); - - Elf32_Ehdr ehdr = { - .e_ident = { - [EI_MAG0] = ELFMAG0, - [EI_MAG1] = ELFMAG1, - [EI_MAG2] = ELFMAG2, - [EI_MAG3] = ELFMAG3, - [EI_CLASS] = ELFCLASS32, - [EI_DATA] = ELFDATA2LSB, - [EI_VERSION] = EV_CURRENT, - [EI_OSABI] = ELFOSABI_NONE, - [EI_ABIVERSION] = 0x00, - [EI_PAD] = 0x00, - }, - .e_type = ET_REL, - .e_machine = EM_MIPS, - .e_version = EV_CURRENT, - .e_entry = 0x00, - .e_phoff = 0x00, - .e_shoff = 0x00, - .e_flags = EF_MIPS_ARCH_32R6, - .e_ehsize = sizeof(Elf32_Ehdr), - .e_phentsize = sizeof(Elf32_Phdr), - .e_phnum = assembler->phdr_len, - .e_shentsize = sizeof(Elf32_Shdr), - .e_shnum = assembler->shdr_len, - .e_shstrndx = assembler->shstrtbl_idx, - }; - - update_offsets(assembler, &ehdr); - - if (write_file(assembler, &ehdr, out)) - return M_ERROR; - - return M_SUCCESS; -} - -int assemble_file_mips32(struct assembler_arguments args) -{ - struct assembler assembler; - int res = M_SUCCESS; - - current_file = args.in_file; - - if (assembler_init(&assembler, args.in_file)) - return M_ERROR; - - mips32_parser_init(&assembler.parser); - - if (res == 
M_SUCCESS) - res = parse_file(&assembler); - - if (res == M_SUCCESS) - res = assemble_elf(&assembler, args.out_file); - - assembler_free(&assembler); - - return res; -} diff --git a/masm/main.c b/masm/main.c index be156d8..760e4fa 100644 --- a/masm/main.c +++ b/masm/main.c @@ -3,27 +3,24 @@ #include #include "asm.h" -#include "mips.h" void help(void) { printf("usage: masm [options] source.asm\n\n"); printf("options:\n"); printf("\t-h\t\tprints this help message\n"); - printf("\t-i isa\t\tselect a ISA to assemble to (mips32)\n"); printf("\t-o output\tselect a output file destination\n"); } int main(int argc, char **argv) { struct assembler_arguments args = { - .isa = ISA_MIPS32, .in_file = NULL, .out_file = NULL, }; int c; - while ((c = getopt(argc, argv, "ho:i:")) != 1) { + while ((c = getopt(argc, argv, "ho:")) != 1) { switch(c) { case 'h': help(); @@ -31,14 +28,6 @@ int main(int argc, char **argv) { case 'o': args.out_file = optarg; break; - case 'i': - if (strcmp(optarg, "mips32") == 0) { - args.isa = ISA_MIPS32; - } else { - ERROR("invalid isa '%s'", optarg); - return M_ERROR; - } - break; case '?': return M_ERROR; default: diff --git a/masm/parse.c b/masm/parse.c index 452045b..ccabf41 100644 --- a/masm/parse.c +++ b/masm/parse.c @@ -6,7 +6,7 @@ #include "parse.h" #include "lex.h" -int next_token(struct parser *parser, struct token *tok) +static int next_token(struct parser *parser, struct token *tok) { if (parser->peek.type != TOK_EOF) { if (tok != NULL) @@ -23,7 +23,7 @@ int next_token(struct parser *parser, struct token *tok) } -int peek_token(struct parser *parser, struct token *tok) +static int peek_token(struct parser *parser, struct token *tok) { if (parser->peek.type == TOK_EOF) { if (next_token(parser, &parser->peek)) @@ -35,7 +35,7 @@ int peek_token(struct parser *parser, struct token *tok) } -int assert_token(struct parser *parser, enum token_type type, +static int assert_token(struct parser *parser, enum token_type type, struct token *tok) { struct 
token token; @@ -51,7 +51,7 @@ int assert_token(struct parser *parser, enum token_type type, return M_SUCCESS; } -int assert_eol(struct parser *parser) +static int assert_eol(struct parser *parser) { struct token token; if (next_token(parser, &token)) @@ -63,6 +63,856 @@ int assert_eol(struct parser *parser) return M_SUCCESS; } +/* each instruction has a given parse format + * internal to the parser */ +enum mips_parse_format { + // register type: rs, rt, td + MIPS_PARSE_R, + // register type: rs, rt + MIPS_PARSE_R2, + // register type: rd + MIPS_PARSE_RD, + // register type: rs + MIPS_PARSE_RS, + // imeediate type: rs, rt, immd + MIPS_PARSE_I, + // jump type: offset + MIPS_PARSE_J, + // jump type: register + MIPS_PARSE_JR, + // offset 16b type: offset + MIPS_PARSE_O16, + // offset 26b type: offset + MIPS_PARSE_O26, + // breanch equal type: rs, rt, offset + MIPS_PARSE_BE, + // branch zero type: rs, offset + MIPS_PARSE_BZ, + // store and load: rt, offset(base) + MIPS_PARSE_SL, + // store and load immediate: rt, immediate + MIPS_PARSE_SLI, + // shift: rd, rt, sa + MIPS_PARSE_S, + // shift variable: rd, rt, rs + MIPS_PARSE_SV, + // none: + MIPS_PARSE_NONE, +}; + +#define FORMAT(ins, format) \ + [MIPS_INS_##ins] = MIPS_PARSE_##format, \ + +const enum mips_parse_format mips_parse_formats[] = { + FORMAT(ADD, R) + FORMAT(ADDI, I) + FORMAT(ADDIU, I) + FORMAT(ADDU, R) + FORMAT(AND, R) + FORMAT(ANDI, I) + FORMAT(BAL, O16) + FORMAT(BALC, O26) + FORMAT(BC, O26) + FORMAT(BEQ, BE) + FORMAT(BEQL, BE) + FORMAT(BGEZ, BZ) + FORMAT(BGEZAL, BZ) + FORMAT(BGEZALL, BZ) + FORMAT(BGEZL, BZ) + FORMAT(BGTZ, BZ) + FORMAT(BGTZL, BZ) + FORMAT(BLEZ, BZ) + FORMAT(BLEZL, BZ) + FORMAT(BLTZ, BZ) + FORMAT(BLTZAL, BZ) + FORMAT(BLTZALL, BZ) + FORMAT(BLTZL, BZ) + FORMAT(BNE, BE) + FORMAT(BNEL, BE) + FORMAT(DDIV, R2) + FORMAT(DDIVU, R2) + FORMAT(DIV, R2) + FORMAT(DIVU, R2) + FORMAT(J, J) + FORMAT(JAL, J) + FORMAT(JALR, JR) // TODO: handle rd + FORMAT(JALX, J) + FORMAT(JR, JR) + FORMAT(LB, SL) + 
FORMAT(LBU, SL) + FORMAT(LH, SL) + FORMAT(LHU, SL) + FORMAT(LUI, SLI) + FORMAT(LW, SL) + FORMAT(LWL, SL) + FORMAT(LWR, SL) + FORMAT(MFHI, RD) + FORMAT(MFLO, RD) + FORMAT(MTHI, RS) + FORMAT(MTLO, RS) + FORMAT(MULT, R2) + FORMAT(MULTU, R2) + FORMAT(SB, SL) + FORMAT(SH, SL) + FORMAT(SW, SL) + FORMAT(SWL, SL) + FORMAT(SLL, S) + FORMAT(SLLV, SV) + FORMAT(SLT, R) + FORMAT(SLTI, I) + FORMAT(SLTIU, I) + FORMAT(SLTU, R) + FORMAT(SRA, S) + FORMAT(SRAV, SV) + FORMAT(SRL, S) + FORMAT(SRLV, SV) + FORMAT(SYSCALL, NONE) + FORMAT(OR, R) + FORMAT(ORI, I) + FORMAT(NOR, R) + FORMAT(SUB, R) + FORMAT(SUBU, R) + FORMAT(XOR, R) + FORMAT(XORI, I) +}; + +#undef FORMAT + +#define MAX5 32 +#define MAX16 65536 +#define MAX26 67108864 + +static int get_reference(struct parser *parser, uint32_t *offset, + struct reference *ref, enum reference_type type) +{ + struct token token; + + if (next_token(parser, &token)) + return M_ERROR; + + if (token.type == TOK_NUMBER) { + *offset = token.number; + return M_SUCCESS; + } + + if (token.type != TOK_IDENT) { + ERROR_POS(token, "unexpected token of type '%s'", + token_str(token.type)); + return M_ERROR; + } + + strcpy(ref->name, token.text); + ref->type = type; + ref->addend = 0; + + // return zero for now + *offset = 0; + return M_SUCCESS; +} + +static int get_offset(struct parser *parser, uint32_t *offset, + struct reference *ref) +{ + return get_reference(parser, offset, ref, REF_OFFESET); +} + +static int get_target(struct parser *parser, uint32_t *offset, + struct reference *ref) +{ + return get_reference(parser, offset, ref, REF_TARGET); +} + +static int get_instruction(const char *ident, struct mips_instruction *res) +{ + for (int i = 0; i < __MIPS_INS_LEN; i++) { + struct mips_instruction ins = + mips_instructions[i]; + if (strcasecmp(ident, ins.name) == 0) { + if (res != NULL) + *res = ins; + return M_SUCCESS; + } + } + return M_ERROR; +} + +static int parse_register(struct parser *parser, enum mips_register *reg) +{ + struct token token; + if 
(assert_token(parser, TOK_REG, &token)) + return M_ERROR; + + int len = strlen(token.text); + int c0 = len > 0 ? token.text[0] : '\0', + c1 = len > 1 ? token.text[1] : '\0', + c2 = len > 2 ? token.text[2] : '\0', + c3 = len > 3 ? token.text[3] : '\0'; + + // $zero + if (c0 == 'z') { + if (c1 == 'e' && c2 == 'r' && c3 == 'o') { + *reg = MIPS_REG_ZERO; + return M_SUCCESS; + } + } + + // $a0-a3 $at + else if (c0 == 'a') { + if (c1 == 't') { + *reg = MIPS_REG_AT; + return M_SUCCESS; + } + if (c1 >= '0' && c1 <= '3') { + *reg = MIPS_REG_A0; + *reg += c1 - '0'; + return M_SUCCESS; + } + } + + // $v0-v1 + else if (c0 == 'v') { + if (c1 >= '0' && c1 <= '1') { + *reg = MIPS_REG_V0; + *reg += c1 - '0'; + return M_SUCCESS; + } + } + + // $t0-t9 + else if (c0 == 't') { + if (c1 >= '0' && c1 <= '7') { + *reg = MIPS_REG_T0; + *reg += c1 - '0'; + return M_SUCCESS; + } + // reg T8-T9 are not in order with T0-T7 + if (c1 >= '8' && c1 <= '9') { + *reg = MIPS_REG_T8; + *reg += c1 - '8'; + return M_SUCCESS; + } + } + + // $s0-s7 $sp + else if (c0 == 's') { + if (c1 >= '0' && c1 <= '7') { + *reg = MIPS_REG_S0; + *reg += c1 - '0'; + return M_SUCCESS; + } + if (c1 == 'p') { + *reg = MIPS_REG_SP; + return M_SUCCESS; + } + } + + // $k0-k1 + else if (c0 == 'k') { + if (c1 >= '0' && c1 <= '1') { + *reg = MIPS_REG_K0; + *reg += c1 - '0'; + return M_SUCCESS; + } + } + + // $gp + else if (c0 == 'g') { + if (c1 == 'p') { + *reg = MIPS_REG_GP; + return M_SUCCESS; + } + } + + // $fp + else if (c0 == 'f') { + if (c1 == 'p') { + *reg = MIPS_REG_FP; + return M_SUCCESS; + } + } + + // $rp + else if (c0 == 'r') { + if (c1 == 'p') { + *reg = MIPS_REG_RA; + return M_SUCCESS; + } + } + + // $0-31 (non aliased register names) + else if (c0 >= '0' && c0 <= '9') { + int i = c0 - '0'; + if (c1 >= '0' && c1 <= '9') { + i *= 10; + i += c1 - '0'; + } + if (i <= 31) { + *reg = i; + return M_SUCCESS; + } + } + + ERROR_POS(token, "unknown register $%s", token.text); + return M_ERROR; +} + +static int 
parse_instruction_r(struct parser *parser, + struct mips_instruction *ins) +{ + // format: rs, rt, rd + enum mips_register reg; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rd = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rs = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rt = reg; + + return M_SUCCESS; +} + +static int parse_instruction_r2(struct parser *parser, + struct mips_instruction *ins) +{ + // format: rs, rt + enum mips_register reg; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rs = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rt = reg; + + return M_SUCCESS; +} + +static int parse_instruction_rs(struct parser *parser, + struct mips_instruction *ins) +{ + // format: rs + enum mips_register reg; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rs = reg; + + return M_SUCCESS; +} + +static int parse_instruction_rd(struct parser *parser, + struct mips_instruction *ins) +{ + // format: rd + enum mips_register reg; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rd = reg; + + return M_SUCCESS; +} + +static int parse_instruction_i(struct parser *parser, + struct mips_instruction *ins) +{ + // format: rs, rt, immd + enum mips_register reg; + struct token token; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->I_data.rt = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->I_data.rs = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (assert_token(parser, TOK_NUMBER, &token)) + return M_ERROR; + + if (token.number >= MAX16) + return M_ERROR; + ins->I_data.immd = token.number; + + return M_SUCCESS; +} + 
+static int parse_instruction_offset(struct parser *parser, + uint32_t max, + struct mips_instruction *ins, + struct reference *ref) +{ + uint32_t n; + if (get_offset(parser, &n, ref) || n > max) + return M_ERROR; + + switch (max) { + case MAX26: + ins->J_data.target = n; + break; + case MAX16: + ins->B_data.offset = n; + break; + } + + return M_SUCCESS; +} + +static int parse_instruction_j(struct parser *parser, + struct mips_instruction *ins, + struct reference *ref) +{ + uint32_t n; + if (get_target(parser, &n, ref) || n > MAX26) + return M_ERROR; + ins->J_data.target = n; + + return M_SUCCESS; +} + +static int parse_instruction_jr(struct parser *parser, + struct mips_instruction *ins, + struct reference *ref) +{ + uint32_t n; + if (get_target(parser, &n, ref) || n > MAX26) + return M_ERROR; + ins->J_data.target = n; + + return M_SUCCESS; +} + +static int parse_instruction_branch_equal(struct parser *parser, + struct mips_instruction *ins) +{ + enum mips_register reg; + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rs = reg; + + return M_SUCCESS; +} + +static int parse_instruction_branch(struct parser *parser, + struct mips_instruction *ins, + struct reference *ref) +{ + enum mips_register reg; + uint32_t n; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->B_data.rs = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (get_offset(parser, &n, ref) || n > MAX16) + return M_ERROR; + ins->B_data.offset = n; + + return M_SUCCESS; +} + +static int parse_instruction_sl(struct parser *parser, + struct mips_instruction *ins, + struct reference *ref) +{ + enum mips_register reg; + uint32_t offset = 0; + struct token token; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->I_data.rt = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (peek_token(parser, &token)) + return M_ERROR; + + if (token.type != TOK_LPAREN) + if (get_offset(parser, &offset, ref)) + return M_ERROR; + 
ins->I_data.immd = offset; + + if (peek_token(parser, &token)) + return M_ERROR; + + if (token.type == TOK_NL) { + ins->I_data.rs = MIPS_REG_ZERO; + return M_SUCCESS; + } + + if (assert_token(parser, TOK_LPAREN, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->I_data.rs = reg; + + if (assert_token(parser, TOK_RPAREN, NULL)) + return M_ERROR; + + return M_SUCCESS; +} + +static int parse_instruction_sli(struct parser *parser, + struct mips_instruction *ins) +{ + enum mips_register reg; + struct token token; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->I_data.rt = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX16) + return M_ERROR; + ins->I_data.immd = token.number; + + return M_SUCCESS; +} + +static int parse_instruction_s(struct parser *parser, + struct mips_instruction *ins) +{ + enum mips_register reg; + struct token token; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rd = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rt = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX5) + return M_ERROR; + ins->R_data.shamt = token.number; + + return M_SUCCESS; +} + +static int parse_instruction_sv(struct parser *parser, + struct mips_instruction *ins) +{ + enum mips_register reg; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rd = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rt = reg; + + if (assert_token(parser, TOK_COMMA, NULL)) + return M_ERROR; + + if (parse_register(parser, ®)) + return M_ERROR; + ins->R_data.rs = reg; + + return M_SUCCESS; +} + +static int parse_instruction(struct parser *parser, + struct ins_expr 
*expr, + struct token ident) +{ + struct mips_instruction instruction; + enum mips_parse_format format; + int res = M_SUCCESS; + + if (get_instruction(ident.text, &instruction)) { + ERROR_POS(ident, "unknown instruction '%s'", ident.text); + return M_ERROR; + } + + struct mips_instruction *ins = &expr->ins[0]; + struct reference *ref = &expr->ref[0]; + + // this will only ever generate one instruction + expr->ins_len = 1; + *ins = instruction; + ref->type = REF_NONE; + + format = mips_parse_formats[instruction.type]; + switch (format) { + case MIPS_PARSE_R: + res = parse_instruction_r(parser, ins); + break; + case MIPS_PARSE_R2: + res = parse_instruction_r2(parser, ins); + break; + case MIPS_PARSE_RS: + res = parse_instruction_rs(parser, ins); + break; + case MIPS_PARSE_RD: + res = parse_instruction_rd(parser, ins); + break; + case MIPS_PARSE_I: + res = parse_instruction_i(parser, ins); + break; + case MIPS_PARSE_J: + res = parse_instruction_j(parser, ins, ref); + break; + case MIPS_PARSE_JR: + res = parse_instruction_jr(parser, ins, ref); + break; + case MIPS_PARSE_O16: + res = parse_instruction_offset(parser, MAX16, ins, ref); + break; + case MIPS_PARSE_O26: + res = parse_instruction_offset(parser, MAX26, ins, ref); + break; + case MIPS_PARSE_BE: + res = parse_instruction_branch_equal(parser, ins); + break; + case MIPS_PARSE_BZ: + res = parse_instruction_branch(parser, ins, ref); + break; + case MIPS_PARSE_SL: + res = parse_instruction_sl(parser, ins, ref); + break; + case MIPS_PARSE_SLI: + res = parse_instruction_sli(parser, ins); + break; + case MIPS_PARSE_S: + res = parse_instruction_s(parser, ins); + break; + case MIPS_PARSE_SV: + res = parse_instruction_sv(parser, ins); + break; + case MIPS_PARSE_NONE: + res = M_SUCCESS; + break; + } + + if (res == M_SUCCESS && assert_eol(parser)) + return M_ERROR; + + return res; +} + + +static int parse_directive_align(struct parser *parser, + struct mips_directive *directive) +{ + struct token token; + if 
(assert_token(parser, TOK_NUMBER, &token)) + return M_ERROR; + + if (token.number < 0) { + ERROR_POS(token, "cannot align negative"); + return M_ERROR; + } + + if (token.number > MAX16) { + ERROR_POS(token, "cannot align more than 65kb"); + return M_ERROR; + } + + directive->type = MIPS_DIRECTIVE_ALIGN; + directive->align = token.number; + + return M_SUCCESS; +} + +static int parse_directive_space(struct parser *parser, + struct mips_directive *directive) +{ + struct token token; + if (assert_token(parser, TOK_NUMBER, &token)) + return M_ERROR; + + if (token.number < 0) { + ERROR_POS(token, "cannot reserve negative"); + return M_ERROR; + } + + if (token.number > MAX16) { + ERROR_POS(token, "cannot reserve more than 65kb"); + return M_ERROR; + } + + directive->type = MIPS_DIRECTIVE_SPACE; + directive->space = token.number; + + return M_SUCCESS; +} + +static int parse_directive_whb(struct parser *parser, + struct mips_directive *directive, + enum mips_directive_type type) +{ + struct token token; + uint32_t size = 0; + uint32_t len = 0; + + switch (type) { + case MIPS_DIRECTIVE_WORD: + size = UINT32_MAX; + break; + case MIPS_DIRECTIVE_HALF: + size = UINT16_MAX; + break; + case MIPS_DIRECTIVE_BYTE: + size = UINT8_MAX; + break; + default: + } + + directive->type = type; + + while (1) { + if (assert_token(parser, TOK_NUMBER, &token)) + return M_ERROR; + + if (len >= MAX_ARG_LENGTH) { + ERROR_POS(token, "directives cannot be longer than " + "%d arguments", MAX_ARG_LENGTH); + return M_ERROR; + } + + if (token.number > size) { + ERROR_POS(token, "number cannot execede max size of: " + "%d", size); + return M_ERROR; + } + + switch (type) { + case MIPS_DIRECTIVE_WORD: + directive->words[len++] = token.number; + + break; + case MIPS_DIRECTIVE_HALF: + directive->halfs[len++] = token.number; + break; + case MIPS_DIRECTIVE_BYTE: + directive->bytes[len++] = token.number; + break; + default: + } + + if (peek_token(parser, &token)) + return M_ERROR; + + if (token.type == TOK_COMMA) 
{ + next_token(parser, NULL); + continue; + } + + break; + } + + directive->len = len; + + return M_SUCCESS; +} + +static int parse_directive_extern(struct parser *parser, + struct mips_directive *directive) +{ + struct token token; + if (assert_token(parser, TOK_IDENT, &token)) + return M_ERROR; + + directive->type = MIPS_DIRECTIVE_EXTERN; + strcpy(directive->name, token.text); + + return M_SUCCESS; +} + +static int parse_directive_globl(struct parser *parser, + struct mips_directive *directive) +{ + struct token token; + if (assert_token(parser, TOK_IDENT, &token)) + return M_ERROR; + + directive->type = MIPS_DIRECTIVE_GLOBL; + strcpy(directive->name, token.text); + + return M_SUCCESS; +} + +static int parse_section(struct mips_directive *directive, + char name[MAX_LEX_LENGTH]) +{ + directive->type = MIPS_DIRECTIVE_SECTION; + strcpy(directive->name, name); + + return M_SUCCESS; +} + +static int parse_directive(struct parser *parser, + struct mips_directive *directive) +{ + struct token token; + if (assert_token(parser, TOK_DIRECTIVE, &token)) + return M_ERROR; + + // .align n + if (strcmp(token.text, "align") == 0) + return parse_directive_align(parser, directive); + else if (strcmp(token.text, "space") == 0) + return parse_directive_space(parser, directive); + else if (strcmp(token.text, "word") == 0) + return parse_directive_whb(parser, directive, + MIPS_DIRECTIVE_WORD); + else if (strcmp(token.text, "half") == 0) + return parse_directive_whb(parser, directive, + MIPS_DIRECTIVE_HALF); + else if (strcmp(token.text, "byte") == 0) + return parse_directive_whb(parser, directive, + MIPS_DIRECTIVE_BYTE); + else if (strcmp(token.text, "extern") == 0) + return parse_directive_extern(parser, directive); + else if (strcmp(token.text, "globl") == 0) + return parse_directive_globl(parser, directive); + else + return parse_section(directive, token.text); +} + static int parse_constant(struct parser *parser, struct const_expr *expr, struct token ident) { @@ -96,7 +946,7 @@ 
static int parser_handle_ident(struct parser *parser, struct expr *expr) return parse_constant(parser, &expr->constant, ident); } else { expr->type = EXPR_INS; - return parser->parse_instruction(parser, &expr->ins, ident); + return parse_instruction(parser, &expr->ins, ident); } } @@ -108,7 +958,7 @@ static int parse_label(struct parser *parser, if (assert_token(parser, TOK_LABEL, &token)) return M_ERROR; - strcpy(expr->text, token.text); + strcpy(expr->label, token.text); return M_SUCCESS; } @@ -139,8 +989,7 @@ again: case TOK_DIRECTIVE: expr->type = EXPR_DIRECTIVE; - res = parser->parse_directive(parser, - &expr->directive); + res = parse_directive(parser, &expr->directive); break; case TOK_IDENT: @@ -161,16 +1010,11 @@ int parser_init(struct lexer *lexer, struct parser *parser) { parser->lexer = lexer; parser->peek.type = TOK_EOF; - if (sectbl_init(&parser->sec_tbl)) - return M_ERROR; - if (reftbl_init(&parser->ref_tbl)) - return M_ERROR; return M_SUCCESS; } void parser_free(struct parser *parser) { - sectbl_free(&parser->sec_tbl); - reftbl_free(&parser->ref_tbl); + (void) parser; } diff --git a/masm/parse.h b/masm/parse.h index ea8f929..9181899 100644 --- a/masm/parse.h +++ b/masm/parse.h @@ -9,135 +9,68 @@ #include #include -struct const_expr { - char name[MAX_LEX_LENGTH]; - uint32_t value; -}; - -enum expr_type { - EXPR_INS, - EXPR_DIRECTIVE, - EXPR_CONSTANT, - EXPR_LABEL, -}; - -struct expr { - enum expr_type type; - union { - // instruction - union mips_instruction ins; - // directive - union mips_directive directive; - // constant - struct const_expr constant; - // segment or label - char text[MAX_LEX_LENGTH]; - }; -}; - -enum section_entry_type { - ENT_INS, - ENT_WORD, - ENT_HALF, - ENT_BYTE, - ENT_NO_DATA, -}; - -struct section_entry { - enum section_entry_type type; - size_t size; - - union { - char data; // to get memory address - union mips_instruction ins; - int32_t word; - int16_t half; - int8_t byte; - }; -}; - -struct section { - uint32_t count; - 
uint32_t len; - uint32_t alignment; - uint32_t index; // what index is my section - char name[MAX_LEX_LENGTH]; - bool read; - bool write; - bool execute; - struct section_entry *entries; -}; - -struct section_table { - uint32_t count; - uint32_t len; - struct section *sections; - struct section *current; - char name[MAX_LEX_LENGTH]; -}; - -int sectbl_init(struct section_table *sec_tbl); -void sectbl_free(struct section_table *sec_tbl); - -int sectbl_alloc(struct section_table *sec_tbl, struct section **sec, - const char name[MAX_LEX_LENGTH]); -int sectbl_get(struct section_table *sec_tbl, struct section **sec, - const char name[MAX_LEX_LENGTH]); -int sec_push(struct section *section, struct section_entry entry); -size_t sec_size(struct section *section); -size_t sec_index(struct section *section, uint32_t index); +/// +/// reference +/// enum reference_type { + REF_NONE, REF_OFFESET, REF_TARGET, }; struct reference { enum reference_type type; - struct section *section; - uint32_t index; + + /// symbol name char name[MAX_LEX_LENGTH]; + + /// integer addend + int64_t addend; }; -struct reference_table { - uint32_t count; - uint32_t len; - struct reference *references; +struct const_expr { + char name[MAX_LEX_LENGTH]; + uint32_t value; }; -int reftbl_init(struct reference_table *ref_tbl); -void reftbl_free(struct reference_table *ref_tbl); -int reftbl_push(struct reference_table *ref_tbl, struct reference reference); +struct ins_expr { + /// pesudo instructions can return + /// more than one instruction + size_t ins_len; + struct mips_instruction ins[2]; + + /// instructions can reference symbols. 
+ /// instruction `n` will be paried with reference `n` + struct reference ref[2]; +}; + +enum expr_type { + EXPR_DIRECTIVE, + EXPR_CONSTANT, + EXPR_INS, + EXPR_LABEL, +}; + +struct expr { + enum expr_type type; + union { + // directive + struct mips_directive directive; + // constant + struct const_expr constant; + // instruction + struct ins_expr ins; + // label + char label[MAX_LEX_LENGTH]; + }; +}; struct parser { struct lexer *lexer; struct token peek; - - // sections - struct section_table sec_tbl; - - // references - struct reference_table ref_tbl; - - int (*parse_instruction)(struct parser *, union mips_instruction *, - struct token); - int (*parse_directive)(struct parser *, union mips_directive *); - int (*is_instruction)(const char *ident); }; -/* get the next token in the parser */ -int next_token(struct parser *parser, struct token *tok); - -/* peek the next token in the parser */ -int peek_token(struct parser *parser, struct token *tok); - -/* assert the next token is a specific type */ -int assert_token(struct parser *parser, enum token_type type, - struct token *tok); - -/* assert the next token is EOF or NL */ -int assert_eol(struct parser *parser); - /* get the next expression in the parser */ int parser_next(struct parser *parser, struct expr *expr); diff --git a/masm/parse_mips32.c b/masm/parse_mips32.c deleted file mode 100644 index db7f346..0000000 --- a/masm/parse_mips32.c +++ /dev/null @@ -1,872 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include "parse_mips32.h" -#include "parse.h" -#include "mlimits.h" -#include "parse.h" -#include "lex.h" - -/* each instruction has a given parse format - * internal to the parser */ -enum mips32_parse_format { - // register type: rs, rt, td - MIPS32_PARSE_R, - // register type: rs, rt - MIPS32_PARSE_R2, - // register type: rd - MIPS32_PARSE_RD, - // register type: rs - MIPS32_PARSE_RS, - // imeediate type: rs, rt, immd - MIPS32_PARSE_I, - // jump type: offset - MIPS32_PARSE_J, - 
// jump type: register - MIPS32_PARSE_JR, - // offset 16b type: offset - MIPS32_PARSE_O16, - // offset 26b type: offset - MIPS32_PARSE_O26, - // breanch equal type: rs, rt, offset - MIPS32_PARSE_BE, - // branch zero type: rs, offset - MIPS32_PARSE_BZ, - // store and load: rt, offset(base) - MIPS32_PARSE_SL, - // store and load immediate: rt, immediate - MIPS32_PARSE_SLI, - // shift: rd, rt, sa - MIPS32_PARSE_S, - // shift variable: rd, rt, rs - MIPS32_PARSE_SV, - // none: - MIPS32_PARSE_NONE, -}; - -#define FORMAT(ins, format) \ - [MIPS32_INS_##ins] = MIPS32_PARSE_##format, \ - -const enum mips32_parse_format mips32_parse_formats[] = { - FORMAT(ADD, R) - FORMAT(ADDI, I) - FORMAT(ADDIU, I) - FORMAT(ADDU, R) - FORMAT(AND, R) - FORMAT(ANDI, I) - FORMAT(BAL, O16) - FORMAT(BALC, O26) - FORMAT(BC, O26) - FORMAT(BEQ, BE) - FORMAT(BEQL, BE) - FORMAT(BGEZ, BZ) - FORMAT(BGEZAL, BZ) - FORMAT(BGEZALL, BZ) - FORMAT(BGEZL, BZ) - FORMAT(BGTZ, BZ) - FORMAT(BGTZL, BZ) - FORMAT(BLEZ, BZ) - FORMAT(BLEZL, BZ) - FORMAT(BLTZ, BZ) - FORMAT(BLTZAL, BZ) - FORMAT(BLTZALL, BZ) - FORMAT(BLTZL, BZ) - FORMAT(BNE, BE) - FORMAT(BNEL, BE) - FORMAT(DDIV, R2) - FORMAT(DDIVU, R2) - FORMAT(DIV, R2) - FORMAT(DIVU, R2) - FORMAT(J, J) - FORMAT(JAL, J) - FORMAT(JALR, JR) // TODO: handle rd - FORMAT(JALX, J) - FORMAT(JR, JR) - FORMAT(LB, SL) - FORMAT(LBU, SL) - FORMAT(LH, SL) - FORMAT(LHU, SL) - FORMAT(LUI, SLI) - FORMAT(LW, SL) - FORMAT(LWL, SL) - FORMAT(LWR, SL) - FORMAT(MFHI, RD) - FORMAT(MFLO, RD) - FORMAT(MTHI, RS) - FORMAT(MTLO, RS) - FORMAT(MULT, R2) - FORMAT(MULTU, R2) - FORMAT(SB, SL) - FORMAT(SH, SL) - FORMAT(SW, SL) - FORMAT(SWL, SL) - FORMAT(SLL, S) - FORMAT(SLLV, SV) - FORMAT(SLT, R) - FORMAT(SLTI, I) - FORMAT(SLTIU, I) - FORMAT(SLTU, R) - FORMAT(SRA, S) - FORMAT(SRAV, SV) - FORMAT(SRL, S) - FORMAT(SRLV, SV) - FORMAT(SYSCALL, NONE) - FORMAT(OR, R) - FORMAT(ORI, I) - FORMAT(NOR, R) - FORMAT(SUB, R) - FORMAT(SUBU, R) - FORMAT(XOR, R) - FORMAT(XORI, I) -}; - -#undef FORMAT - -#define MAX5 32 
-#define MAX16 65536 -#define MAX26 67108864 - -static int get_reference(struct parser *parser, uint32_t *offset, - enum reference_type type) -{ - struct token token; - - if (next_token(parser, &token)) - return M_ERROR; - - if (token.type == TOK_NUMBER) { - *offset = token.number; - return M_SUCCESS; - } - - if (token.type != TOK_IDENT) { - ERROR_POS(token, "unexpected token of type '%s'", - token_str(token.type)); - return M_ERROR; - } - - struct reference reference = { - .section = parser->sec_tbl.current, - .index = parser->sec_tbl.current->count, - .type = type, - }; - strcpy(reference.name, token.text); - - if (reftbl_push(&parser->ref_tbl, reference)) - return M_ERROR; - - *offset = 0; - - return M_SUCCESS; -} - -static int get_offset(struct parser *parser, uint32_t *offset) -{ - return get_reference(parser, offset, REF_OFFESET); -} - -static int get_target(struct parser *parser, uint32_t *offset) -{ - return get_reference(parser, offset, REF_TARGET); -} - -static int get_instruction(const char *ident, struct mips32_instruction *res) -{ - for (int i = 0; i < __MIPS32_INS_LEN; i++) { - struct mips32_instruction ins = - mips32_instructions[i]; - if (strcasecmp(ident, ins.name) == 0) { - if (res != NULL) - *res = ins; - return M_SUCCESS; - } - } - return M_ERROR; -} - -static int is_instruction(const char *ident) -{ - return get_instruction(ident, NULL); -} - -static int parse_register(struct parser *parser, enum mips32_register *reg) -{ - struct token token; - if (assert_token(parser, TOK_REG, &token)) - return M_ERROR; - - int len = strlen(token.text); - int c0 = len > 0 ? token.text[0] : '\0', - c1 = len > 1 ? token.text[1] : '\0', - c2 = len > 2 ? token.text[2] : '\0', - c3 = len > 3 ? 
token.text[3] : '\0'; - - // $zero - if (c0 == 'z') { - if (c1 == 'e' && c2 == 'r' && c3 == 'o') { - *reg = MIPS32_REG_ZERO; - return M_SUCCESS; - } - } - - // $a0-a3 $at - else if (c0 == 'a') { - if (c1 == 't') { - *reg = MIPS32_REG_AT; - return M_SUCCESS; - } - if (c1 >= '0' && c1 <= '3') { - *reg = MIPS32_REG_A0; - *reg += c1 - '0'; - return M_SUCCESS; - } - } - - // $v0-v1 - else if (c0 == 'v') { - if (c1 >= '0' && c1 <= '1') { - *reg = MIPS32_REG_V0; - *reg += c1 - '0'; - return M_SUCCESS; - } - } - - // $t0-t9 - else if (c0 == 't') { - if (c1 >= '0' && c1 <= '7') { - *reg = MIPS32_REG_T0; - *reg += c1 - '0'; - return M_SUCCESS; - } - // reg T8-T9 are not in order with T0-T7 - if (c1 >= '8' && c1 <= '9') { - *reg = MIPS32_REG_T8; - *reg += c1 - '8'; - return M_SUCCESS; - } - } - - // $s0-s7 $sp - else if (c0 == 's') { - if (c1 >= '0' && c1 <= '7') { - *reg = MIPS32_REG_S0; - *reg += c1 - '0'; - return M_SUCCESS; - } - if (c1 == 'p') { - *reg = MIPS32_REG_SP; - return M_SUCCESS; - } - } - - // $k0-k1 - else if (c0 == 'k') { - if (c1 >= '0' && c1 <= '1') { - *reg = MIPS32_REG_K0; - *reg += c1 - '0'; - return M_SUCCESS; - } - } - - // $gp - else if (c0 == 'g') { - if (c1 == 'p') { - *reg = MIPS32_REG_GP; - return M_SUCCESS; - } - } - - // $fp - else if (c0 == 'f') { - if (c1 == 'p') { - *reg = MIPS32_REG_FP; - return M_SUCCESS; - } - } - - // $rp - else if (c0 == 'r') { - if (c1 == 'p') { - *reg = MIPS32_REG_RA; - return M_SUCCESS; - } - } - - // $0-31 (non aliased register names) - else if (c0 >= '0' && c0 <= '9') { - int i = c0 - '0'; - if (c1 >= '0' && c1 <= '9') { - i *= 10; - i += c1 - '0'; - } - if (i <= 31) { - *reg = i; - return M_SUCCESS; - } - } - - ERROR_POS(token, "unknown register $%s", token.text); - return M_ERROR; -} - -static int parse_instruction_r(struct parser *parser, - struct mips32_instruction *ins) -{ - // format: rs, rt, rd - enum mips32_register reg; - - if (parse_register(parser, &reg)) - return M_ERROR; - ins->R_data.rd = reg; - - if
(assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, &reg)) - return M_ERROR; - ins->R_data.rs = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, &reg)) - return M_ERROR; - ins->R_data.rt = reg; - - return M_SUCCESS; -} - -static int parse_instruction_r2(struct parser *parser, - struct mips32_instruction *ins) -{ - // format: rs, rt - enum mips32_register reg; - - if (parse_register(parser, &reg)) - return M_ERROR; - ins->R_data.rs = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, &reg)) - return M_ERROR; - ins->R_data.rt = reg; - - return M_SUCCESS; -} - -static int parse_instruction_rs(struct parser *parser, - struct mips32_instruction *ins) -{ - // format: rs - enum mips32_register reg; - - if (parse_register(parser, &reg)) - return M_ERROR; - ins->R_data.rs = reg; - - return M_SUCCESS; -} - -static int parse_instruction_rd(struct parser *parser, - struct mips32_instruction *ins) -{ - // format: rd - enum mips32_register reg; - - if (parse_register(parser, &reg)) - return M_ERROR; - ins->R_data.rd = reg; - - return M_SUCCESS; -} - -static int parse_instruction_i(struct parser *parser, - struct mips32_instruction *ins) -{ - // format: rs, rt, immd - enum mips32_register reg; - struct token token; - - if (parse_register(parser, &reg)) - return M_ERROR; - ins->I_data.rt = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, &reg)) - return M_ERROR; - ins->I_data.rs = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (assert_token(parser, TOK_NUMBER, &token)) - return M_ERROR; - - if (token.number >= MAX16) - return M_ERROR; - ins->I_data.immd = token.number; - - return M_SUCCESS; -} - -static int parse_instruction_offset(struct parser *parser, - uint32_t max, - struct mips32_instruction *ins) -{ - uint32_t n; - if (get_offset(parser, &n) || n > max) - return M_ERROR; - -
switch (max) { - case MAX26: - ins->J_data.target = n; - break; - case MAX16: - ins->B_data.offset = n; - break; - } - - return M_SUCCESS; -} - -static int parse_instruction_j(struct parser *parser, - struct mips32_instruction *ins) -{ - uint32_t n; - if (get_target(parser, &n) || n > MAX26) - return M_ERROR; - ins->J_data.target = n; - - return M_SUCCESS; -} - -static int parse_instruction_jr(struct parser *parser, - struct mips32_instruction *ins) -{ - uint32_t n; - if (get_target(parser, &n) || n > MAX26) - return M_ERROR; - ins->J_data.target = n; - - return M_SUCCESS; -} - -static int parse_instruction_branch_equal(struct parser *parser, - struct mips32_instruction *ins) -{ - enum mips32_register reg; - if (parse_register(parser, &reg)) - return M_ERROR; - ins->R_data.rs = reg; - - return M_SUCCESS; -} - -static int parse_instruction_branch(struct parser *parser, - struct mips32_instruction *ins) -{ - enum mips32_register reg; - uint32_t n; - - if (parse_register(parser, &reg)) - return M_ERROR; - ins->B_data.rs = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (get_offset(parser, &n) || n > MAX16) - return M_ERROR; - ins->B_data.offset = n; - - return M_SUCCESS; -} - -static int parse_instruction_sl(struct parser *parser, - struct mips32_instruction *ins) -{ - enum mips32_register reg; - uint32_t offset = 0; - struct token token; - - if (parse_register(parser, &reg)) - return M_ERROR; - ins->I_data.rt = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (peek_token(parser, &token)) - return M_ERROR; - - if (token.type != TOK_LPAREN) - if (get_offset(parser, &offset)) - return M_ERROR; - ins->I_data.immd = offset; - - if (peek_token(parser, &token)) - return M_ERROR; - - if (token.type == TOK_NL) { - ins->I_data.rs = MIPS32_REG_ZERO; - return M_SUCCESS; - } - - if (assert_token(parser, TOK_LPAREN, NULL)) - return M_ERROR; - - if (parse_register(parser, &reg)) - return M_ERROR; - ins->I_data.rs = reg; - - if
(assert_token(parser, TOK_RPAREN, NULL)) - return M_ERROR; - - return M_SUCCESS; -} - -static int parse_instruction_sli(struct parser *parser, - struct mips32_instruction *ins) -{ - enum mips32_register reg; - struct token token; - - if (parse_register(parser, &reg)) - return M_ERROR; - ins->I_data.rt = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX16) - return M_ERROR; - ins->I_data.immd = token.number; - - return M_SUCCESS; -} - -static int parse_instruction_s(struct parser *parser, - struct mips32_instruction *ins) -{ - enum mips32_register reg; - struct token token; - - if (parse_register(parser, &reg)) - return M_ERROR; - ins->R_data.rd = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, &reg)) - return M_ERROR; - ins->R_data.rt = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX5) - return M_ERROR; - ins->R_data.shamt = token.number; - - return M_SUCCESS; -} - -static int parse_instruction_sv(struct parser *parser, - struct mips32_instruction *ins) -{ - enum mips32_register reg; - - if (parse_register(parser, &reg)) - return M_ERROR; - ins->R_data.rd = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, &reg)) - return M_ERROR; - ins->R_data.rt = reg; - - if (assert_token(parser, TOK_COMMA, NULL)) - return M_ERROR; - - if (parse_register(parser, &reg)) - return M_ERROR; - ins->R_data.rs = reg; - - return M_SUCCESS; -} - -static int parse_instruction(struct parser *parser, - union mips_instruction *ins, - struct token ident) -{ - struct mips32_instruction instruction; - enum mips32_parse_format format; - int res = M_SUCCESS; - - if (get_instruction(ident.text, &instruction)) { - ERROR_POS(ident, "unknown instruction '%s'", ident.text); - return M_ERROR; - } - - ins->mips32 = instruction; - format =
mips32_parse_formats[instruction.type]; - - switch (format) { - case MIPS32_PARSE_R: - res = parse_instruction_r(parser, &ins->mips32); - break; - case MIPS32_PARSE_R2: - res = parse_instruction_r2(parser, &ins->mips32); - break; - case MIPS32_PARSE_RS: - res = parse_instruction_rs(parser, &ins->mips32); - break; - case MIPS32_PARSE_RD: - res = parse_instruction_rd(parser, &ins->mips32); - break; - case MIPS32_PARSE_I: - res = parse_instruction_i(parser, &ins->mips32); - break; - case MIPS32_PARSE_J: - res = parse_instruction_j(parser, &ins->mips32); - break; - case MIPS32_PARSE_JR: - res = parse_instruction_jr(parser, &ins->mips32); - break; - case MIPS32_PARSE_O16: - res = parse_instruction_offset(parser, MAX16, &ins->mips32); - break; - case MIPS32_PARSE_O26: - res = parse_instruction_offset(parser, MAX26, &ins->mips32); - break; - case MIPS32_PARSE_BE: - res = parse_instruction_branch_equal(parser, &ins->mips32); - break; - case MIPS32_PARSE_BZ: - res = parse_instruction_branch(parser, &ins->mips32); - break; - case MIPS32_PARSE_SL: - res = parse_instruction_sl(parser, &ins->mips32); - break; - case MIPS32_PARSE_SLI: - res = parse_instruction_sli(parser, &ins->mips32); - break; - case MIPS32_PARSE_S: - res = parse_instruction_s(parser, &ins->mips32); - break; - case MIPS32_PARSE_SV: - res = parse_instruction_sv(parser, &ins->mips32); - break; - case MIPS32_PARSE_NONE: - res = M_SUCCESS; - break; - } - - if (res == M_SUCCESS && assert_eol(parser)) - return M_ERROR; - - return res; -} - - -static int parse_directive_align(struct parser *parser, - struct mips32_directive *directive) -{ - struct token token; - if (assert_token(parser, TOK_NUMBER, &token)) - return M_ERROR; - - if (token.number < 0) { - ERROR_POS(token, "cannot align negative"); - return M_ERROR; - } - - if (token.number > MAX16) { - ERROR_POS(token, "cannot align more than 65kb"); - return M_ERROR; - } - - directive->type = MIPS32_DIRECTIVE_ALIGN; - directive->align = token.number; - - return 
M_SUCCESS; -} - -static int parse_directive_space(struct parser *parser, - struct mips32_directive *directive) -{ - struct token token; - if (assert_token(parser, TOK_NUMBER, &token)) - return M_ERROR; - - if (token.number < 0) { - ERROR_POS(token, "cannot reserve negative"); - return M_ERROR; - } - - if (token.number > MAX16) { - ERROR_POS(token, "cannot reserve more than 65kb"); - return M_ERROR; - } - - directive->type = MIPS32_DIRECTIVE_SPACE; - directive->space = token.number; - - return M_SUCCESS; -} - -static int parse_directive_whb(struct parser *parser, - struct mips32_directive *directive, - enum mips32_directive_type type) -{ - struct token token; - uint32_t size = 0; - uint32_t len = 0; - - switch (type) { - case MIPS32_DIRECTIVE_WORD: - size = UINT32_MAX; - break; - case MIPS32_DIRECTIVE_HALF: - size = UINT16_MAX; - break; - case MIPS32_DIRECTIVE_BYTE: - size = UINT8_MAX; - break; - default: - } - - directive->type = type; - - while (1) { - if (assert_token(parser, TOK_NUMBER, &token)) - return M_ERROR; - - if (len >= MAX_ARG_LENGTH) { - ERROR_POS(token, "directives cannot be longer than " - "%d arguments", MAX_ARG_LENGTH); - return M_ERROR; - } - - if (token.number > size) { - ERROR_POS(token, "number cannot execede max size of: " - "%d", size); - return M_ERROR; - } - - switch (type) { - case MIPS32_DIRECTIVE_WORD: - directive->words[len++] = token.number; - - break; - case MIPS32_DIRECTIVE_HALF: - directive->halfs[len++] = token.number; - break; - case MIPS32_DIRECTIVE_BYTE: - directive->bytes[len++] = token.number; - break; - default: - } - - if (peek_token(parser, &token)) - return M_ERROR; - - if (token.type == TOK_COMMA) { - next_token(parser, NULL); - continue; - } - - break; - } - - directive->len = len; - - return M_SUCCESS; -} - -static int parse_directive_extern(struct parser *parser, - struct mips32_directive *directive) -{ - struct token token; - if (assert_token(parser, TOK_IDENT, &token)) - return M_ERROR; - - directive->type = 
MIPS32_DIRECTIVE_EXTERN; - strcpy(directive->name, token.text); - - return M_SUCCESS; -} - -static int parse_directive_globl(struct parser *parser, - struct mips32_directive *directive) -{ - struct token token; - if (assert_token(parser, TOK_IDENT, &token)) - return M_ERROR; - - directive->type = MIPS32_DIRECTIVE_GLOBL; - strcpy(directive->name, token.text); - - return M_SUCCESS; -} - -static int parse_section(struct mips32_directive *directive, - char name[MAX_LEX_LENGTH]) -{ - directive->type = MIPS32_DIRECTIVE_SECTION; - strcpy(directive->name, name); - - return M_SUCCESS; -} - -static int parse_directive(struct parser *parser, - union mips_directive *directive) -{ - struct token token; - if (assert_token(parser, TOK_DIRECTIVE, &token)) - return M_ERROR; - - // .align n - if (strcmp(token.text, "align") == 0) - return parse_directive_align(parser, &directive->mips32); - else if (strcmp(token.text, "space") == 0) - return parse_directive_space(parser, &directive->mips32); - else if (strcmp(token.text, "word") == 0) - return parse_directive_whb(parser, &directive->mips32, - MIPS32_DIRECTIVE_WORD); - else if (strcmp(token.text, "half") == 0) - return parse_directive_whb(parser, &directive->mips32, - MIPS32_DIRECTIVE_HALF); - else if (strcmp(token.text, "byte") == 0) - return parse_directive_whb(parser, &directive->mips32, - MIPS32_DIRECTIVE_BYTE); - else if (strcmp(token.text, "extern") == 0) - return parse_directive_extern(parser, &directive->mips32); - else if (strcmp(token.text, "globl") == 0) - return parse_directive_globl(parser, &directive->mips32); - else - return parse_section(&directive->mips32, token.text); -} - -void mips32_parser_init(struct parser *parser) -{ - parser->parse_instruction = parse_instruction; - parser->is_instruction = is_instruction; - parser->parse_directive = parse_directive; -} - -void mips32_parser_free(struct parser *parser) -{ - parser_free(parser); -} diff --git a/masm/parse_mips32.h b/masm/parse_mips32.h deleted file mode 100644 
index 5262d68..0000000 --- a/masm/parse_mips32.h +++ /dev/null @@ -1,14 +0,0 @@ -/* Copyright (c) 2024 Freya Murphy */ - -#ifndef __PARSE_MIPS32_H__ -#define __PARSE_MIPS32_H__ - -#include "parse.h" - -/* initzlize a mips32 parser vtable */ -void mips32_parser_init(struct parser *parser); - -/* free the mips32 parser */ -void mips32_parser_free(struct parser *parser); - -#endif /* __PARSE_MIPS32_H__ */ diff --git a/masm/reftbl.c b/masm/reftbl.c deleted file mode 100644 index 198af83..0000000 --- a/masm/reftbl.c +++ /dev/null @@ -1,47 +0,0 @@ -#include -#include -#include -#include -#include - -#include "parse.h" - -#define RELTBL_INIT_LEN 8 - -int reftbl_init(struct reference_table *ref_tbl) -{ - ref_tbl->len = RELTBL_INIT_LEN; - ref_tbl->count = 0; - ref_tbl->references = malloc(sizeof(struct reference) * - RELTBL_INIT_LEN); - - if (ref_tbl->references == NULL) { - ERROR("cannot alloc"); - return M_ERROR; - } - - return M_SUCCESS; -} - -void reftbl_free(struct reference_table *ref_tbl) -{ - free(ref_tbl->references); -} - -int reftbl_push(struct reference_table *ref_tbl, struct reference reference) -{ - if (ref_tbl->count >= ref_tbl->len) { - ref_tbl->len *= 2; - ref_tbl->references = realloc(ref_tbl->references, - sizeof(struct reference) * ref_tbl->len); - - if (ref_tbl->references == NULL) { - ERROR("cannot realloc"); - return M_ERROR; - } - } - - ref_tbl->references[ref_tbl->count++] = reference; - - return M_SUCCESS; -} diff --git a/masm/reltab.c b/masm/reltab.c new file mode 100644 index 0000000..482ed44 --- /dev/null +++ b/masm/reltab.c @@ -0,0 +1,43 @@ +#include +#include +#include + +#include "asm.h" + +#define RELTAB_INIT_LEN 8 + +int reltab_init(struct relocation_table *reltab) +{ + reltab->size = RELTAB_INIT_LEN; + reltab->len = 0; + reltab->data = malloc(sizeof(Elf32_Rela) * RELTAB_INIT_LEN); + + if (reltab->data == NULL) { + ERROR("cannot alloc"); + return M_ERROR; + } + + return M_SUCCESS; +} + +void reltab_free(struct relocation_table *reltab) +{ + 
free(reltab->data); +} + +int reltab_push(struct relocation_table *reltab, const Elf32_Rela rel) +{ + if (reltab->len >= reltab->size) { + void *new = realloc(reltab->data, sizeof(Elf32_Rela) + * reltab->size * 2); + if (new == NULL) { /* old buffer and size stay valid on failure */ + ERROR("cannot realloc"); + return M_ERROR; + } + reltab->data = new; + reltab->size *= 2; + } + + reltab->data[reltab->len++] = rel; + return M_SUCCESS; +} diff --git a/masm/sectab.c b/masm/sectab.c new file mode 100644 index 0000000..d07399f --- /dev/null +++ b/masm/sectab.c @@ -0,0 +1,166 @@ +#include +#include +#include +#include +#include + +#include "asm.h" + +#define SECTBL_INIT_LEN 8 +static const char inital_section[MAX_LEX_LENGTH] = "data"; + +int sectab_init(struct section_table *sectab) +{ + sectab->size = SECTBL_INIT_LEN; + sectab->len = 0; + sectab->sections = malloc(sizeof(struct section) * SECTBL_INIT_LEN); + + if (sectab->sections == NULL) { + ERROR("cannot alloc"); + return M_ERROR; + } + + if (sectab_alloc(sectab, &sectab->current, inital_section)) + return M_ERROR; + + return M_SUCCESS; +} + +void sectab_free(struct section_table *sectab) +{ + for (size_t i = 0; i < sectab->len; i++) { + reltab_free(&sectab->sections[i].reltab); + free(sectab->sections[i].entries); + } + free(sectab->sections); +} + +struct section_settings { + const char *name; + bool read; + bool write; + bool execute; + size_t align; +}; + +static struct section_settings default_section_settings[] = { + {"data", true, true, false, 1}, + {"bss", true, true, false, 1}, + {"rodata", true, false, false, 1}, + {"text", true, false, true, 4}, +}; + +int sectab_alloc(struct section_table *sectab, struct section **res, + const char name[MAX_LEX_LENGTH]) +{ + if (sectab->len >= sectab->size) { + void *new = realloc(sectab->sections, + sizeof(struct section) * sectab->size * 2); + if (new == NULL) { /* old array and size stay valid on failure */ + ERROR("cannot realloc"); + return M_ERROR; + } + sectab->sections = new; /* NOTE(review): the array may move here; struct section pointers handed out earlier (e.g. sectab->current) would then be stale - confirm callers refresh them */ + sectab->size *= 2; + } + + /* set the section defaults */ + struct section *sec; + sec =
&sectab->sections[sectab->len]; + strcpy(sec->name,name); + sec->len = 0; + sec->size = SECTBL_INIT_LEN; + sec->alignment = 1; + sec->read = true; + sec->write = true; + sec->execute = false; + sec->index = sectab->len; + sec->entries = malloc(sizeof(struct section_entry) * SECTBL_INIT_LEN); + + if (reltab_init(&sec->reltab)) + return M_ERROR; + + /* overwrite the default if the given name has their own + * defaults */ + for (int i = 0; i < 4; i++) { + struct section_settings *set = &default_section_settings[i]; + if (strcmp(set->name, name) == 0) { + sec->read = set->read; + sec->write = set->write; + sec->execute = set->execute; + sec->alignment = set->align; + break; + } + } + + if (sec->entries == NULL) { + ERROR("cannot alloc"); + return M_ERROR; + } + + sectab->len++; + + *res = sec; + return M_SUCCESS; +} + +int sectab_get(struct section_table *sectab, struct section **sec, + const char name[MAX_LEX_LENGTH]) +{ + for (size_t i = 0; i < sectab->len; i++) { + struct section *temp = &sectab->sections[i]; + if (strcmp(name, temp->name) == 0) { + if (sec != NULL) + *sec = temp; + return M_SUCCESS; + } + } + + return M_ERROR; +} + +int sec_push(struct section *section, struct section_entry entry) +{ + if (section->len >= section->size) { + void *new = realloc(section->entries, + sizeof(struct section_entry) * section->size * 2); + + if (new == NULL) { + ERROR("cannot realloc"); + return M_ERROR; + } + + section->entries = new; + section->size *= 2; /* grow the recorded capacity only once the buffer really grew */ + } + + section->entries[section->len++] = entry; + + return M_SUCCESS; +} + +size_t sec_size(struct section *sec) +{ + size_t n = 0; + for (size_t i = 0; i < sec->len; i++) { + size_t t = sec->entries[i].size; + size_t m = t % sec->alignment; + if (m) + t += sec->alignment - m; + n += t; + } + return n; +} + +size_t sec_index(struct section *sec, size_t idx) +{ + size_t n = 0; + for (size_t i = 0; i < idx; i++) { + size_t t = sec->entries[i].size; + size_t m = t % sec->alignment; + if (m) + t += sec->alignment - m; + n += t; + } +
return n; +} diff --git a/masm/sectbl.c b/masm/sectbl.c deleted file mode 100644 index 6eafc60..0000000 --- a/masm/sectbl.c +++ /dev/null @@ -1,159 +0,0 @@ -#include -#include -#include -#include -#include - -#include "parse.h" - -#define SECTBL_INIT_LEN 8 -static const char inital_section[MAX_LEX_LENGTH] = "data"; - -int sectbl_init(struct section_table *sec_tbl) -{ - sec_tbl->len = SECTBL_INIT_LEN; - sec_tbl->count = 0; - sec_tbl->sections = malloc(sizeof(struct section) * SECTBL_INIT_LEN); - - if (sec_tbl->sections == NULL) { - ERROR("cannot alloc"); - return M_ERROR; - } - - if (sectbl_alloc(sec_tbl, &sec_tbl->current, inital_section)) - return M_ERROR; - - return M_SUCCESS; -} - -void sectbl_free(struct section_table *sec_tbl) -{ - for (uint32_t i = 0; i < sec_tbl->count; i++) { - free(sec_tbl->sections[i].entries); - } - free(sec_tbl->sections); -} - -struct section_settings { - const char *name; - bool read; - bool write; - bool execute; - uint32_t align; -}; - -static struct section_settings default_section_settings[] = { - {"data", true, true, false, 1}, - {"bss", true, true, false, 1}, - {"rodata", true, false, false, 1}, - {"text", true, false, true, 4}, -}; - -int sectbl_alloc(struct section_table *sec_tbl, struct section **sec, - const char name[MAX_LEX_LENGTH]) -{ - if (sec_tbl->count >= sec_tbl->len) { - sec_tbl->len *= 2; - sec_tbl->sections = realloc(sec_tbl->sections, - sizeof(struct section) * sec_tbl->len); - - if (sec_tbl->sections == NULL) { - ERROR("cannot realloc"); - return M_ERROR; - } - } - - struct section *temp; - temp = &sec_tbl->sections[sec_tbl->count]; - strcpy(temp->name,name); - temp->count = 0; - temp->len = SECTBL_INIT_LEN; - temp->alignment = 1; - temp->read = true; - temp->write = true; - temp->execute = false; - temp->index = sec_tbl->count; - temp->entries = malloc(sizeof(struct section_entry) * SECTBL_INIT_LEN); - - for (int i = 0; i < 4; i++) { - struct section_settings *set = &default_section_settings[i]; - if 
(strcmp(set->name, name) == 0) { - temp->read = set->read; - temp->write = set->write; - temp->execute = set->execute; - temp->alignment = set->align; - break; - } - } - - if (temp->entries == NULL) { - ERROR("cannot alloc"); - return M_ERROR; - } - - sec_tbl->count++; - - *sec = temp; - return M_SUCCESS; -} - -int sectbl_get(struct section_table *sec_tbl, struct section **sec, - const char name[MAX_LEX_LENGTH]) -{ - for (uint32_t i = 0; i < sec_tbl->count; i++) { - struct section *temp = &sec_tbl->sections[i]; - if (strcmp(name, temp->name) == 0) { - if (sec != NULL) - *sec = temp; - return M_SUCCESS; - } - } - - return M_ERROR; -} - -int sec_push(struct section *section, struct section_entry entry) -{ - if (section->count >= section->len) { - section->len *= 2; - void *new = realloc(section->entries, - sizeof(struct section_entry) * section->len); - - if (new == NULL) { - ERROR("cannot realloc"); - return M_ERROR; - } - - section->entries = new; - } - - section->entries[section->count++] = entry; - - return M_SUCCESS; -} - -size_t sec_size(struct section *sec) -{ - size_t n = 0; - for (uint32_t i = 0; i < sec->count; i++) { - size_t t = sec->entries[i].size; - size_t m = t % sec->alignment; - if (m) - t += sec->alignment - m; - n += t; - } - return n; -} - -size_t sec_index(struct section *sec, uint32_t idx) -{ - size_t n = 0; - for (uint32_t i = 0; i < idx; i++) { - size_t t = sec->entries[i].size; - size_t m = t % sec->alignment; - if (m) - t += sec->alignment - m; - n += t; - } - return n; -} diff --git a/masm/strtab.c b/masm/strtab.c new file mode 100644 index 0000000..57d3d0e --- /dev/null +++ b/masm/strtab.c @@ -0,0 +1,54 @@ +#include +#include +#include + +#include "asm.h" + +int strtab_get_str(struct str_table *strtab, const char *str, size_t *res) +{ + for (size_t i = 0; i < strtab->size; i ++) { + if (strcmp(strtab->ptr + i, str) == 0) { + if (res != NULL) + *res = i; + return M_SUCCESS; + } + } + + return M_ERROR; +} + +int strtab_write_str(struct 
str_table *strtab, const char *str, size_t *res) +{ + if (strtab_get_str(strtab, str, res) == M_SUCCESS) + return M_SUCCESS; + + size_t len = strlen(str); + char *new = realloc(strtab->ptr, strtab->size + len + 1); + if (new == NULL) + return M_ERROR; + strtab->ptr = new; + memcpy(strtab->ptr + strtab->size, str, len + 1); + + if (res != NULL) + *res = strtab->size; + + strtab->size += len + 1; + return M_SUCCESS; +} + +int strtab_init(struct str_table *strtab) +{ + strtab->size = 1; + strtab->ptr = malloc(1); + if (strtab->ptr == NULL) { + ERROR("cannot alloc"); + return M_ERROR; + } + *strtab->ptr = '\0'; + return M_SUCCESS; +} + +void strtab_free(struct str_table *strtab) +{ + free(strtab->ptr); +} diff --git a/masm/strtbl.c b/masm/strtbl.c deleted file mode 100644 index 7bdbbea..0000000 --- a/masm/strtbl.c +++ /dev/null @@ -1,54 +0,0 @@ -#include -#include -#include - -#include "asm.h" - -int strtbl_get_str(struct str_table *str_tbl, const char *str, size_t *res) -{ - for (size_t i = 0; i < str_tbl->size; i ++) { - if (strcmp(str_tbl->ptr + i, str) == 0) { - if (res != NULL) - *res = i; - return M_SUCCESS; - } - } - - return M_ERROR; -} - -int strtbl_write_str(struct str_table *str_tbl, const char *str, size_t *res) -{ - if (strtbl_get_str(str_tbl, str, res) == M_SUCCESS) - return M_SUCCESS; - - size_t len = strlen(str); - char *new = realloc(str_tbl->ptr, str_tbl->size + len + 1); - if (new == NULL) - return M_ERROR; - str_tbl->ptr = new; - memcpy(str_tbl->ptr + str_tbl->size, str, len + 1); - - if (res != NULL) - *res = str_tbl->size; - - str_tbl->size += len + 1; - return M_SUCCESS; -} - -int strtbl_init(struct str_table *str_tbl) -{ - str_tbl->size = 1; - str_tbl->ptr = malloc(1); - if (str_tbl->ptr == NULL) { - ERROR("cannot alloc"); - return M_ERROR; - } - *str_tbl->ptr = '\0'; - return M_SUCCESS; -} - -void strtbl_free(struct str_table *str_tbl) -{ - free(str_tbl->ptr); -} diff --git a/masm/symtab.c b/masm/symtab.c new file mode 100644 index 
0000000..7d40609 --- /dev/null +++ b/masm/symtab.c @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include +#include + +#include "asm.h" + +#define SYMTBL_INIT_LEN 24 + +int symtab_init(struct symbol_table *symtab) +{ + symtab->size = SYMTBL_INIT_LEN; + symtab->len = 0; + symtab->symbols = malloc(sizeof(Elf32_Sym) * SYMTBL_INIT_LEN); + symtab->sections = malloc(sizeof(ssize_t) * SYMTBL_INIT_LEN); + + if (symtab->symbols == NULL || symtab->sections == NULL) { + ERROR("cannot alloc"); + return M_ERROR; + } + + return M_SUCCESS; +} + +void symtab_free(struct symbol_table *symtab) +{ + free(symtab->symbols); + free(symtab->sections); +} + +int symtab_push(struct symbol_table *symtab, Elf32_Sym sym, ssize_t sec_idx) +{ + if (symtab->len >= symtab->size) { + symtab->size *= 2; + symtab->symbols = realloc(symtab->symbols, + sizeof(Elf32_Sym) * symtab->size); + symtab->sections = realloc(symtab->sections, + sizeof(ssize_t) * symtab->size); + if (symtab->symbols == NULL || symtab->sections == NULL) { + ERROR("cannot realloc"); + return M_ERROR; + } + } + + symtab->symbols[symtab->len] = sym; + symtab->sections[symtab->len++] = sec_idx; + return M_SUCCESS; +} + +int symtab_find(struct symbol_table *symtab, Elf32_Sym **ptr, + size_t *idx, const char name[MAX_LEX_LENGTH]) +{ + for (uint32_t i = 0; i < symtab->len; i++) { + Elf32_Sym *sym = &symtab->symbols[i]; + const char *str = &symtab->strtab->ptr[sym->st_name]; + if (strcmp(str, name) == 0) { + if (ptr != NULL) + *ptr = sym; + + ptrdiff_t diff = sym - symtab->symbols; + if (idx != NULL) + *idx = (size_t)diff; /* pointer subtraction already counts elements, not bytes */ + + return M_SUCCESS; + } + } + return M_ERROR; +} diff --git a/masm/symtbl.c b/masm/symtbl.c deleted file mode 100644 index 8aa7bcf..0000000 --- a/masm/symtbl.c +++ /dev/null @@ -1,57 +0,0 @@ -#include -#include -#include -#include - -#include "asm.h" - -#define SYMTBL_INIT_LEN 24 - -int symtbl_init(struct symbol_table *sym_tbl) -{ - sym_tbl->len = SYMTBL_INIT_LEN; - sym_tbl->count = 0; -
sym_tbl->symbols = malloc(sizeof(struct symbol) * SYMTBL_INIT_LEN); - - if (sym_tbl->symbols == NULL) { - ERROR("cannot alloc"); - return M_ERROR; - } - - return M_SUCCESS; -} - -void symtbl_free(struct symbol_table *sym_tbl) -{ - free(sym_tbl->symbols); -} - -int symtbl_push(struct symbol_table *sym_tbl, struct symbol sym) -{ - if (sym_tbl->count >= sym_tbl->len) { - sym_tbl->len *= 2; - sym_tbl->symbols = realloc(sym_tbl->symbols, - sizeof(struct symbol) * sym_tbl->len); - if (sym_tbl->symbols == NULL) { - ERROR("cannot relloc"); - return M_ERROR; - } - } - - sym_tbl->symbols[sym_tbl->count++] = sym; - return M_SUCCESS; -} - -int symtbl_find(struct symbol_table *sym_tbl, struct symbol **ptr, - const char name[MAX_LEX_LENGTH]) -{ - for (uint32_t i = 0; i < sym_tbl->count; i++) { - struct symbol *sym = &sym_tbl->symbols[i]; - if (strcmp(sym->name, name) == 0) { - if (ptr != NULL) - *ptr = sym; - return M_SUCCESS; - } - } - return M_ERROR; -}