refactor masm to add codegen step

This commit is contained in:
Freya Murphy 2024-10-04 19:41:10 -04:00
parent b2ca1c9e98
commit 1c11a13ff3
Signed by: freya
GPG key ID: 744AB800E383AE52
23 changed files with 2793 additions and 2679 deletions

View file

@ -32,7 +32,7 @@ static const Elf32_Ehdr MIPS_ELF_EHDR =
.e_machine = B16(EM_MIPS), .e_machine = B16(EM_MIPS),
.e_version = B32(EV_CURRENT), .e_version = B32(EV_CURRENT),
.e_entry = 0x00, .e_entry = 0x00,
.e_flags = B32(EF_MIPS_ARCH_32R6), .e_flags = 0x00, // B32(EF_MIPS_ARCH_32R6),
.e_ehsize = B16(sizeof(Elf32_Ehdr)), .e_ehsize = B16(sizeof(Elf32_Ehdr)),
.e_phentsize = B16(sizeof(Elf32_Phdr)), .e_phentsize = B16(sizeof(Elf32_Phdr)),
.e_shentsize = B16(sizeof(Elf32_Shdr)), .e_shentsize = B16(sizeof(Elf32_Shdr)),

View file

@ -8,8 +8,8 @@
/* Error codes /* Error codes
*/ */
#define M_SUCCESS 0 #define M_SUCCESS 0
#define M_EOF 1 #define M_ERROR 1
#define M_ERROR -1 #define M_EOF 2
#define __DEBUG 1 #define __DEBUG 1
#define __WARNING 2 #define __WARNING 2

View file

@ -1,467 +0,0 @@
/* Copyright (c) 2024 Freya Murphy */
#ifndef __MIPS_H__
#define __MIPS_H__
#include <mlimits.h>
#include <stdint.h>
/* All MIPS general-purpose registers; each enumerator's value is the
 * register number ($0-$31). */
enum mips_register {
	/* $0-$1 */
	MIPS_REG_ZERO = 0, MIPS_REG_AT = 1,
	/* $2-$3 */
	MIPS_REG_V0 = 2, MIPS_REG_V1 = 3,
	/* $4-$7 */
	MIPS_REG_A0 = 4, MIPS_REG_A1 = 5, MIPS_REG_A2 = 6, MIPS_REG_A3 = 7,
	/* $8-$15 */
	MIPS_REG_T0 = 8,  MIPS_REG_T1 = 9,  MIPS_REG_T2 = 10, MIPS_REG_T3 = 11,
	MIPS_REG_T4 = 12, MIPS_REG_T5 = 13, MIPS_REG_T6 = 14, MIPS_REG_T7 = 15,
	/* $16-$23 */
	MIPS_REG_S0 = 16, MIPS_REG_S1 = 17, MIPS_REG_S2 = 18, MIPS_REG_S3 = 19,
	MIPS_REG_S4 = 20, MIPS_REG_S5 = 21, MIPS_REG_S6 = 22, MIPS_REG_S7 = 23,
	/* $24-$25 */
	MIPS_REG_T8 = 24, MIPS_REG_T9 = 25,
	/* $26-$27 */
	MIPS_REG_K0 = 26, MIPS_REG_K1 = 27,
	/* $28-$31 */
	MIPS_REG_GP = 28, MIPS_REG_SP = 29, MIPS_REG_FP = 30, MIPS_REG_RA = 31,
};
/* Identifier of every supported instruction. The enumerators are numbered
 * consecutively from zero and are used as indexes into mips_instructions,
 * so their order must not change. */
enum mips_instruction_type {
	MIPS_INS_ADD, MIPS_INS_ADDI, MIPS_INS_ADDIU, MIPS_INS_ADDU,
	MIPS_INS_AND, MIPS_INS_ANDI,

	MIPS_INS_BAL, MIPS_INS_BALC, MIPS_INS_BC,
	MIPS_INS_BEQ, MIPS_INS_BEQL,
	MIPS_INS_BGEZ, MIPS_INS_BGEZAL, MIPS_INS_BGEZALL, MIPS_INS_BGEZL,
	MIPS_INS_BGTZ, MIPS_INS_BGTZL,
	MIPS_INS_BLEZ, MIPS_INS_BLEZL,
	MIPS_INS_BLTZ, MIPS_INS_BLTZAL, MIPS_INS_BLTZALL, MIPS_INS_BLTZL,
	MIPS_INS_BNE, MIPS_INS_BNEL,

	MIPS_INS_DIV, MIPS_INS_MOD, MIPS_INS_DIVU, MIPS_INS_MODU,

	MIPS_INS_J, MIPS_INS_JAL, MIPS_INS_JALR, MIPS_INS_JALX, MIPS_INS_JR,

	MIPS_INS_LB, MIPS_INS_LBU, MIPS_INS_LH, MIPS_INS_LHU,
	MIPS_INS_LUI, MIPS_INS_LW,

	MIPS_INS_MFHI, MIPS_INS_MFLO, MIPS_INS_MTHI, MIPS_INS_MTLO,

	MIPS_INS_MUL, MIPS_INS_MUH, MIPS_INS_MULU, MIPS_INS_MUHU,

	MIPS_INS_SB, MIPS_INS_SH, MIPS_INS_SW,

	MIPS_INS_SLL, MIPS_INS_SLLV,
	MIPS_INS_SLT, MIPS_INS_SLTI, MIPS_INS_SLTIU, MIPS_INS_SLTU,
	MIPS_INS_SRA, MIPS_INS_SRAV, MIPS_INS_SRL, MIPS_INS_SRLV,

	MIPS_INS_SUB, MIPS_INS_SUBU,
	MIPS_INS_SYSCALL,

	MIPS_INS_OR, MIPS_INS_ORI, MIPS_INS_NOR, MIPS_INS_XOR, MIPS_INS_XORI,

	/* not an instruction: equals the number of enumerators above */
	__MIPS_INS_LEN,
};
/* One 32-bit instruction word, viewed through overlapping bit-field
 * layouts. Every anonymous struct below aliases the same 32 bits as `raw`.
 *
 * NOTE: bit-field allocation order is implementation-defined; the field
 * order here assumes the compiler fills a word starting from its
 * least-significant bit. */
union mips_instruction_data {
	/* the whole instruction word */
	uint32_t raw : 32;

	/* register format: fields listed from `funct` up to `op` */
	struct {
		uint32_t funct : 6;
		uint32_t shamt : 5;
		uint32_t rd : 5;
		uint32_t rt : 5;
		uint32_t rs : 5;
		uint32_t op : 6;
	};

	/* immediate format: 16-bit immediate, remaining 16 bits unnamed */
	struct {
		uint32_t immd : 16;
		uint32_t : 16;
	};

	/* jump format: 26-bit target, remaining 6 bits unnamed */
	struct {
		uint32_t target : 26;
		uint32_t : 6;
	};

	/* compact branch format: signed 26-bit offset */
	struct {
		int32_t offs26 : 26;
		uint32_t : 6;
	};

	/* branch format: signed 16-bit offset followed by a 5-bit function
	 * code that overlaps `rt` of the register format */
	struct {
		int32_t offset : 16;
		uint32_t bfunct : 5;
		uint32_t : 11;
	};
} __attribute__((packed));
/* Describes one instruction: which instruction it is, its mnemonic, and
 * the instruction word associated with it.
 *
 * The member order is significant: the MIPS_INS() table macro fills `type`
 * and `name` positionally. */
struct mips_instruction {
	/* metadata */
	enum mips_instruction_type type;
	const char *name;

	/* instruction word */
	union mips_instruction_data data;
};
#define MIPS_INS(ins, ...) \
[MIPS_INS_ ##ins] = { \
MIPS_INS_ ##ins, \
#ins, \
.data = { __VA_ARGS__ } \
}, \
static const struct mips_instruction mips_instructions[] = {
/* ADD - add */
#define MIPS_OP_SPECIAL 0b000000
#define MIPS_FUNCT_ADD 0b100000
MIPS_INS(ADD, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_ADD)
/* ADDI - add immediate */
#define MIPS_OP_ADDI 0b001000
MIPS_INS(ADDI, .op = MIPS_OP_ADDI)
/* ADDIU - add immediate unsigned */
#define MIPS_OP_ADDIU 0b001001
MIPS_INS(ADDIU, .op = MIPS_OP_ADDIU)
/* ADDU - add unsigned */
#define MIPS_FUNCT_ADDU 0b100001
MIPS_INS(ADDU, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_ADDU)
/* AND - and */
#define MIPS_FUNCT_AND 0b100100
MIPS_INS(AND, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_AND)
/* ANDI - and immediate */
#define MIPS_OP_ANDI 0b001100
MIPS_INS(ANDI, .op = MIPS_OP_ANDI)
/* BAL - branch and link */
#define MIPS_OP_REGIMM 0b000001
#define MIPS_FUNCT_BAL 0b10001
MIPS_INS(BAL, .op = MIPS_OP_REGIMM, .bfunct = MIPS_FUNCT_BAL)
/* BALC - branch and link, compact */
#define MIPS_OP_BALC 0b111010
MIPS_INS(BALC, .op = MIPS_OP_BALC)
/* BC - branch, compact */
#define MIPS_OP_BC 0b110010
MIPS_INS(BC, .op = MIPS_OP_BC)
/* BEQ - branch on equal */
#define MIPS_OP_BEQ 0b000100
MIPS_INS(BEQ, .op = MIPS_OP_BEQ)
/* BEQL - branch on equal likely */
#define MIPS_OP_BEQL 0b010100
MIPS_INS(BEQL, .op = MIPS_OP_BEQL)
/* BGEZ - branch on greater than or equal to zero */
#define MIPS_FUNCT_BGEZ 0b00001
MIPS_INS(BGEZ, .op = MIPS_OP_REGIMM, .bfunct = MIPS_FUNCT_BGEZ)
/* BGEZAL - branch on greater than or equal to zero and link */
#define MIPS_FUNCT_BGEZAL 0b10001
MIPS_INS(BGEZAL, .op = MIPS_OP_REGIMM, .bfunct = MIPS_FUNCT_BGEZAL)
/* BGEZAL - branch on greater than or equal to zero and link likely */
#define MIPS_FUNCT_BGEZALL 0b10011
MIPS_INS(BGEZALL, .op = MIPS_OP_REGIMM, .bfunct = MIPS_FUNCT_BGEZALL)
/* BGEZL - branch on greater than or equal to zero likely */
#define MIPS_FUNCT_BGEZL 0b00011
MIPS_INS(BGEZL, .op = MIPS_OP_REGIMM, .bfunct = MIPS_FUNCT_BGEZL)
/* BGTZ - branch on greater than zero */
#define MIPS_OP_BGTZ 0b000111
MIPS_INS(BGTZ, .op = MIPS_OP_BGTZ)
/* BGTZL - branch on greater than zero likely */
#define MIPS_OP_BGTZL 0b010111
MIPS_INS(BGTZL, .op = MIPS_OP_BGTZL)
/* BLEZ - branch on less than or equal to zero */
#define MIPS_OP_BLEZ 0b000110
MIPS_INS(BLEZ, .op = MIPS_OP_BLEZ)
/* BLEZL - branch on less than or equal to zero likely */
#define MIPS_OP_BLEZL 0b010110
MIPS_INS(BLEZL, .op = MIPS_OP_BLEZL)
/* BLTZ - branch on less than zero */
#define MIPS_FUNCT_BLTZ 0b00000
MIPS_INS(BLTZ, .op = MIPS_OP_REGIMM, .bfunct = MIPS_FUNCT_BLTZ)
/* BLTZAL - branch on less than zero and link */
#define MIPS_FUNCT_BLTZAL 0b10000
MIPS_INS(BLTZAL, .op = MIPS_OP_REGIMM, .bfunct = MIPS_FUNCT_BLTZAL)
/* BLTZALL - branch on less than zero and link likely */
#define MIPS_FUNCT_BLTZALL 0b10010
MIPS_INS(BLTZALL, .op = MIPS_OP_REGIMM, .bfunct = MIPS_FUNCT_BLTZALL)
/* BLTZL - branch on less than zero likely */
#define MIPS_FUNCT_BLTZL 0b00010
MIPS_INS(BLTZL, .op = MIPS_OP_REGIMM, .bfunct = MIPS_FUNCT_BLTZL)
/* BNE - branch on not equal */
#define MIPS_OP_BNE 0b000101
MIPS_INS(BNE, .op = MIPS_OP_BNE)
/* BNEL - branch on not equal likely */
#define MIPS_OP_BNEL 0b010101
MIPS_INS(BNEL, .op = MIPS_OP_BNEL)
/* DIV - divide */
#define MIPS_FUNCT_SOP32 0b011010
#define MIPS_SOP32_DIV 0b00010
MIPS_INS(DIV, .op = MIPS_OP_SPECIAL, .shamt = MIPS_SOP32_DIV,
.funct = MIPS_FUNCT_SOP32)
/* MOD - modulo */
#define MIPS_SOP32_MOD 0b00011
MIPS_INS(MOD, .op = MIPS_OP_SPECIAL, .shamt = MIPS_SOP32_MOD,
.funct = MIPS_FUNCT_SOP32)
/* DIVU - divide unsigned */
#define MIPS_FUNCT_SOP33 0b011011
#define MIPS_SOP33_DIVU 0b00010
MIPS_INS(DIVU, .op = MIPS_OP_SPECIAL, .shamt = MIPS_SOP33_DIVU,
.funct = MIPS_FUNCT_SOP33)
/* MODU - modulo unsigned */
#define MIPS_SOP33_MODU 0b00011
MIPS_INS(MODU, .op = MIPS_OP_SPECIAL, .shamt = MIPS_SOP33_MODU,
.funct = MIPS_FUNCT_SOP33)
/* J - jump */
#define MIPS_OP_J 0b000010
MIPS_INS(J, .op = MIPS_OP_J)
/* JAL - jump and link */
#define MIPS_OP_JAL 0b000011
MIPS_INS(JAL, .op = MIPS_OP_JAL)
/* JALR - jump and link register */
#define MIPS_FUNCT_JALR 0b001001
MIPS_INS(JALR, .rd = MIPS_REG_RA, .op = MIPS_OP_SPECIAL,
.funct = MIPS_FUNCT_JALR)
/* JALX - jump and link exchange */
#define MIPS_OP_JALX 0b011101
MIPS_INS(JALX, .op = MIPS_OP_JALX)
/* JR - jump register */
#define MIPS_FUNCT_JR 0b001000
MIPS_INS(JR, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_JR)
/* LB - load byte */
#define MIPS_OP_LB 0b100000
MIPS_INS(LB, .op = MIPS_OP_LB)
/* LBU - load byte unsigned */
#define MIPS_OP_LBU 0b100100
MIPS_INS(LBU, .op = MIPS_OP_LBU)
/* LH - load half */
#define MIPS_OP_LH 0b100001
MIPS_INS(LH, .op = MIPS_OP_LH)
/* LHU - load half unsigned */
#define MIPS_OP_LHU 0b100101
MIPS_INS(LHU, .op = MIPS_OP_LHU)
/* LUI - load upper immediate */
#define MIPS_OP_LUI 0b001111
MIPS_INS(LUI, .op = MIPS_OP_LUI)
/* LW - load word */
#define MIPS_OP_LW 0b100011
MIPS_INS(LW, .op = MIPS_OP_LW)
/* MFHI - move from hi */
#define MIPS_FUNCT_MFHI 0b010000
MIPS_INS(MFHI, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_MFHI)
/* MFLO - move from hi */
#define MIPS_FUNCT_MFLO 0b010010
MIPS_INS(MFLO, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_MFLO)
/* MTHI - move from hi */
#define MIPS_FUNCT_MTHI 0b010001
MIPS_INS(MTHI, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_MTHI)
/* MTLO - move from hi */
#define MIPS_FUNCT_MTLO 0b010011
MIPS_INS(MTLO, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_MTLO)
/* MUL - multiply low word */
#define MIPS_FUNCT_SOP30 0b011000
#define MIPS_SOP30_MUL 0b00010
MIPS_INS(MUL, .op = MIPS_OP_SPECIAL, .shamt = MIPS_SOP30_MUL,
.funct = MIPS_FUNCT_SOP30)
/* MUH - multiply high word */
#define MIPS_SOP30_MUH 0b00011
MIPS_INS(MUH, .op = MIPS_OP_SPECIAL, .shamt = MIPS_SOP30_MUH,
.funct = MIPS_FUNCT_SOP30)
/* MULU - multiply low word unsigned */
#define MIPS_FUNCT_SOP31 0b011001
#define MIPS_SOP31_MULU 0b00010
MIPS_INS(MULU, .op = MIPS_OP_SPECIAL, .shamt = MIPS_SOP31_MULU,
.funct = MIPS_FUNCT_SOP31)
/* MUHU - multiply high word unsgined */
#define MIPS_SOP31_MUHU 0b00011
MIPS_INS(MUHU, .op = MIPS_OP_SPECIAL, .shamt = MIPS_SOP31_MUHU,
.funct = MIPS_FUNCT_SOP31)
/* SB - store byte */
#define MIPS_OP_SB 0b101000
MIPS_INS(SB, .op = MIPS_OP_SB)
/* SH - store half */
#define MIPS_OP_SH 0b101001
MIPS_INS(SH, .op = MIPS_OP_SH)
/* SW - store word */
#define MIPS_OP_SW 0b101011
MIPS_INS(SW, .op = MIPS_OP_SW)
/* SLL - shift left logical */
#define MIPS_FUNCT_SLL 0b000000
MIPS_INS(SLL, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SLL)
/* SLLV - shift left logical variable */
#define MIPS_FUNCT_SLLV 0b000100
MIPS_INS(SLLV, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SLLV)
/* SLT - set less then */
#define MIPS_FUNCT_SLT 0b101010
MIPS_INS(SLT, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SLT)
/* SLTI - set less then immediate */
#define MIPS_OP_SLTI 0b001010
MIPS_INS(SLTI, .op = MIPS_OP_SLTI)
/* SLTIU - set less then imemdiate unsigned */
#define MIPS_OP_SLTIU 0b001011
MIPS_INS(SLTIU, .op = MIPS_OP_SLTIU)
/* SLTU - set less than unsigned */
#define MIPS_FUNCT_SLTU 0b101011
MIPS_INS(SLTU, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SLTU)
/* SRA - shift right arithmetic */
#define MIPS_FUNCT_SRA 0b000011
MIPS_INS(SRA, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SRA)
/* SRAV - shift right arithmetic variable */
#define MIPS_FUNCT_SRAV 0b000111
MIPS_INS(SRAV, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SRAV)
/* SRL - shift right logical */
#define MIPS_FUNCT_SRL 0b000010
MIPS_INS(SRL, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SRL)
/* SRLV - shift right logical variable */
#define MIPS_FUNCT_SRLV 0b000110
MIPS_INS(SRLV, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SRLV)
/* SUB - subtract */
#define MIPS_FUNCT_SUB 0b100010
MIPS_INS(SUB, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SUB)
/* SUBU - subtract unsigned */
#define MIPS_FUNCT_SUBU 0b100011
MIPS_INS(SUBU, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SUBU)
/* SYSCALL - syscall */
#define MIPS_FUNCT_SYSCALL 0b001100
MIPS_INS(SYSCALL, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_SYSCALL)
/* OR - or */
#define MIPS_FUNCT_OR 0b100101
MIPS_INS(OR, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_OR)
/* ORI - or imemdiate */
#define MIPS_OP_ORI 0b001101
MIPS_INS(ORI, .op = MIPS_OP_ORI)
/* NOR - not or */
#define MIPS_FUNCT_NOR 0b100111
MIPS_INS(NOR, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_NOR)
/* XOR - exclusive or */
#define MIPS_FUNCT_XOR 0b100110
MIPS_INS(XOR, .op = MIPS_OP_SPECIAL, .funct = MIPS_FUNCT_XOR)
/* XORI - exclusive or immediate */
#define MIPS_OP_XORI 0b001110
MIPS_INS(XORI, .op = MIPS_OP_XORI)
};
#undef MIPS_INS
#endif /* __MIPS_H__ */

142
include/mips32.h Normal file
View file

@ -0,0 +1,142 @@
/* Copyright (c) 2024 Freya Murphy */
#ifndef __MIPS32_H__
#define __MIPS32_H__
#include <stddef.h>
#include <stdint.h>
#include <mlimits.h>
/* All MIPS32 general-purpose registers; each enumerator's value is the
 * register number ($0-$31). */
enum mips32_register {
	/* $0-$1 */
	MIPS32_REG_ZERO = 0, MIPS32_REG_AT = 1,
	/* $2-$3 */
	MIPS32_REG_V0 = 2, MIPS32_REG_V1 = 3,
	/* $4-$7 */
	MIPS32_REG_A0 = 4, MIPS32_REG_A1 = 5,
	MIPS32_REG_A2 = 6, MIPS32_REG_A3 = 7,
	/* $8-$15 */
	MIPS32_REG_T0 = 8,  MIPS32_REG_T1 = 9,
	MIPS32_REG_T2 = 10, MIPS32_REG_T3 = 11,
	MIPS32_REG_T4 = 12, MIPS32_REG_T5 = 13,
	MIPS32_REG_T6 = 14, MIPS32_REG_T7 = 15,
	/* $16-$23 */
	MIPS32_REG_S0 = 16, MIPS32_REG_S1 = 17,
	MIPS32_REG_S2 = 18, MIPS32_REG_S3 = 19,
	MIPS32_REG_S4 = 20, MIPS32_REG_S5 = 21,
	MIPS32_REG_S6 = 22, MIPS32_REG_S7 = 23,
	/* $24-$25 */
	MIPS32_REG_T8 = 24, MIPS32_REG_T9 = 25,
	/* $26-$27 */
	MIPS32_REG_K0 = 26, MIPS32_REG_K1 = 27,
	/* $28-$31 */
	MIPS32_REG_GP = 28, MIPS32_REG_SP = 29,
	MIPS32_REG_FP = 30, MIPS32_REG_RA = 31,
};
/* One 32-bit MIPS instruction word, viewed through overlapping bit-field
 * layouts. Every anonymous struct below aliases the same 32 bits as `raw`.
 *
 * NOTE: bit-field allocation order is implementation-defined; the field
 * order here assumes the compiler fills a word starting from its
 * least-significant bit. */
union mips32_instruction {
	/* the whole instruction word */
	uint32_t raw : 32;

	/* register format: fields listed from `funct` up to `op` */
	struct {
		uint32_t funct : 6;
		uint32_t shamt : 5;
		uint32_t rd : 5;
		uint32_t rt : 5;
		uint32_t rs : 5;
		uint32_t op : 6;
	};

	/* immediate format: 16-bit immediate, remaining 16 bits unnamed */
	struct {
		uint32_t immd : 16;
		uint32_t : 16;
	};

	/* jump format: 26-bit target, remaining 6 bits unnamed */
	struct {
		uint32_t target : 26;
		uint32_t : 6;
	};

	/* compact branch format: signed 26-bit offset */
	struct {
		int32_t offs26 : 26;
		uint32_t : 6;
	};

	/* branch format: signed 16-bit offset followed by a 5-bit function
	 * code that overlaps `rt` of the register format */
	struct {
		int32_t offset : 16;
		uint32_t bfunct : 5;
		uint32_t : 11;
	};
} __attribute__((packed));
/// grammar syntax:
///
/// ... the grammar takes entries parsed from the instruction,
/// and updates the instruction with values based on the type
/// of each entry, e.g. immd requires an immediate as the next argument
/// and updates the low 16 bits of the instruction.
///
/// GRAMMAR -> ENTRIES
/// GRAMMAR -> ε
/// ENTRIES -> ENTRIES, ENTRY
/// ENTRIES -> ENTRY
/// ENTRY -> rd // e.g. $at
/// ENTRY -> rs
/// ENTRY -> rt
/// ENTRY -> immd // e.g. 0x80
/// ENTRY -> offset // e.g. main (16 bits)
/// ENTRY -> offset(base) // e.g. 4($sp)
/// ENTRY -> target // e.g. main (28 bits, shifted)
///
/// // grammar entries are always defined onto themselves... meaning the
/// // name of their type directly corresponds to the mips field in the
/// // instruction
///
/// pseudo grammar syntax:
///
/// ... pseudo entries represent what values should be placed where
/// in each of the pseudo instructions. pseudo grammar is extended such
/// that hardcoded values can be set, e.g. setting rt=$at
///
/// GRAMMAR -> ENTRIES
/// GRAMMAR -> ε
/// ENTRIES -> ENTRIES, ENTRYSET
/// ENTRIES -> ENTRYSET
/// ENTRYSET -> ENTRY | SET
/// SET -> ENTRY = <REGISTER>
/// ENTRY -> <GRAMMAR: ENTRY> // i.e. any valid entry from the grammar syntax
/// ENTRY -> hi // high 16 bits of <target> into <immd>
/// ENTRY -> lo // low 16 bits of <target> into <immd>
/* One step of a pseudo instruction's expansion. */
struct mips32__pseudo_grammer {
	// which instruction this part of the
	// pseudo instruction expands to
	int enum_index;
	// which parts of that instruction
	// to update with values taken from
	// the grammer
	char *update;
};

/* Grammar description of one instruction (real or pseudo).
 *
 * The member order is significant: grammar tables initialise this struct
 * positionally. */
struct mips32_grammer {
	// the name of the ins
	char *name;
	// the grammer of the ins
	char *grammer;
	// the index of the ins (if real)
	int enum_index;
	// for pseudo instructions only: number of used
	// entries in pseudo_grammer, followed by the entries
	int pseudo_len;
	struct mips32__pseudo_grammer pseudo_grammer[MAX_ARG_LENGTH];
};
#endif /* __MIPS32_H__ */

158
include/mips32r6.h Normal file
View file

@ -0,0 +1,158 @@
/* Copyright (c) 2024 Freya Murphy */
#ifndef __MIPS32R6_H__
#define __MIPS32R6_H__
#include <mlimits.h>
#include <stdint.h>
#include <mips32.h>
/* Identifier of every real instruction. The enumerators are numbered
 * consecutively from zero and index mips32r6_instructions, so their order
 * must not change. */
enum mips32r6_instruction_type {
	MIPS32R6_INS_ADD, MIPS32R6_INS_ADDI,
	MIPS32R6_INS_ADDIU, MIPS32R6_INS_ADDU,
	MIPS32R6_INS_AND, MIPS32R6_INS_ANDI,

	MIPS32R6_INS_BAL, MIPS32R6_INS_BALC, MIPS32R6_INS_BC,
	MIPS32R6_INS_BEQ,
	MIPS32R6_INS_BGEZ, MIPS32R6_INS_BGEZAL,
	MIPS32R6_INS_BGTZ,
	MIPS32R6_INS_BLEZ,
	MIPS32R6_INS_BLTZ, MIPS32R6_INS_BLTZAL,
	MIPS32R6_INS_BNE,

	MIPS32R6_INS_DIV, MIPS32R6_INS_MOD,
	MIPS32R6_INS_DIVU, MIPS32R6_INS_MODU,

	MIPS32R6_INS_J, MIPS32R6_INS_JAL, MIPS32R6_INS_JALR,
	MIPS32R6_INS_JALX, MIPS32R6_INS_JR,

	MIPS32R6_INS_LB, MIPS32R6_INS_LBU,
	MIPS32R6_INS_LH, MIPS32R6_INS_LHU,
	MIPS32R6_INS_LUI, MIPS32R6_INS_LW,

	MIPS32R6_INS_MUL, MIPS32R6_INS_MUH,
	MIPS32R6_INS_MULU, MIPS32R6_INS_MUHU,

	MIPS32R6_INS_SB, MIPS32R6_INS_SH, MIPS32R6_INS_SW,

	MIPS32R6_INS_SLL, MIPS32R6_INS_SLLV,
	MIPS32R6_INS_SLT, MIPS32R6_INS_SLTI,
	MIPS32R6_INS_SLTIU, MIPS32R6_INS_SLTU,
	MIPS32R6_INS_SRA, MIPS32R6_INS_SRAV,
	MIPS32R6_INS_SRL, MIPS32R6_INS_SRLV,

	MIPS32R6_INS_SUB, MIPS32R6_INS_SUBU,
	MIPS32R6_INS_SYSCALL,

	MIPS32R6_INS_OR, MIPS32R6_INS_ORI, MIPS32R6_INS_NOR,
	MIPS32R6_INS_XOR, MIPS32R6_INS_XORI,

	/* not an instruction: equals the number of enumerators above */
	__MIPS32R6_INS_NULL,
};
#define MIPS32R6_OP_SPECIAL 0b000000
#define MIPS32R6_OP_ADDI 0b001000
#define MIPS32R6_OP_ADDIU 0b001001
#define MIPS32R6_OP_ANDI 0b001100
#define MIPS32R6_OP_REGIMM 0b000001
#define MIPS32R6_OP_BALC 0b111010
#define MIPS32R6_OP_BC 0b110010
#define MIPS32R6_OP_BEQ 0b000100
#define MIPS32R6_OP_BEQL 0b010100
#define MIPS32R6_OP_BGTZ 0b000111
#define MIPS32R6_OP_BGTZL 0b010111
#define MIPS32R6_OP_BLEZ 0b000110
#define MIPS32R6_OP_BLEZL 0b010110
#define MIPS32R6_OP_BNE 0b000101
#define MIPS32R6_OP_BNEL 0b010101
#define MIPS32R6_OP_J 0b000010
#define MIPS32R6_OP_JAL 0b000011
#define MIPS32R6_OP_JALX 0b011101
#define MIPS32R6_OP_LB 0b100000
#define MIPS32R6_OP_LBU 0b100100
#define MIPS32R6_OP_LH 0b100001
#define MIPS32R6_OP_LHU 0b100101
#define MIPS32R6_OP_LUI 0b001111
#define MIPS32R6_OP_LW 0b100011
#define MIPS32R6_OP_SB 0b101000
#define MIPS32R6_OP_SH 0b101001
#define MIPS32R6_OP_SW 0b101011
#define MIPS32R6_OP_SLTI 0b001010
#define MIPS32R6_OP_SLTIU 0b001011
#define MIPS32R6_OP_ORI 0b001101
#define MIPS32R6_OP_XORI 0b001110
#define MIPS32R6_FUNCT_ADD 0b100000
#define MIPS32R6_FUNCT_ADDU 0b100001
#define MIPS32R6_FUNCT_AND 0b100100
#define MIPS32R6_FUNCT_SOP32 0b011010
#define MIPS32R6_FUNCT_SOP33 0b011011
#define MIPS32R6_FUNCT_JALR 0b001001
#define MIPS32R6_FUNCT_JR 0b001000
#define MIPS32R6_FUNCT_MFHI 0b010000
#define MIPS32R6_FUNCT_MFLO 0b010010
#define MIPS32R6_FUNCT_MTHI 0b010001
#define MIPS32R6_FUNCT_MTLO 0b010011
#define MIPS32R6_FUNCT_SOP30 0b011000
#define MIPS32R6_FUNCT_SOP31 0b011001
#define MIPS32R6_FUNCT_SLL 0b000000
#define MIPS32R6_FUNCT_SLLV 0b000100
#define MIPS32R6_FUNCT_SLT 0b101010
#define MIPS32R6_FUNCT_SLTU 0b101011
#define MIPS32R6_FUNCT_SRA 0b000011
#define MIPS32R6_FUNCT_SRAV 0b000111
#define MIPS32R6_FUNCT_SRL 0b000010
#define MIPS32R6_FUNCT_SRLV 0b000110
#define MIPS32R6_FUNCT_SUB 0b100010
#define MIPS32R6_FUNCT_SUBU 0b100011
#define MIPS32R6_FUNCT_SYSCALL 0b001100
#define MIPS32R6_FUNCT_OR 0b100101
#define MIPS32R6_FUNCT_NOR 0b100111
#define MIPS32R6_FUNCT_XOR 0b100110
#define MIPS32R6_FUNCT_BAL 0b10001
#define MIPS32R6_FUNCT_BGEZ 0b00001
#define MIPS32R6_FUNCT_BGEZAL 0b10001
#define MIPS32R6_FUNCT_BGEZALL 0b10011
#define MIPS32R6_FUNCT_BGEZL 0b00011
#define MIPS32R6_FUNCT_BLTZ 0b00000
#define MIPS32R6_FUNCT_BLTZAL 0b10000
#define MIPS32R6_FUNCT_BLTZALL 0b10010
#define MIPS32R6_FUNCT_BLTZL 0b00010
#define MIPS32R6_SOP30_MUL 0b00010
#define MIPS32R6_SOP30_MUH 0b00011
#define MIPS32R6_SOP31_MULU 0b00010
#define MIPS32R6_SOP31_MUHU 0b00011
#define MIPS32R6_SOP32_DIV 0b00010
#define MIPS32R6_SOP32_MOD 0b00011
#define MIPS32R6_SOP33_DIVU 0b00010
#define MIPS32R6_SOP33_MODU 0b00011
#define __MIPS32R6_INS_LEN (__MIPS32R6_INS_NULL)
#define __MIPS32R6_PSEUDO_LEN (4)
#define __MIPS32R6_GRAMMER_LEN (__MIPS32R6_INS_LEN + __MIPS32R6_PSEUDO_LEN)
extern struct mips32_grammer mips32r6_grammers[__MIPS32R6_GRAMMER_LEN];
extern union mips32_instruction mips32r6_instructions[__MIPS32R6_INS_LEN];
#endif /* __MIPS32R6_H__ */

286
lib/mips32r6.c Normal file
View file

@ -0,0 +1,286 @@
#include <mips32r6.h>
#define RTYPE "rd,rs,rt"
#define ITYPE "rt,rs,immd"
#define JTYPE "target"
#define LOAD "rt,offset(base)"
#define SHIFT "rd,rt,sa"
#define SHIFTV "rd,rt,rs"
#define BRANCH "rs,rt,offset"
#define BRANCHZ "rs,offset"
#define INS(name, grammer) {#name, grammer, MIPS32R6_INS_ ##name, \
/* pseudo stub */ 0, {{0, ""}}}
#define PSEUDO(name, grammer, ...) {name, grammer, __MIPS32R6_INS_NULL, \
__VA_ARGS__ }
struct mips32_grammer mips32r6_grammers[__MIPS32R6_GRAMMER_LEN] = {
// real instructions
INS(ADD, RTYPE),
INS(ADDI, ITYPE),
INS(ADDIU, ITYPE),
INS(ADDU, RTYPE),
INS(AND, RTYPE),
INS(ADDI, ITYPE),
INS(ANDI, ITYPE),
INS(BAL, "offset"),
INS(BALC, "target"),
INS(BC, "target"),
INS(BEQ, BRANCH),
INS(BGEZ, BRANCHZ),
INS(BGEZAL, BRANCHZ),
INS(BGTZ, BRANCHZ),
INS(BLEZ, BRANCHZ),
INS(BLTZ, BRANCHZ),
INS(BLTZAL, BRANCHZ),
INS(BNE, BRANCH),
INS(DIV, RTYPE),
INS(MOD, RTYPE),
INS(DIVU, RTYPE),
INS(MODU, RTYPE),
INS(J, JTYPE),
INS(JAL, JTYPE),
INS(JALR, "rs"),
INS(JR, "rs"),
INS(LB, LOAD),
INS(LBU, LOAD),
INS(LH, LOAD),
INS(LHU, LOAD),
INS(LUI, "rt,immd"),
INS(LW, LOAD),
INS(MUL, RTYPE),
INS(MUH, RTYPE),
INS(MULU, RTYPE),
INS(MUHU, RTYPE),
INS(SB, LOAD),
INS(SH, LOAD),
INS(SW, LOAD),
INS(SLL, SHIFT),
INS(SLLV, SHIFTV),
INS(SLT, RTYPE),
INS(SLTI, ITYPE),
INS(SLTIU, ITYPE),
INS(SLTU, RTYPE),
INS(SRA, SHIFT),
INS(SRAV, SHIFTV),
INS(SRL, SHIFT),
INS(SRLV, SHIFT),
INS(SUB, RTYPE),
INS(SUBU, RTYPE),
INS(SYSCALL, ""),
INS(OR, RTYPE),
INS(ORI, ITYPE),
INS(NOR, RTYPE),
INS(XOR, RTYPE),
INS(XORI, ITYPE),
// pseudo instructions
PSEUDO("li", "rt,immd", 1, {
{MIPS32R6_INS_ADDI, "rt,immd"}
}),
PSEUDO("la", "rt,target", 2, {
{MIPS32R6_INS_LUI, "rt=$at,hi"},
{MIPS32R6_INS_ORI, "rt,rs=$at,lo"},
}),
PSEUDO("move", "rd,rs", 1, {
{MIPS32R6_INS_OR, "rd,rs"}
}),
PSEUDO("nop", "", 1, {
{MIPS32R6_INS_SLL, ""},
}),
};
/* Builds one entry of mips32r6_instructions, stored at the index of the
 * instruction's enum value and preloaded with its fixed encoding bits. */
#define MIPS_INS(ins, ...) \
	[MIPS32R6_INS_ ##ins] = { __VA_ARGS__ },

/* Base instruction word of every real instruction, indexed by
 * enum mips32r6_instruction_type. Only the bits that are fixed for the
 * instruction are set; all other bits are zero.
 *
 * All designators inside one entry must name fields of the SAME anonymous
 * struct of union mips32_instruction: designating a field of a second union
 * member replaces the first member, so GCC and Clang would discard an
 * earlier `.op`. The REGIMM branches therefore store their function code
 * through `.rt`, which covers the same five bits as `.bfunct`. */
union mips32_instruction mips32r6_instructions[__MIPS32R6_INS_LEN] = {

	/* ADD - add */
	MIPS_INS(ADD, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_ADD)

	/* ADDI - add immediate */
	MIPS_INS(ADDI, .op = MIPS32R6_OP_ADDI)

	/* ADDIU - add immediate unsigned */
	MIPS_INS(ADDIU, .op = MIPS32R6_OP_ADDIU)

	/* ADDU - add unsigned */
	MIPS_INS(ADDU, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_ADDU)

	/* AND - and */
	MIPS_INS(AND, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_AND)

	/* ANDI - and immediate */
	MIPS_INS(ANDI, .op = MIPS32R6_OP_ANDI)

	/* BAL - branch and link */
	MIPS_INS(BAL, .op = MIPS32R6_OP_REGIMM, .rt = MIPS32R6_FUNCT_BAL)

	/* BALC - branch and link, compact */
	MIPS_INS(BALC, .op = MIPS32R6_OP_BALC)

	/* BC - branch, compact */
	MIPS_INS(BC, .op = MIPS32R6_OP_BC)

	/* BEQ - branch on equal */
	MIPS_INS(BEQ, .op = MIPS32R6_OP_BEQ)

	/* BGEZ - branch on greater than or equal to zero */
	MIPS_INS(BGEZ, .op = MIPS32R6_OP_REGIMM, .rt = MIPS32R6_FUNCT_BGEZ)

	/* BGEZAL - branch on greater than or equal to zero and link */
	MIPS_INS(BGEZAL, .op = MIPS32R6_OP_REGIMM,
		 .rt = MIPS32R6_FUNCT_BGEZAL)

	/* BGTZ - branch on greater than zero */
	MIPS_INS(BGTZ, .op = MIPS32R6_OP_BGTZ)

	/* BLEZ - branch on less than or equal to zero */
	MIPS_INS(BLEZ, .op = MIPS32R6_OP_BLEZ)

	/* BLTZ - branch on less than zero */
	MIPS_INS(BLTZ, .op = MIPS32R6_OP_REGIMM, .rt = MIPS32R6_FUNCT_BLTZ)

	/* BLTZAL - branch on less than zero and link */
	MIPS_INS(BLTZAL, .op = MIPS32R6_OP_REGIMM,
		 .rt = MIPS32R6_FUNCT_BLTZAL)

	/* BNE - branch on not equal */
	MIPS_INS(BNE, .op = MIPS32R6_OP_BNE)

	/* DIV - divide */
	MIPS_INS(DIV, .op = MIPS32R6_OP_SPECIAL, .shamt = MIPS32R6_SOP32_DIV,
		 .funct = MIPS32R6_FUNCT_SOP32)

	/* MOD - modulo */
	MIPS_INS(MOD, .op = MIPS32R6_OP_SPECIAL, .shamt = MIPS32R6_SOP32_MOD,
		 .funct = MIPS32R6_FUNCT_SOP32)

	/* DIVU - divide unsigned */
	MIPS_INS(DIVU, .op = MIPS32R6_OP_SPECIAL, .shamt = MIPS32R6_SOP33_DIVU,
		 .funct = MIPS32R6_FUNCT_SOP33)

	/* MODU - modulo unsigned */
	MIPS_INS(MODU, .op = MIPS32R6_OP_SPECIAL, .shamt = MIPS32R6_SOP33_MODU,
		 .funct = MIPS32R6_FUNCT_SOP33)

	/* J - jump */
	MIPS_INS(J, .op = MIPS32R6_OP_J)

	/* JAL - jump and link */
	MIPS_INS(JAL, .op = MIPS32R6_OP_JAL)

	/* JALR - jump and link register */
	MIPS_INS(JALR, .rd = MIPS32_REG_RA, .op = MIPS32R6_OP_SPECIAL,
		 .funct = MIPS32R6_FUNCT_JALR)

	/* JALX - jump and link exchange */
	MIPS_INS(JALX, .op = MIPS32R6_OP_JALX)

	/* JR - jump register */
	MIPS_INS(JR, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_JR)

	/* LB - load byte */
	MIPS_INS(LB, .op = MIPS32R6_OP_LB)

	/* LBU - load byte unsigned */
	MIPS_INS(LBU, .op = MIPS32R6_OP_LBU)

	/* LH - load half */
	MIPS_INS(LH, .op = MIPS32R6_OP_LH)

	/* LHU - load half unsigned */
	MIPS_INS(LHU, .op = MIPS32R6_OP_LHU)

	/* LUI - load upper immediate */
	MIPS_INS(LUI, .op = MIPS32R6_OP_LUI)

	/* LW - load word */
	MIPS_INS(LW, .op = MIPS32R6_OP_LW)

	/* MUL - multiply low word */
	MIPS_INS(MUL, .op = MIPS32R6_OP_SPECIAL, .shamt = MIPS32R6_SOP30_MUL,
		 .funct = MIPS32R6_FUNCT_SOP30)

	/* MUH - multiply high word */
	MIPS_INS(MUH, .op = MIPS32R6_OP_SPECIAL, .shamt = MIPS32R6_SOP30_MUH,
		 .funct = MIPS32R6_FUNCT_SOP30)

	/* MULU - multiply low word unsigned */
	MIPS_INS(MULU, .op = MIPS32R6_OP_SPECIAL, .shamt = MIPS32R6_SOP31_MULU,
		 .funct = MIPS32R6_FUNCT_SOP31)

	/* MUHU - multiply high word unsigned */
	MIPS_INS(MUHU, .op = MIPS32R6_OP_SPECIAL, .shamt = MIPS32R6_SOP31_MUHU,
		 .funct = MIPS32R6_FUNCT_SOP31)

	/* SB - store byte */
	MIPS_INS(SB, .op = MIPS32R6_OP_SB)

	/* SH - store half */
	MIPS_INS(SH, .op = MIPS32R6_OP_SH)

	/* SW - store word */
	MIPS_INS(SW, .op = MIPS32R6_OP_SW)

	/* SLL - shift left logical */
	MIPS_INS(SLL, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_SLL)

	/* SLLV - shift left logical variable */
	MIPS_INS(SLLV, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_SLLV)

	/* SLT - set less than */
	MIPS_INS(SLT, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_SLT)

	/* SLTI - set less than immediate */
	MIPS_INS(SLTI, .op = MIPS32R6_OP_SLTI)

	/* SLTIU - set less than immediate unsigned */
	MIPS_INS(SLTIU, .op = MIPS32R6_OP_SLTIU)

	/* SLTU - set less than unsigned */
	MIPS_INS(SLTU, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_SLTU)

	/* SRA - shift right arithmetic */
	MIPS_INS(SRA, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_SRA)

	/* SRAV - shift right arithmetic variable */
	MIPS_INS(SRAV, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_SRAV)

	/* SRL - shift right logical */
	MIPS_INS(SRL, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_SRL)

	/* SRLV - shift right logical variable */
	MIPS_INS(SRLV, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_SRLV)

	/* SUB - subtract */
	MIPS_INS(SUB, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_SUB)

	/* SUBU - subtract unsigned */
	MIPS_INS(SUBU, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_SUBU)

	/* SYSCALL - syscall */
	MIPS_INS(SYSCALL, .op = MIPS32R6_OP_SPECIAL,
		 .funct = MIPS32R6_FUNCT_SYSCALL)

	/* OR - or */
	MIPS_INS(OR, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_OR)

	/* ORI - or immediate */
	MIPS_INS(ORI, .op = MIPS32R6_OP_ORI)

	/* NOR - not or */
	MIPS_INS(NOR, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_NOR)

	/* XOR - exclusive or */
	MIPS_INS(XOR, .op = MIPS32R6_OP_SPECIAL, .funct = MIPS32R6_FUNCT_XOR)

	/* XORI - exclusive or immediate */
	MIPS_INS(XORI, .op = MIPS32R6_OP_XORI)
};

#undef MIPS_INS

View file

@ -3,7 +3,7 @@
CFLAGS += -std=gnu2x CFLAGS += -std=gnu2x
# add include directory # add include directory
CFLAGS += -isystem ../include CFLAGS += -isystem ../include -DPREFIX=$(PREFIX)
INCLUDE += ../include INCLUDE += ../include
# add lib directory # add lib directory
@ -13,7 +13,7 @@ H_SRC = $(shell find $(SRC) $(INCLUDE) -type f -name "*.h")
C_SRC = $(shell find $(SRC) -type f -name "*.c") C_SRC = $(shell find $(SRC) -type f -name "*.c")
C_OBJ = $(patsubst %.c,$(BIN)/%.o,$(C_SRC)) C_OBJ = $(patsubst %.c,$(BIN)/%.o,$(C_SRC))
.PHONY: clean build run test .PHONY: clean build run fuzz
build: $(BIN)/$(OUT) build: $(BIN)/$(OUT)
@ -24,8 +24,8 @@ clean:
run: build run: build
$(BIN)/$(OUT) $(BIN)/$(OUT)
test: fuzz: clean
make -C ../test $(OUT) make -C . build CC=afl-cc LD=afl-cc
mkdir -p ../fuzz mkdir -p ../fuzz
rm -fr ../fuzz/$(OUT) rm -fr ../fuzz/$(OUT)
afl-fuzz -i ../test/$(OUT) -o ../fuzz -M $(OUT) -- $(BIN)/$(OUT) @@ afl-fuzz -i ../test/$(OUT) -o ../fuzz -M $(OUT) -- $(BIN)/$(OUT) @@

View file

@ -1,5 +1,4 @@
#include <merror.h> #include <merror.h>
#include <mips.h>
#include <netinet/in.h> #include <netinet/in.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@ -9,8 +8,9 @@
#include <melf.h> #include <melf.h>
#include "asm.h" #include "asm.h"
#include "gen.h"
#include "mlimits.h" #include "mlimits.h"
#include "parse.h" #include "tab.h"
extern char *current_file; extern char *current_file;
@ -19,325 +19,158 @@ extern char *current_file;
#define SEC_ALIGN 0x1000 #define SEC_ALIGN 0x1000
static int create_symbol(struct assembler *assembler, static int elf_rel_type(enum reference_type ty) {
const char name[MAX_LEX_LENGTH], switch (ty) {
ssize_t section_idx, case REF_NONE:
size_t section_offset, return R_MIPS_NONE;
unsigned char bind) case REF_MIPS_16:
return R_MIPS_16;
case REF_MIPS_26:
return R_MIPS_26;
case REF_MIPS_PC16:
return R_MIPS_PC16;
case REF_MIPS_LO16:
return R_MIPS_LO16;
case REF_MIPS_HI16:
return R_MIPS_HI16;
}
return R_MIPS_NONE;
}
static int elf_section_init_reltab(struct section *sec,
struct elf_section *elf_sec)
{ {
Elf32_Rel *reltab = malloc(sizeof(Elf32_Rel) *
sec->reftab.len);
if (reltab == NULL) {
PERROR("cannot alloc");
return M_ERROR;
}
for (uint32_t i = 0; i < sec->reftab.len; i++) {
Elf32_Rel *rel = &reltab[i];
struct reference *ref = &sec->reftab.references[i];
rel->r_offset = B32(ref->offset);
int sym = ref->symbol->tabidx + 1;
int type = elf_rel_type(ref->type);
rel->r_info = B32(ELF32_R_INFO(sym, type));
}
elf_sec->reltab_len = sec->reftab.len;
elf_sec->reltab = reltab;
return M_SUCCESS;
}
/* Prepare the ELF bookkeeping for one generated section.
 *
 * Links `elf_sec` to `sec`, zeroes the section-header indexes (they are
 * not known yet) and, when the section has references, builds its
 * relocation table.
 *
 * Returns M_SUCCESS, or M_ERROR when the relocation table cannot be built. */
static int elf_section_init(struct section *sec, struct elf_section *elf_sec)
{
	elf_sec->data = sec;

	/* section header indexes are assigned later */
	elf_sec->shdr_idx = 0;
	elf_sec->reltab_shidx = 0;

	/* start with an empty relocation table */
	elf_sec->reltab_len = sec->reftab.len;
	elf_sec->reltab = NULL;

	/* nothing to relocate */
	if (sec->reftab.len == 0)
		return M_SUCCESS;

	return elf_section_init_reltab(sec, elf_sec) ? M_ERROR : M_SUCCESS;
}
/* Release the relocation table owned by an elf section, if it has one.
 * The elf_section structure itself is not freed. */
static void elf_section_free(struct elf_section *sec)
{
	/* free() ignores NULL, so a section without relocations is fine */
	free(sec->reltab);
}
/* Build the assembler's elf section table from the generator's sections.
 *
 * Allocates one elf_section per generated section and initialises each of
 * them (including its relocation table). On success the table is stored in
 * assembler->sections / assembler->section_len.
 *
 * Returns M_SUCCESS, or M_ERROR on allocation failure, in which case
 * everything allocated here has been released again. */
static int asm_init_sections(struct assembler *assembler)
{
	struct section *sections = assembler->gen.sections;
	uint32_t len = assembler->gen.sections_len;

	struct elf_section *elftab = malloc(sizeof(struct elf_section) * len);
	if (elftab == NULL) {
		PERROR("cannot alloc");
		return M_ERROR;
	}

	for (uint32_t i = 0; i < len; i++) {
		/* elf_section_init also links elftab[i] to sections[i] */
		if (elf_section_init(&sections[i], &elftab[i])) {
			/* the sections before i already own a relocation
			 * table; release those before dropping the table */
			for (uint32_t j = 0; j < i; j++)
				elf_section_free(&elftab[j]);
			free(elftab);
			return M_ERROR;
		}
	}

	assembler->sections = elftab;
	assembler->section_len = len;
	return M_SUCCESS;
}
/* Map a symbol type to its ELF symbol binding: only SYM_LOCAL is bound
 * locally; SYM_GLOBAL, SYM_EXTERN and any other value bind globally. */
static int elf_sym_bind(enum symbol_type ty) {
	if (ty == SYM_LOCAL)
		return STB_LOCAL;

	return STB_GLOBAL;
}
static int asm_init_symtab(struct assembler *assembler) {
struct symbol_table *symtab = &assembler->gen.symtab;
size_t len = symtab->len + 1;
Elf32_Sym *elftab = malloc(sizeof(Elf32_Sym) * len);
if (elftab == NULL) {
PERROR("cannot alloc");
}
// add null entry
elftab[0] = (Elf32_Sym) {0};
// add rest of the entries
for (uint32_t i = 0; i < symtab->len; i++) {
struct symbol *sym = &symtab->symbols[i];
int bind = elf_sym_bind(sym->type);
int type = STT_NOTYPE;
// get name
size_t str_off; size_t str_off;
if (strtab_write_str(&assembler->strtab, name, &str_off)) if (strtab_write_str(&assembler->strtab, sym->name.str,
&str_off)) {
free(elftab);
return M_ERROR; return M_ERROR;
}
Elf32_Sym symbol = { elftab[i+1] = (Elf32_Sym) {
.st_name = B32(str_off), .st_name = B32(str_off),
.st_value = B32(section_offset), .st_info = ELF32_ST_INFO(bind, type),
.st_size = 0, .st_size = 0,
.st_info = ELF32_ST_INFO(bind, STT_NOTYPE), .st_other = 0,
.st_other = ELF32_ST_VISIBILITY(STV_DEFAULT), .st_value = B32(sym->offset),
.st_shndx = B16(section_idx), .st_shndx = 0,
}; };
// dont put magic flag values inside symbol, only real indexes
if (section_idx < 0)
symbol.st_shndx = 0;
if (symtab_push(&assembler->symtab, symbol, section_idx))
return M_ERROR;
return M_SUCCESS;
}
/*
 * Look a symbol up by name; when it does not exist yet, create a local
 * stub symbol for it.
 *
 * res  - if non-NULL, receives a pointer to the symbol
 * res2 - if non-NULL, receives the index of the symbol
 *
 * Returns M_SUCCESS, or M_ERROR when the stub could not be created.
 */
static int find_symbol_or_stub(struct assembler *assembler,
		const char name[MAX_LEX_LENGTH],
		Elf32_Sym **res,
		size_t *res2)
{
	// already known: symtab_find fills in the results itself
	if (symtab_find(&assembler->symtab, res, res2, name) == M_SUCCESS)
		return M_SUCCESS;

	if (create_symbol(assembler, name, SYMSEC_STUB, 0, STB_LOCAL))
		return M_ERROR;

	// the freshly created stub is the last entry of the table
	size_t last = assembler->symtab.len - 1;

	if (res2 != NULL)
		*res2 = last;

	if (res != NULL)
		*res = &assembler->symtab.symbols[last];

	return M_SUCCESS;
}
static int handle_directive(struct assembler *assembler,
struct mips_directive *directive)
{
switch (directive->type) {
case MIPS_DIRECTIVE_SECTION: {
struct section_table *sec_tbl = &assembler->sectab;
struct section *sec;
if (sectab_get(sec_tbl, &sec, directive->name)
== M_SUCCESS) {
sec_tbl->current = sec;
break;
} }
if (sectab_alloc(sec_tbl, &sec, directive->name)) assembler->symbols = elftab;
return M_ERROR; assembler->symtab_len = len;
sec_tbl->current = sec;
break;
}
case MIPS_DIRECTIVE_ALIGN: {
assembler->sectab.current->alignment =
1 << directive->align;
if (assembler->sectab.current->alignment == 0) {
ERROR("cannot align to zero");
return M_ERROR;
}
break;
}
case MIPS_DIRECTIVE_SPACE: {
struct section_entry entry;
entry.type = ENT_NO_DATA;
entry.size = directive->space;
if (sec_push(assembler->sectab.current, entry))
return M_ERROR;
break;
}
case MIPS_DIRECTIVE_WORD: {
for (uint32_t i = 0; i < directive->len; i++) {
struct section_entry entry;
entry.type = ENT_WORD;
entry.word = directive->words[i];
entry.size = sizeof(uint32_t);
if (sec_push(assembler->sectab.current,
entry))
return M_ERROR;
}
break;
}
case MIPS_DIRECTIVE_HALF: {
for (uint32_t i = 0; i < directive->len; i++) {
struct section_entry entry;
entry.type = ENT_HALF;
entry.half = directive->halfs[i];
entry.size = sizeof(uint16_t);
if (sec_push(assembler->sectab.current,
entry))
return M_ERROR;
}
break;
}
case MIPS_DIRECTIVE_BYTE: {
for (uint32_t i = 0; i < directive->len; i++) {
struct section_entry entry;
entry.type = ENT_BYTE;
entry.byte = directive->bytes[i];
entry.size = sizeof(uint8_t);
if (sec_push(assembler->sectab.current,
entry))
return M_ERROR;
}
break;
}
case MIPS_DIRECTIVE_EXTERN: {
if (symtab_find(&assembler->symtab, NULL, NULL,
directive->name) == M_SUCCESS) {
ERROR("cannot extern local symbol '%s'",
directive->name);
return M_ERROR;
}
if (create_symbol(assembler, directive->name, SYMSEC_EXTERN, 0,
STB_GLOBAL))
return M_ERROR;
break;
}
case MIPS_DIRECTIVE_GLOBL: {
Elf32_Sym *sym;
if (symtab_find(&assembler->symtab, &sym, NULL,
directive->name) == M_SUCCESS) {
sym->st_info = ELF32_ST_INFO(STB_GLOBAL, STT_NOTYPE);
break;
}
if (create_symbol(assembler, directive->name, SYMSEC_STUB, 0,
STB_GLOBAL))
return M_ERROR;
break;
}
case MIPS_DIRECTIVE_ASCII: {
struct section_entry entry;
entry.type = ENT_STR;
entry.size = strlen(directive->name);
memcpy(entry.str, directive->name, entry.size);
if (sec_push(assembler->sectab.current, entry))
return M_ERROR;
break;
}
case MIPS_DIRECTIVE_ASCIIZ: {
struct section_entry entry;
entry.type = ENT_STR;
entry.size = strlen(directive->name) + 1;
memcpy(entry.str, directive->name, entry.size);
if (sec_push(assembler->sectab.current, entry))
return M_ERROR;
break;
}
}
return M_SUCCESS;
}
/*
 * Define a label at the current end of the current section.
 *
 * A name that only exists as a stub is bound to the current section; a name
 * that is already defined is reported as a redefinition.
 */
static int handle_label(struct assembler *assembler,
		const char name[MAX_LEX_LENGTH])
{
	struct section *cur = assembler->sectab.current;
	Elf32_Sym *ref;
	size_t symidx;

	// unknown name: create a new local symbol at the current offset
	if (symtab_find(&assembler->symtab, &ref, &symidx, name) != M_SUCCESS) {
		if (create_symbol(assembler, name, cur->index, sec_size(cur),
				STB_LOCAL))
			return M_ERROR;
		return M_SUCCESS;
	}

	ssize_t *sec = &assembler->symtab.sections[symidx];

	// anything other than a stub has already been defined
	if (*sec != SYMSEC_STUB) {
		ERROR("redefined symbol '%s'", name);
		return M_ERROR;
	}

	// the symbol is actually just a stub, so fill in its real location
	*sec = cur->index;
	ref->st_value = B32(sec_size(cur));
	return M_SUCCESS;
}
static int handle_ins(struct assembler *assembler,
struct ins_expr *expr)
{
struct section *sec = assembler->sectab.current;
size_t secidx = sec->len;
for (size_t i = 0; i < expr->ins_len; i++) {
union mips_instruction_data *ins =
&expr->ins[i].data;
struct reference *ref =
&expr->ref[i];
struct section_entry entry;
entry.type = ENT_INS;
entry.size = sizeof(union mips_instruction_data);
entry.ins = B32(ins->raw);
if (sec_push(sec, entry))
return M_ERROR;
if (ref->type == R_MIPS_NONE)
continue;
size_t symidx;
if (find_symbol_or_stub(assembler, ref->name, NULL, &symidx))
return M_ERROR;
Elf32_Rela rel = {
.r_info = B32(ELF32_R_INFO(symidx, ref->type)),
.r_addend = B32(ref->addend),
.r_offset = B32(sec_index(sec, secidx + i)),
};
if (reltab_push(&sec->reltab, rel))
return M_ERROR;
}
return M_SUCCESS; return M_SUCCESS;
} }
static int parse_file(struct assembler *assembler) static int parse_file(struct assembler *assembler)
{ {
struct parser *parser = &assembler->parser; if (generate_mips32r6(&assembler->gen))
while (1) {
struct expr expr;
int res = parser_next(parser, &expr);
if (res == M_ERROR)
return M_ERROR; return M_ERROR;
if (asm_init_sections(assembler))
if (res == M_EOF)
return M_SUCCESS;
switch (expr.type) {
case EXPR_INS:
if (handle_ins(assembler, &expr.ins))
return M_ERROR; return M_ERROR;
break; if (asm_init_symtab(assembler))
case EXPR_DIRECTIVE:
if (handle_directive(assembler,
&expr.directive))
return M_ERROR; return M_ERROR;
break;
case EXPR_LABEL:
if (handle_label(assembler, expr.label))
return M_ERROR;
break;
case EXPR_CONSTANT:
break;
}
}
return M_SUCCESS;
}
static int assemble_phdr(struct assembler *assembler, Elf32_Phdr **res,
uint32_t *res2)
{
Elf32_Phdr *phdr = malloc(sizeof(Elf32_Phdr) *
assembler->sectab.len);
if (phdr == NULL) {
PERROR("cannot alloc");
return M_ERROR;;
}
for (uint32_t i = 0; i < assembler->sectab.len; i++) {
Elf32_Phdr *hdr = &phdr[i];
struct section *sec = &assembler->sectab.sections[i];
size_t size = sec_size(sec);
hdr->p_type = B32(PT_LOAD);
hdr->p_flags = B32(
(sec->execute << 0) |
(sec->write << 1) |
(sec->read << 2));
hdr->p_offset = 0;
hdr->p_vaddr = 0;
hdr->p_paddr = 0;
hdr->p_filesz = B32(size);
hdr->p_memsz = B32(size);
hdr->p_align = B32(SEC_ALIGN);
}
*res = phdr;
*res2 = assembler->sectab.len;
return M_SUCCESS; return M_SUCCESS;
} }
@ -349,8 +182,8 @@ static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res,
max_entries += 1; // symtab max_entries += 1; // symtab
max_entries += 1; // strtab max_entries += 1; // strtab
max_entries += 1; // shtrtab max_entries += 1; // shtrtab
max_entries += assembler->sectab.len; // sections max_entries += assembler->section_len; // sections
max_entries += assembler->sectab.len; // reltabs per section max_entries += assembler->section_len; // reltabs per section
Elf32_Shdr *shdr = malloc(sizeof(Elf32_Shdr) * max_entries); Elf32_Shdr *shdr = malloc(sizeof(Elf32_Shdr) * max_entries);
@ -366,16 +199,17 @@ static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res,
shdr[count++] = (Elf32_Shdr) {0}; shdr[count++] = (Elf32_Shdr) {0};
// reltables // reltables
for (uint32_t i = 0; i < assembler->sectab.len; i++) { for (uint32_t i = 0; i < assembler->section_len; i++) {
struct section *sec = &assembler->sectab.sections[i]; struct elf_section *sec = &assembler->sections[i];
const char *prefix = ".reltab."; const char *prefix = ".reltab";
char reltab_name[MAX_LEX_LENGTH + 8]; char reltab_name[MAX_LEX_LENGTH + strlen(prefix)];
if (sec->reltab.len == 0) if (sec->reltab_len == 0)
continue; continue;
strcpy(reltab_name, prefix); strcpy(reltab_name, prefix);
strcat(reltab_name, sec->name); strncat(reltab_name, sec->data->name.str,
MAX_LEX_LENGTH - strlen(prefix));
if (strtab_write_str(&assembler->shstrtab, if (strtab_write_str(&assembler->shstrtab,
reltab_name, &str_off)) { reltab_name, &str_off)) {
@ -386,7 +220,7 @@ static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res,
sec->reltab_shidx = count; sec->reltab_shidx = count;
shdr[count++] = (Elf32_Shdr) { shdr[count++] = (Elf32_Shdr) {
.sh_name = B32(str_off), .sh_name = B32(str_off),
.sh_type = B32(SHT_RELA), .sh_type = B32(SHT_REL),
.sh_flags = 0, .sh_flags = 0,
.sh_addr = 0, .sh_addr = 0,
.sh_offset = 0, .sh_offset = 0,
@ -394,38 +228,38 @@ static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res,
.sh_link = 0, .sh_link = 0,
.sh_info = 0, .sh_info = 0,
.sh_addralign = B32(1), .sh_addralign = B32(1),
.sh_entsize = B32(sizeof(Elf32_Rela)), .sh_entsize = B32(sizeof(Elf32_Rel)),
}; };
} }
// for each section // for each section
for (uint32_t i = 0; i < assembler->sectab.len; i++) { for (uint32_t i = 0; i < assembler->section_len; i++) {
struct section *sec = &assembler->sectab.sections[i]; struct elf_section *sec = &assembler->sections[i];
char name[MAX_LEX_LENGTH+1] = "."; const char *name = sec->data->name.str;
strcat(name, sec->name);
if (strtab_write_str(&assembler->shstrtab, name, &str_off)) { if (strtab_write_str(&assembler->shstrtab, name, &str_off)) {
free(shdr); free(shdr);
return M_ERROR; return M_ERROR;
} }
sec->shdr_idx = count; sec->shdr_idx = count;
if (sec->reltab.len != 0) if (sec->reltab_len != 0)
shdr[sec->reltab_shidx].sh_info = B32(count); shdr[sec->reltab_shidx].sh_info = B32(count);
shdr[count++] = (Elf32_Shdr){ shdr[count++] = (Elf32_Shdr){
.sh_name = B32(str_off), .sh_name = B32(str_off),
.sh_type = B32(SHT_PROGBITS), .sh_type = B32(sec->data->execute ?
SHT_PROGBITS : SHT_NOBITS),
.sh_flags = B32( .sh_flags = B32(
(sec->write << 0) | (sec->data->write << 0) |
(sec->execute << 2) | (sec->data->execute << 2) |
SHF_ALLOC), SHF_ALLOC),
.sh_addr = 0, .sh_addr = 0,
.sh_offset = 0, .sh_offset = 0,
.sh_size = 0, .sh_size = 0,
.sh_link = 0, .sh_link = 0,
.sh_info = 0, .sh_info = 0,
.sh_addralign = B32(sec->alignment), .sh_addralign = B32(SEC_ALIGN),
.sh_entsize = 0, .sh_entsize = 0,
}; };
} }
@ -490,9 +324,9 @@ static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res,
.sh_entsize = 0, .sh_entsize = 0,
}; };
for (uint32_t i = 0; i < assembler->sectab.len; i++) { for (uint32_t i = 0; i < assembler->section_len; i++) {
struct section *sec = &assembler->sectab.sections[i]; struct elf_section *sec = &assembler->sections[i];
if (sec->reltab.len == 0) if (sec->reltab_len == 0)
continue; continue;
shdr[sec->reltab_shidx].sh_link = shdr[sec->reltab_shidx].sh_link =
B32(assembler->symtab_shidx); B32(assembler->symtab_shidx);
@ -507,61 +341,53 @@ static int assemble_shdr(struct assembler *assembler, Elf32_Shdr **res,
static void update_offsets(struct assembler *assembler, Elf32_Ehdr *ehdr) static void update_offsets(struct assembler *assembler, Elf32_Ehdr *ehdr)
{ {
Elf32_Shdr *shdr = (Elf32_Shdr *) assembler->shdr; Elf32_Shdr *shdr = (Elf32_Shdr *) assembler->shdr;
Elf32_Phdr *phdr = (Elf32_Phdr *) assembler->phdr;
uint32_t ptr = 0; uint32_t ptr = 0;
// we must now correct offets and sizes inside the ehdr, phdr, // we must now correct offets and sizes inside the ehdr, phdr,
// and shdr // and shdr
ptr += sizeof(Elf32_Ehdr); ptr += sizeof(Elf32_Ehdr);
// phdr
ehdr->e_phoff = B32(ptr);
ptr += assembler->phdr_len * sizeof(Elf32_Phdr);
// reltbls // reltbls
for (uint32_t i = 0; i < assembler->sectab.len; i++) { for (uint32_t i = 0; i < assembler->section_len; i++) {
struct section *sec = &assembler->sectab.sections[i]; struct elf_section *sec = &assembler->sections[i];
if (sec->reltab.len == 0) if (sec->reltab_len == 0)
continue; continue;
int idx = sec->reltab_shidx; int idx = sec->reltab_shidx;
int len = sec->reltab.len; int len = sec->reltab_len;
shdr[idx].sh_offset = B32(ptr); shdr[idx].sh_offset = B32(ptr);
shdr[idx].sh_size = B32(len * sizeof(Elf32_Rela)); shdr[idx].sh_size = B32(len * sizeof(Elf32_Rel));
ptr += len * sizeof(Elf32_Rela); ptr += len * sizeof(Elf32_Rel);
}
// section padding
{
uint32_t mod = ptr % SEC_ALIGN;
if (mod != 0)
assembler->secalign = (SEC_ALIGN - mod);
else
assembler->secalign = 0;
ptr += assembler->secalign;
} }
// sections // sections
size_t v_addr = 0; size_t v_addr = 0;
for (uint32_t i = 0; i < assembler->sectab.len; i++) { for (uint32_t i = 0; i < assembler->section_len; i++) {
struct section *sec = &assembler->sectab.sections[i];
size_t pad = v_addr % SEC_ALIGN;
if (pad)
pad = SEC_ALIGN - pad;
v_addr += pad;
struct elf_section *sec = &assembler->sections[i];
uint32_t idx = sec->shdr_idx; uint32_t idx = sec->shdr_idx;
uint32_t size = ntohl(phdr[i].p_filesz); uint32_t size = sec->data->len;
phdr[i].p_offset = B32(ptr);
phdr[i].p_vaddr = B32(v_addr);
phdr[i].p_paddr = B32(v_addr);
shdr[idx].sh_offset = B32(ptr); shdr[idx].sh_offset = B32(ptr);
shdr[idx].sh_size = phdr[i].p_filesz; shdr[idx].sh_size = B32(size);
shdr[idx].sh_addr = phdr[i].p_vaddr; shdr[idx].sh_addr = B32(v_addr);
v_addr += size; v_addr += size;
ptr += size; ptr += size;
} }
// symtab // symtab
{
uint32_t len = assembler->symtab_len;
uint32_t size = len * sizeof(Elf32_Sym);
shdr[assembler->symtab_shidx].sh_offset = B32(ptr); shdr[assembler->symtab_shidx].sh_offset = B32(ptr);
shdr[assembler->symtab_shidx].sh_link = B32(assembler->strtab_shidx); shdr[assembler->symtab_shidx].sh_link =
shdr[assembler->symtab_shidx].sh_size = B32(assembler->strtab_shidx);
B32(assembler->symtab.len * sizeof(Elf32_Sym)); shdr[assembler->symtab_shidx].sh_size = B32(size);
ptr += assembler->symtab.len * sizeof(Elf32_Sym); ptr += size;
}
// strtab // strtab
shdr[assembler->strtab_shidx].sh_offset = B32(ptr); shdr[assembler->strtab_shidx].sh_offset = B32(ptr);
@ -573,24 +399,10 @@ static void update_offsets(struct assembler *assembler, Elf32_Ehdr *ehdr)
shdr[assembler->shstrtab_shidx].sh_size = shdr[assembler->shstrtab_shidx].sh_size =
B32(assembler->shstrtab.size); B32(assembler->shstrtab.size);
ptr += assembler->shstrtab.size; ptr += assembler->shstrtab.size;
// shdr // shdr
ehdr->e_shoff = B32(ptr); ehdr->e_shoff = B32(ptr);
} }
static void update_sym_shindx(struct assembler *assembler)
{
for (size_t i = 0; i < assembler->symtab.len; i++) {
Elf32_Sym *sym = &assembler->symtab.symbols[i];
ssize_t sec = assembler->symtab.sections[i];
if (sec >= 0) {
sym->st_shndx = B16(assembler->
sectab.sections[sec].shdr_idx);
}
}
}
static int write_file(struct assembler *assembler, Elf32_Ehdr *ehdr, static int write_file(struct assembler *assembler, Elf32_Ehdr *ehdr,
const char *path) const char *path)
{ {
@ -605,80 +417,70 @@ static int write_file(struct assembler *assembler, Elf32_Ehdr *ehdr,
// ehdr // ehdr
fwrite(ehdr, sizeof(Elf32_Ehdr), 1, out); fwrite(ehdr, sizeof(Elf32_Ehdr), 1, out);
// phdr
fwrite(assembler->phdr, sizeof(Elf32_Phdr), assembler->phdr_len, out);
// reltbls // reltbls
for (uint32_t i = 0; i < assembler->sectab.len; i++) { for (uint32_t i = 0; i < assembler->section_len; i++) {
struct section *sec = &assembler->sectab.sections[i]; struct elf_section *sec = &assembler->sections[i];
if (sec->reltab.len == 0) void *ptr = sec->reltab;
int len = sec->reltab_len;
if (len < 1)
continue; continue;
void *ptr = sec->reltab.data; fwrite(ptr, sizeof(Elf32_Rel), len, out);
int len = sec->reltab.len;
fwrite(ptr, sizeof(Elf32_Rela), len, out);
}
// section padding
for (uint32_t i = 0; i < assembler->secalign; i++) {
uint8_t zero = 0;
fwrite(&zero, 1, 1, out);
} }
// sections // sections
for (uint32_t i = 0; i < assembler->sectab.len; i++) { for (uint32_t i = 0; i < assembler->section_len; i++) {
struct section *sec = &assembler->sectab.sections[i]; struct elf_section *sec = &assembler->sections[i];
for (uint32_t j = 0; j < sec->len; j++) { void *ptr = sec->data->data;
struct section_entry *entry = &sec->entries[j]; size_t size = sec->data->len;
size_t size = entry->size; fwrite(ptr, 1, size, out);
size_t zeros = size % sec->alignment;;
if (entry->type != ENT_NO_DATA)
fwrite(&entry->data, size, 1, out);
else
zeros += size;
while(zeros) {
fputc(0, out);
zeros--;
}
}
} }
// sym tbl // sym tbl
fwrite(assembler->symtab.symbols, sizeof(Elf32_Sym), fwrite(assembler->symbols, sizeof(Elf32_Sym), assembler->symtab_len,
assembler->symtab.len, out); out);
// str tbl // str tbl
fwrite(assembler->strtab.ptr, assembler->strtab.size, 1, out); fwrite(assembler->strtab.ptr, 1, assembler->strtab.size, out);
// shstr tbl // shstr tbl
fwrite(assembler->shstrtab.ptr, assembler->shstrtab.size, 1, out); fwrite(assembler->shstrtab.ptr, 1, assembler->shstrtab.size, out);
// shdr // shdr
fwrite(assembler->shdr, sizeof(Elf32_Shdr), assembler->shdr_len, out); fwrite(assembler->shdr, sizeof(Elf32_Shdr), assembler->shdr_len, out);
// close
fclose(out); fclose(out);
return M_SUCCESS; return M_SUCCESS;
} }
static void update_sym_shndx(struct assembler *assembler)
{
for (uint32_t i = 1; i < assembler->symtab_len; i++) {
Elf32_Sym *esym = &assembler->symbols[i];
struct symbol *sym = &assembler->gen.symtab.symbols[i - 1];
// get shindx
int shindx = 0;
if (sym->secidx != SYM_SEC_STUB)
shindx = assembler->sections[sym->secidx].shdr_idx;
else if (sym->type == SYM_EXTERN)
shindx = 0;
esym->st_shndx = B16(shindx);
}
}
static int assemble_elf(struct assembler *assembler, const char *out) static int assemble_elf(struct assembler *assembler, const char *out)
{ {
if (assemble_phdr(assembler, (Elf32_Phdr **) &assembler->phdr, if (assemble_shdr(assembler, &assembler->shdr, &assembler->shdr_len))
&assembler->phdr_len)) {
return M_ERROR; return M_ERROR;
}
if (assemble_shdr(assembler, (Elf32_Shdr **) &assembler->shdr,
&assembler->shdr_len)) {
return M_ERROR;
};
Elf32_Ehdr ehdr = MIPS_ELF_EHDR; Elf32_Ehdr ehdr = MIPS_ELF_EHDR;
ehdr.e_phnum = B16(assembler->phdr_len);
ehdr.e_shnum = B16(assembler->shdr_len); ehdr.e_shnum = B16(assembler->shdr_len);
ehdr.e_shstrndx = B16(assembler->shstrtab_shidx); ehdr.e_shstrndx = B16(assembler->shstrtab_shidx);
update_offsets(assembler, &ehdr); update_offsets(assembler, &ehdr);
update_sym_shindx(assembler); update_sym_shndx(assembler);
if (write_file(assembler, &ehdr, out)) if (write_file(assembler, &ehdr, out))
return M_ERROR; return M_ERROR;
@ -709,10 +511,16 @@ int assemble_file(struct assembler_arguments args)
int assembler_init(struct assembler *assembler, const char *path) int assembler_init(struct assembler *assembler, const char *path)
{ {
if (lexer_init(path, &assembler->lexer)) assembler->shdr = NULL;
return M_ERROR; assembler->symbols = NULL;
assembler->sections = NULL;
assembler->strtab.ptr = NULL;
assembler->shstrtab.ptr = NULL;
assembler->gen.sections = NULL;
assembler->gen.symtab.symbols = NULL;
assembler->section_len = 0;
if (parser_init(&assembler->lexer, &assembler->parser)) if (generator_init(path, &assembler->gen))
return M_ERROR; return M_ERROR;
if (strtab_init(&assembler->shstrtab)) if (strtab_init(&assembler->shstrtab))
@ -721,31 +529,22 @@ int assembler_init(struct assembler *assembler, const char *path)
if (strtab_init(&assembler->strtab)) if (strtab_init(&assembler->strtab))
return M_ERROR; return M_ERROR;
if (symtab_init(&assembler->symtab))
return M_ERROR;
if (sectab_init(&assembler->sectab))
return M_ERROR;
assembler->symtab.strtab = &assembler->strtab;
assembler->phdr = NULL;
assembler->shdr = NULL;
return M_SUCCESS; return M_SUCCESS;
} }
void assembler_free(struct assembler *assembler) void assembler_free(struct assembler *assembler)
{ {
if (assembler->phdr)
free(assembler->phdr);
if (assembler->shdr) if (assembler->shdr)
free(assembler->shdr); free(assembler->shdr);
if (assembler->symbols)
free(assembler->symbols);
if (assembler->sections) {
for (uint32_t i = 0; i < assembler->section_len; i++)
elf_section_free(&assembler->sections[i]);
free(assembler->sections);
}
sectab_free(&assembler->sectab);
symtab_free(&assembler->symtab);
strtab_free(&assembler->strtab); strtab_free(&assembler->strtab);
strtab_free(&assembler->shstrtab); strtab_free(&assembler->shstrtab);
generator_free(&assembler->gen);
parser_free(&assembler->parser);
lexer_free(&assembler->lexer);
} }

View file

@ -3,19 +3,15 @@
#ifndef __ASM_H__ #ifndef __ASM_H__
#define __ASM_H__ #define __ASM_H__
#include <stddef.h>
#include <elf.h> #include <elf.h>
#include <mips.h>
#include "mlimits.h" #include "gen.h"
#include "parse.h"
#include "lex.h"
/// ///
/// ELF string table /// ELF string table
/// ///
struct str_table { struct elf_str_table {
// size of the ptr in bytes // size of the ptr in bytes
size_t size; size_t size;
@ -25,199 +21,60 @@ struct str_table {
}; };
/* initialize a string table */ /* initialize a string table */
int strtab_init(struct str_table *strtab); int strtab_init(struct elf_str_table *strtab);
/* free a string table */ /* free a string table */
void strtab_free(struct str_table *strtab); void strtab_free(struct elf_str_table *strtab);
/* get a string from the string table */ /* get a string from the string table */
int strtab_get_str(struct str_table *strtab, const char *str, size_t *res); int strtab_get_str(struct elf_str_table *strtab, const char *str, size_t *res);
/* get or append a string into the string table */ /* get or append a string into the string table */
int strtab_write_str(struct str_table *strtab, const char *str, size_t *res); int strtab_write_str(struct elf_str_table *strtab, const char *str, size_t *res);
/// ///
/// ELF symbol table /// elf section
///
struct symbol_table {
// length in size in sym ammt
size_t len;
size_t size;
// the Elf symbols
Elf32_Sym *symbols;
// keeps track of what section each ELF symbol is in
// *!!this is NOT the section header index in the ELF ehdr!!*
ssize_t *sections;
// symbols reference a string table that acutally
// holds the strings
//
// *weak* ptr, we do not own this!!!
struct str_table *strtab;
};
/* initalize a symbol table */
int symtab_init(struct symbol_table *symtab);
/* free the symbol table */
void symtab_free(struct symbol_table *symtab);
/* add a symbol to the symbol tbl */
int symtab_push(struct symbol_table *symtab, const Elf32_Sym sym,
ssize_t sec_idx);
/* find a symbol by name in the symbol table */
int symtab_find(struct symbol_table *symtab, Elf32_Sym **sym, size_t *idx,
const char name[MAX_LEX_LENGTH]);
///
/// ELF relocation table
///
struct relocation_table {
size_t len;
size_t size;
Elf32_Rela *data;
};
/* initalize a relocation table */
int reltab_init(struct relocation_table *reltab);
/* free the relocation table */
void reltab_free(struct relocation_table *reltab);
/* add a entry to the relocation table */
int reltab_push(struct relocation_table *reltab, const Elf32_Rela rel);
///
/// section entry
///
enum section_entry_type {
ENT_INS,
ENT_WORD,
ENT_HALF,
ENT_BYTE,
ENT_STR,
ENT_NO_DATA,
};
/* holds a entry inside the section, i.e. a instruction, raw data,
* special directives */
struct section_entry {
size_t size;
enum section_entry_type type;
union {
// to get memory address
char data;
// data
uint32_t ins;
char str[MAX_LEX_LENGTH];
int32_t word;
int16_t half;
int8_t byte;
};
};
///
/// section
/// ///
/* holds a section of the asm file (i.e. .text, .bss, .data) */ /* holds a section of the asm file (i.e. .text, .bss, .data) */
struct section { struct elf_section {
// length and size of amount of entries // section data *weak* pointer
size_t len; struct section *data;
size_t size;
struct section_entry *entries;
// section name
char name[MAX_LEX_LENGTH];
// index of the section in // index of the section in
// all the sections
size_t index;
// index of the sectio in
// the ELF shdr // the ELF shdr
size_t shdr_idx; size_t shdr_idx;
// ELF section data // relocation table
bool read;
bool write;
bool execute;
uint16_t alignment;
// ELF tables
size_t reltab_shidx; size_t reltab_shidx;
struct relocation_table reltab; uint32_t reltab_len;
Elf32_Rel *reltab;
}; };
/* get the size of the section in bytes */
size_t sec_size(struct section *section);
/* get the index of a entry in bytes */
size_t sec_index(struct section *section, size_t index);
/* add a section entry to the section */
int sec_push(struct section *section, struct section_entry entry);
/* holds eachs section */
struct section_table {
// length and size of amount of sections
size_t len;
size_t size;
struct section *sections;
// the current section
struct section *current;
};
/* initalize the section table */
int sectab_init(struct section_table *sec_tbl);
/* free the section table */
void sectab_free(struct section_table *sec_tbl);
/* create a new section in the section table */
int sectab_alloc(struct section_table *sec_tbl, struct section **sec,
const char name[MAX_LEX_LENGTH]);
/* get a section by name from the section table */
int sectab_get(struct section_table *sec_tbl, struct section **sec,
const char name[MAX_LEX_LENGTH]);
/// ///
/// assembler /// assembler
/// ///
struct assembler { struct assembler {
// the token lexer // the code generator
struct lexer lexer; struct generator gen;
// the expression parser
struct parser parser;
/// ELF tables /// symbol table
size_t symtab_shidx; size_t symtab_shidx;
struct symbol_table symtab; size_t symtab_len;
Elf32_Sym *symbols;
// sh string table
size_t strtab_shidx; size_t strtab_shidx;
struct str_table strtab; struct elf_str_table strtab;
// string table
size_t shstrtab_shidx; size_t shstrtab_shidx;
struct str_table shstrtab; struct elf_str_table shstrtab;
/// Segments /// sections
struct section_table sectab; uint32_t section_len;
uint32_t secalign; // align sections to 0x1000 when writing struct elf_section *sections;
/// program header
Elf32_Phdr *phdr;
uint32_t phdr_len;
/// section header /// section header
Elf32_Shdr *shdr; Elf32_Shdr *shdr;

812
masm/gen.c Normal file
View file

@ -0,0 +1,812 @@
#include <stdlib.h>
#include <merror.h>
#include <melf.h>
#include <mips32.h>
#include <mips32r6.h>
#include "tab.h"
#include "gen.h"
#include "parse.h"
///
/// section table
///
/*
 * Apply the default permissions and alignment for well known section names.
 *
 * The name is compared case-insensitively against a fixed table. When no
 * entry matches, the section is left untouched.
 */
static void section_get_default_perm(struct section *sec, const char *name)
{
	static const struct perms {
		const char *name;
		bool read;
		bool write;
		bool execute;
		int alignment;
	} defaults[] = {
		{".text", true, false, true, 4},
		{".code", true, false, true, 4},
		{".data", true, true, false, 1},
		{".stack", true, true, false, 1},
		{".rodata", true, false, false, 1},
		{".bss", true, true, false, 1},
		{".robss", true, false, false, 1},
	};
	// derive the entry count from the table itself instead of a separate
	// macro that has to be kept in sync (and used a reserved identifier)
	const size_t count = sizeof(defaults) / sizeof(defaults[0]);

	for (size_t i = 0; i < count; i++) {
		const struct perms *p = &defaults[i];
		if (strcasecmp(name, p->name) != 0)
			continue;
		sec->read = p->read;
		sec->write = p->write;
		sec->execute = p->execute;
		sec->align = p->alignment;
		return;
	}
}
/*
 * Find the section called `name`, creating it when it does not exist yet.
 *
 * gen  - generator owning the section table
 * res  - receives a pointer to the section on success
 * name - name of the section to look up
 *
 * A new section starts empty, readable and writable with an alignment of 1,
 * after which the defaults for well known names are applied.
 *
 * Returns M_SUCCESS, or M_ERROR when memory could not be allocated; on
 * error the section table is left with the same length as before.
 *
 * NOTE(review): growing the table may move it in memory, so pointers to
 * existing sections obtained earlier should not be kept across this call.
 */
static int section_get(struct generator *gen, struct section **res,
		const struct string *const name)
{
	/// find the section if it exists
	for (size_t i = 0; i < gen->sections_len; i++) {
		struct section *sec = &gen->sections[i];
		if (sec->name.len != name->len)
			continue;
		if (strcmp(sec->name.str, name->str) != 0)
			continue;
		*res = sec;
		return M_SUCCESS;
	}

	/// grow the table, but only when there is no free slot left
	if (gen->sections_len >= gen->sections_size) {
		size_t size = gen->sections_size ? gen->sections_size * 2 : 8;
		void *new = realloc(gen->sections,
				size * sizeof(struct section));
		if (new == NULL) {
			PERROR("cannot realloc");
			return M_ERROR;
		}
		gen->sections_size = size;
		gen->sections = new;
	}

	// the slot is not counted in sections_len until it is fully set up,
	// so a failure below never leaves a half initialised section behind
	struct section *sec = &gen->sections[gen->sections_len];

	// alloc reftab
	if (reftab_init(&sec->reftab))
		return M_ERROR;

	// copy name
	if (string_clone(&sec->name, name)) {
		reftab_free(&sec->reftab);
		return M_ERROR;
	}

	// set defaults
	sec->len = 0;
	sec->size = 0;
	sec->align = 1;
	sec->data = NULL;
	sec->read = true;
	sec->write = true;
	sec->execute = false;
	section_get_default_perm(sec, name->str);

	gen->sections_len++;
	*res = sec;
	return M_SUCCESS;
}
static int section_extend(struct section *section, size_t space)
{
size_t newlen = section->len + space;
if (newlen < section->size)
return M_SUCCESS;
size_t size = section->size ? section->size * 2 + newlen : newlen * 2;
void *new = realloc(section->data, size);
if (new == NULL) {
PERROR("cannot realloc");
return M_ERROR;
}
section->size = size;
section->data = new;
return M_SUCCESS;
}
/*
 * Append `len` bytes of data to the section.
 *
 * Zero bytes are inserted in front of the data so that the data starts on
 * a multiple of the section alignment. The padding is derived from the
 * current section length, the same way section_zero does it.
 *
 * Returns M_SUCCESS, or M_ERROR when the section cannot be grown.
 */
static int section_push(struct section *section, void *data, size_t len)
{
	// bytes needed to bring the write position up to the alignment
	size_t zeros = section->len % section->align;
	if (zeros)
		zeros = section->align - zeros;

	if (section_extend(section, len + zeros))
		return M_ERROR;

	char *dst = (char *) section->data + section->len;
	memset(dst, 0, zeros);
	// an empty push may come with no source buffer at all
	if (len)
		memcpy(dst + zeros, data, len);

	section->len += len + zeros;
	return M_SUCCESS;
}
/*
 * Append `len` zero bytes to the section, preceded by whatever zero padding
 * is needed to bring the current length up to the section alignment.
 *
 * Returns M_SUCCESS, or M_ERROR when the section cannot be grown.
 */
static int section_zero(struct section *section, size_t len)
{
	size_t rem = section->len % section->align;
	size_t pad = rem ? section->align - rem : 0;

	// padding and payload are both zeros, so they are written in one go
	size_t total = len + pad;

	if (section_extend(section, total))
		return M_ERROR;

	memset(section->data + section->len, 0, total);
	section->len += total;

	return M_SUCCESS;
}
/* release everything a section owns: its data buffer, name and reference
 * table (the section struct itself is not freed) */
void section_free(struct section *section)
{
	free(section->data);
	string_free(&section->name);
	reftab_free(&section->reftab);
}
///
/// generation functions
///
/*
 * Echo the source text of an expression to stderr, prefixing every line
 * with a tab, its line number and " | ".
 *
 * The text is re-read from the lexer's file between the expression's start
 * and end byte offsets; reading stops early on EOF or a NUL byte.
 */
static void print_curr_line(struct generator *gen,
		const struct expr *const expr)
{
	FILE *file = gen->parser.lexer.file;
	int line = expr->line_no;
	int remaining = expr->byte_end - expr->byte_start;
	// true whenever the next character starts a new output line
	int at_line_start = true;

	fseek(file, expr->byte_start, SEEK_SET);

	for (; remaining != 0; remaining--) {
		int c = getc(file);

		if (c == EOF || c == '\0')
			break;

		if (at_line_start) {
			fprintf(stderr, "\t%d | ", line);
			line++;
		}

		// a newline means the following character needs a prefix
		at_line_start = (c == '\n');

		putc(c, stderr);
	}
}
/*
 * Push `count` elements of `len` bytes each into the current section, one
 * element at a time (used for the word, half and byte directives).
 *
 * Returns M_SUCCESS, or M_ERROR as soon as one push fails.
 */
static int gen_directive_whb(struct generator *gen, const void *data,
		uint32_t count, uint32_t len)
{
	// TODO: endianness - elements are copied byte for byte as stored
	const char *cursor = data;

	for (uint32_t left = count; left > 0; left--) {
		if (section_push(gen->current, (void *) cursor, len))
			return M_ERROR;
		cursor += len;
	}

	return M_SUCCESS;
}
/*
 * Generate the effect of a single assembler directive.
 *
 * Data directives append to the current section, the section directive
 * switches the current section, and extern/globl update the type of the
 * named symbol (creating a stub for it when needed).
 *
 * Returns M_SUCCESS, or the error of the helper that failed.
 */
static int gen_directive(struct generator *gen,
		const struct expr *const e)
{
	const struct expr_directive *const dir = &e->directive;

	switch (dir->type) {
	case EXPR_DIRECTIVE_ALIGN:
		if (dir->align < 1) {
			ERROR("alignment cannot be zero");
			print_curr_line(gen, e);
			return M_ERROR;
		}
		gen->current->align = dir->align;
		return M_SUCCESS;

	case EXPR_DIRECTIVE_SPACE:
		return section_zero(gen->current, dir->space);

	case EXPR_DIRECTIVE_WORD:
		return gen_directive_whb(gen, dir->words, dir->len,
				sizeof(uint32_t));

	case EXPR_DIRECTIVE_HALF:
		return gen_directive_whb(gen, dir->halfs, dir->len,
				sizeof(uint16_t));

	case EXPR_DIRECTIVE_BYTE:
		return gen_directive_whb(gen, dir->bytes, dir->len,
				sizeof(uint8_t));

	case EXPR_DIRECTIVE_SECTION:
		return section_get(gen, &gen->current, &dir->section);

	case EXPR_DIRECTIVE_EXTERN: {
		struct symbol *sym;
		int res = symtab_find_or_stub(&gen->symtab, &sym, &dir->label);
		// only touch the symbol when the lookup actually produced one
		if (res == M_SUCCESS)
			sym->type = SYM_EXTERN;
		return res;
	}

	case EXPR_DIRECTIVE_GLOBL: {
		struct symbol *sym;
		int res = symtab_find_or_stub(&gen->symtab, &sym, &dir->label);
		if (res == M_SUCCESS)
			sym->type = SYM_GLOBAL;
		return res;
	}

	case EXPR_DIRECTIVE_ASCII:
		// one byte fewer than asciiz is pushed here
		return section_push(gen->current, dir->string.str,
				dir->string.len - 1);

	case EXPR_DIRECTIVE_ASCIIZ:
		return section_push(gen->current, dir->string.str,
				dir->string.len);
	}

	// directive types not listed above generate nothing
	return M_SUCCESS;
}
/* constants are not supported yet: always reports an error and fails */
static int gen_constant(struct generator *gen, struct expr_const *const expr)
{
	ERROR("constants not yet implemented");

	// both parameters are intentionally unused for now
	(void) expr;
	(void) gen;

	return M_ERROR;
}
static enum grammer_type get_gmr_type(const char *name, size_t *len)
{
#define CHK(part, str) { \
if (strncasecmp(str, name, strlen(str)) == 0) { \
*len = strlen(str); \
return GMR_ ##part; \
}} \
CHK(RD, "rd")
CHK(RS, "rs")
CHK(RT, "rt")
CHK(IMMD, "immd")
CHK(OFFSET_BASE, "offset(base)")
CHK(OFFSET, "offset")
CHK(TARGET, "target")
CHK(HI, "hi")
CHK(LO, "lo")
#undef CHK
ERROR("!!! BUG: this should never hit !!!");
exit(1);
}
static int parse_register(enum mips32_register *reg, struct string *name)
{
int len = name->len;
int c0 = len > 0 ? name->str[0] : '\0',
c1 = len > 1 ? name->str[1] : '\0',
c2 = len > 2 ? name->str[2] : '\0',
c3 = len > 3 ? name->str[3] : '\0';
// $zero
if (c0 == 'z') {
if (c1 == 'e' && c2 == 'r' && c3 == 'o') {
*reg = MIPS32_REG_ZERO;
return M_SUCCESS;
}
}
// $a0-a3 $at
else if (c0 == 'a') {
if (c1 == 't') {
*reg = MIPS32_REG_AT;
return M_SUCCESS;
}
if (c1 >= '0' && c1 <= '3') {
*reg = MIPS32_REG_A0;
*reg += c1 - '0';
return M_SUCCESS;
}
}
// $v0-v1
else if (c0 == 'v') {
if (c1 >= '0' && c1 <= '1') {
*reg = MIPS32_REG_V0;
*reg += c1 - '0';
return M_SUCCESS;
}
}
// $t0-t9
else if (c0 == 't') {
if (c1 >= '0' && c1 <= '7') {
*reg = MIPS32_REG_T0;
*reg += c1 - '0';
return M_SUCCESS;
}
// reg T8-T9 are not in order with T0-T7
if (c1 >= '8' && c1 <= '9') {
*reg = MIPS32_REG_T8;
*reg += c1 - '8';
return M_SUCCESS;
}
}
// $s0-s7 $sp
else if (c0 == 's') {
if (c1 >= '0' && c1 <= '7') {
*reg = MIPS32_REG_S0;
*reg += c1 - '0';
return M_SUCCESS;
}
if (c1 == 'p') {
*reg = MIPS32_REG_SP;
return M_SUCCESS;
}
}
// $k0-k1
else if (c0 == 'k') {
if (c1 >= '0' && c1 <= '1') {
*reg = MIPS32_REG_K0;
*reg += c1 - '0';
return M_SUCCESS;
}
}
// $gp
else if (c0 == 'g') {
if (c1 == 'p') {
*reg = MIPS32_REG_GP;
return M_SUCCESS;
}
}
// $fp
else if (c0 == 'f') {
if (c1 == 'p') {
*reg = MIPS32_REG_FP;
return M_SUCCESS;
}
}
// $rp
else if (c0 == 'r') {
if (c1 == 'a') {
*reg = MIPS32_REG_RA;
return M_SUCCESS;
}
}
// $0-31 (non aliased register names)
else if (c0 >= '0' && c0 <= '9') {
int i = c0 - '0';
if (c1 >= '0' && c1 <= '9') {
i *= 10;
i += c1 - '0';
}
if (i <= 31) {
*reg = i;
return M_SUCCESS;
}
}
ERROR("unknown register $%.*s", name->len, name->str);
return M_ERROR;
}
static int gen_ins_read_state(struct generator *gen,
struct expr *const expr,
struct gen_ins_state *state,
struct mips32_grammer *grammer)
{
char *ptr = grammer->grammer;
uint32_t argi = 0;
// read values into state
while (*ptr != '\0') {
if (argi >= expr->instruction.args_len) {
ERROR("not enough arguments passed");
print_curr_line(gen, expr);
return M_ERROR;
}
struct expr_ins_arg *arg = &expr->instruction.args[argi++];
size_t skip;
switch (get_gmr_type(ptr, &skip)) {
case GMR_RD:
// rd
if (arg->type != EXPR_INS_ARG_REGISTER) {
ERROR("expected a register");
print_curr_line(gen, expr);
return M_ERROR;
}
if (parse_register(&state->rd, &arg->reg)) {
print_curr_line(gen, expr);
return M_ERROR;
}
break;
case GMR_RS:
// rs
if (arg->type != EXPR_INS_ARG_REGISTER) {
ERROR("expected a register");
print_curr_line(gen, expr);
return M_ERROR;
}
if (parse_register(&state->rs, &arg->reg)) {
print_curr_line(gen, expr);
return M_ERROR;
}
break;
case GMR_RT:
// rt
if (arg->type != EXPR_INS_ARG_REGISTER) {
ERROR("expected a register");
print_curr_line(gen, expr);
return M_ERROR;
}
if (parse_register(&state->rt, &arg->reg)) {
print_curr_line(gen, expr);
return M_ERROR;
}
break;
case GMR_IMMD:
// immd
if (arg->type != EXPR_INS_ARG_IMMEDIATE) {
ERROR("expected an immediate");
print_curr_line(gen, expr);
return M_ERROR;
}
state->immd = arg->immd;
break;
case GMR_OFFSET:
// offset
state->offset = 0;
if (arg->type == EXPR_INS_ARG_IMMEDIATE)
state->offset = arg->immd;
else if (arg->type == EXPR_INS_ARG_LABEL)
state->label = &arg->label;
else {
ERROR("invalid instruction");
print_curr_line(gen, expr);
return M_ERROR;
}
break;
case GMR_OFFSET_BASE:
// offset(base)
if (arg->type != EXPR_INS_ARG_OFFSET) {
ERROR("expected an offset($base)");
print_curr_line(gen, expr);
return M_ERROR;
}
state->offset = arg->offset.immd;
if (parse_register(&state->base, &arg->offset.reg)) {
print_curr_line(gen, expr);
return M_ERROR;
}
break;
case GMR_TARGET:
// target
state->target = 0;
if (arg->type == EXPR_INS_ARG_IMMEDIATE)
state->target = arg->immd;
else if (arg->type == EXPR_INS_ARG_LABEL)
state->label = &arg->label;
else {
ERROR("invalid instruction");
print_curr_line(gen, expr);
return M_ERROR;
}
break;
default:
break;
}
// skip entry
ptr += skip;
// skip comma
if (*ptr == ',') {
ptr++;
continue;
} else if (*ptr == '\0') {
break;
} else {
ERROR("!! BUG3: invalid splitting char %c !!!", *ptr);
exit(1);
}
}
return M_SUCCESS;
}
/* Fill in the operand fields of one instruction and append it to the current
 * section.
 *
 * `ins` is received by value, so the fields written below only affect this
 * local copy. `grammer` is a comma separated list of grammar tokens (see
 * get_gmr_type()); for each token the matching field of `ins` is loaded from
 * `state`. A register token directly followed by '=' names a fixed register
 * that is used instead of the one held in `state`.
 *
 * When a token selected a reference type and `state->label` is set, a
 * reference to that label is pushed onto the current section's reference
 * table.
 *
 * Returns M_SUCCESS, or M_ERROR if the section, the symbol table or the
 * reference table could not be updated. */
static int gen_ins_write_state(
struct generator *gen,
union mips32_instruction ins, // the instruction to modify
struct gen_ins_state *state, // the current read state
char *grammer) // the grammar to parse
{
char *ptr = grammer;
enum reference_type reftype = REF_NONE;
// write the values from state into the instruction
while (*ptr != '\0') {
// parse next dsl entry
size_t skip;
enum grammer_type gmr = get_gmr_type(ptr, &skip);
// check for dsl hardcoded register argument
bool hardcoded = false;
// only assigned (and only read) when hardcoded is true
enum mips32_register hard_reg;
if (*(ptr + skip) == '=') {
// parse argument
// +2 steps over the '=' and one more character
// (presumably the '$' of the register name -- TODO confirm)
char *rptr = ptr + skip + 2;
hardcoded = true;
struct string regname;
// regname runs to the end of the grammar string; parse_register
// only looks at its leading characters
string_bss(&regname, rptr);
if (parse_register(&hard_reg, &regname)) {
ERROR("!!! BUG2: this should never hit !!!");
exit(1);
}
}
// skip till next comma
for (;*ptr != '\0' && *ptr != ','; ptr++);
if (*ptr == ',')
ptr++;
switch (gmr) {
case GMR_RD:
ins.rd = hardcoded ? hard_reg : state->rd;
break;
case GMR_RS:
ins.rs = hardcoded ? hard_reg : state->rs;
break;
case GMR_RT:
ins.rt = hardcoded ? hard_reg : state->rt;
break;
case GMR_IMMD:
ins.immd = state->immd;
break;
case GMR_OFFSET:
ins.offset = state->offset;
reftype = REF_MIPS_16;
break;
case GMR_OFFSET_BASE:
// the base register is stored in the rs field
ins.offset = state->offset;
ins.rs = state->base;
reftype = REF_MIPS_16;
break;
case GMR_TARGET:
ins.target = state->target;
reftype = REF_MIPS_26;
break;
case GMR_HI:
// upper 16 bits of the target
ins.immd = state->target >> 16;
reftype = REF_MIPS_HI16;
break;
case GMR_LO:
// lower 16 bits of the target
ins.immd = state->target & 0x0000FFFF;
reftype = REF_MIPS_LO16;
break;
}
}
// get offset for reference (if needed)
// this is the section length rounded up to the section alignment
// NOTE(review): assumes section_push() pads to the same alignment
// before writing, so the instruction lands at `offset` -- confirm
uint32_t offset = gen->current->len;
size_t zeros = offset % gen->current->align;
if (zeros)
zeros = gen->current->align - zeros;
offset += zeros;
// write instruction to section
// B32 presumably converts to the output byte order -- TODO confirm
uint32_t raw = B32(ins.raw);
if (section_push(gen->current, &raw, sizeof(uint32_t))) {
return M_ERROR;
}
// create reference (if needed)
if (reftype != REF_NONE && state->label != NULL) {
struct symbol *sym;
if (symtab_find_or_stub(&gen->symtab, &sym, state->label))
return M_ERROR;
struct reference ref = {
.type = reftype,
.symbol = sym,
.offset = offset
};
if (reftab_push(&gen->current->reftab, &ref)) {
return M_ERROR;
}
}
return M_SUCCESS;
}
static int gen_ins(struct generator *gen, struct expr *const expr)
{
struct mips32_grammer *grammer = NULL;
for (uint32_t i = 0; i < gen->grammers_len; i++) {
struct mips32_grammer *temp = &gen->grammers[i];
if (strcasecmp(temp->name, expr->instruction.name.str) != 0)
continue;
grammer = temp;
break;
}
if (grammer == NULL) {
ERROR("unknown instruction");
print_curr_line(gen, expr);
return M_ERROR;
}
struct gen_ins_state state;
state.label = NULL;
// read in the values from the parser
if (gen_ins_read_state(gen, expr, &state, grammer))
return M_ERROR;
// write the values into the instructions
// ...and then the sections
if (grammer->pseudo_len > 0) {
// write pseudo
for (int i = 0; i < grammer->pseudo_len; i++) {
union mips32_instruction ins = gen->instructions[
grammer->pseudo_grammer[i].enum_index];
if (gen_ins_write_state(gen, ins, &state,
grammer->pseudo_grammer[i].update))
return M_ERROR;
}
} else {
// write real
union mips32_instruction ins
= gen->instructions[grammer->enum_index];
if (gen_ins_write_state(gen, ins, &state, grammer->grammer))
return M_ERROR;
}
return M_SUCCESS;
}
static int gen_label(struct generator *gen, struct string *const label)
{
uint32_t offset = gen->current->len;
ptrdiff_t secidx = gen->current - gen->sections;
size_t zeros = offset % gen->current->align;
if (zeros)
zeros = gen->current->align - zeros;
offset += zeros;
struct symbol *sym;
/* update existing symbol (if exists) */
if (symtab_find(&gen->symtab, &sym, label->str) == M_SUCCESS) {
if (sym->secidx != SYM_SEC_STUB) {
// symbols that are not labeled stub are fully defined,
// it is a error to redefine them
ERROR("redefined symbol '%s'", label->str);
return M_ERROR;
}
sym->secidx = secidx;
sym->offset = offset;
/* create a new symbol */
} else {
struct symbol new = {
.secidx = secidx,
.offset = offset,
.type = SYM_LOCAL,
};
if (string_clone(&new.name, label))
return M_ERROR;
if (symtab_push(&gen->symtab, &new)) {
string_free(&new.name);
return M_ERROR;
}
}
return M_SUCCESS;
}
/* run codegen */
static int generate(struct generator *gen)
{
struct expr expr;
int res = M_SUCCESS;
// get the next expression
if ((res = parser_next(&gen->parser, &expr)))
return res;
// if its not a segment directive
// (and we dont have a section)
// create the default
if ((
expr.type != EXPR_DIRECTIVE ||
expr.directive.type != EXPR_DIRECTIVE_SECTION) &&
gen->current == NULL) {
// create .data section
struct string temp = {
.str = ".data",
.len = 5,
.size = 5,
.allocated = false
};
if (section_get(gen, &gen->current, &temp)) {
expr_free(&expr);
return M_ERROR;
}
}
res = M_SUCCESS;
switch (expr.type) {
case EXPR_DIRECTIVE:
res = gen_directive(gen, &expr);
break;
case EXPR_CONSTANT:
res = gen_constant(gen, &expr.constant);
break;
case EXPR_INS:
res = gen_ins(gen, &expr);
break;
case EXPR_LABEL:
res = gen_label(gen, &expr.label);
break;
}
expr_free(&expr);
return res;
}
/* Run codegen with the mips32r6 specification.
 *
 * Installs the mips32r6 instruction and grammar tables on `gen`, then
 * generates expressions until the input is exhausted.
 *
 * Returns M_SUCCESS once generate() reports M_EOF, or M_ERROR as soon as it
 * reports M_ERROR. */
int generate_mips32r6(struct generator *gen)
{
	gen->instructions = mips32r6_instructions;
	gen->instructions_len = __MIPS32R6_INS_LEN;
	gen->grammers = mips32r6_grammers;
	gen->grammers_len = __MIPS32R6_GRAMMER_LEN;

	for (;;) {
		int res = generate(gen);
		if (res == M_ERROR)
			return M_ERROR;
		if (res == M_EOF)
			return M_SUCCESS;
	}
}
/* Initialize a generator that reads its input from `file`.
 *
 * Sets up the parser and the symbol table and starts with no sections, no
 * current section and no instruction/grammar tables selected.
 *
 * Returns M_SUCCESS, or M_ERROR if the parser or the symbol table could not
 * be initialized. */
int generator_init(const char *file, struct generator *gen)
{
	if (parser_init(file, &gen->parser))
		return M_ERROR;

	// NOTE(review): the parser is not released when symtab_init fails;
	// whether that is correct depends on what callers do after a
	// failed init -- confirm
	if (symtab_init(&gen->symtab))
		return M_ERROR;

	gen->sections = NULL;
	gen->sections_len = 0;
	gen->sections_size = 0;

	// generate() tests `current` against NULL to decide whether a default
	// section is needed, so it must not be left indeterminate
	gen->current = NULL;

	// the tables are installed later (see generate_mips32r6)
	gen->instructions = NULL;
	gen->instructions_len = 0;
	gen->grammers = NULL;
	gen->grammers_len = 0;

	return M_SUCCESS;
}
/* Release everything owned by a generator: the parser, the symbol table,
 * every section, and the section array itself. */
void generator_free(struct generator *gen)
{
	parser_free(&gen->parser);
	symtab_free(&gen->symtab);

	size_t idx = 0;
	while (idx < gen->sections_len) {
		section_free(&gen->sections[idx]);
		idx++;
	}

	free(gen->sections);
}

118
masm/gen.h Normal file
View file

@ -0,0 +1,118 @@
/* Copyright (c) 2024 Freya Murphy */
#ifndef __GEN_H__
#define __GEN_H__
#include <mlimits.h>
#include <mips32.h>
#include <stdint.h>
#include "parse.h"
#include "tab.h"
// predefine
struct generator;
///
/// a section
///
/// one named output section: its bytes, its permissions and the
/// references recorded for it
///
struct section {
// name
struct string name;
// alignment
// (the generator rounds offsets in this section up to a multiple of it)
size_t align;
// data
char *data;
// number of bytes currently in the section; the generator uses it
// as the starting point for the next offset
size_t len;
// presumably the allocated capacity of `data` -- TODO confirm
size_t size;
// permissions
bool read;
bool write;
bool execute;
/// reference table
struct reference_table reftab;
};
void section_free(struct section *section);
///
/// instruction generation state
///
/// holds the operands read from one parsed instruction until they
/// are written into the encoded instruction(s)
///
struct gen_ins_state {
// rd,rs,rt
enum mips32_register rd;
enum mips32_register rs;
enum mips32_register rt;
// immd
uint16_t immd;
// offset(base)
uint16_t offset;
enum mips32_register base;
// target
uint32_t target;
// current referenced label
// (NULL when the instruction does not refer to a label)
struct string *label;
};
///
/// grammer type
///
/// one value per token that may appear in an instruction
/// grammar string
///
enum grammer_type {
// "rd"
GMR_RD,
// "rs"
GMR_RS,
// "rt"
GMR_RT,
// "immd"
GMR_IMMD,
// "offset"
GMR_OFFSET,
// "offset(base)"
GMR_OFFSET_BASE,
// "target"
GMR_TARGET,
// "hi" (upper 16 bits of the target)
GMR_HI,
// "lo" (lower 16 bits of the target)
GMR_LO,
};
///
/// generates assembly
/// from a parser stream
///
struct generator {
// the parser expressions are read from
struct parser parser;
// current instruction table
size_t instructions_len;
union mips32_instruction *instructions;
// current grammer table
size_t grammers_len;
struct mips32_grammer *grammers;
// sections
// (sections_len entries are in use; sections_size is presumably the
// allocated capacity -- TODO confirm)
size_t sections_len;
size_t sections_size;
struct section *sections;
// current section
// (NULL until a section has been selected)
struct section *current;
// symbol table
struct symbol_table symtab;
};
/* generate the input as mips32r6 */
int generate_mips32r6(struct generator *gen);
/* initialize a generator */
int generator_init(const char *file, struct generator *gen);
/* free a generator */
void generator_free(struct generator *gen);
#endif /* __GEN_H__ */

View file

@ -2,6 +2,10 @@
#include <mlimits.h> #include <mlimits.h>
#include <merror.h> #include <merror.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/stat.h>
static struct { static struct {
int x; int x;
@ -46,64 +50,24 @@ static void skip_comment(struct lexer *lexer)
} }
} }
/* lexes text until whitespace
* returns error on zero length or too long */
static int lex_ident(struct lexer *lexer, char text[MAX_LEX_LENGTH])
{
int len = 0;
char *ptr = text;
int c;
while (1) {
c = lex_peek(lexer);
if (!(
(c >= 'a' && c <= 'z') ||
(c >= 'A' && c <= 'Z') ||
(c >= '0' && c <= '9') ||
(c == '_')
)) {
break;
}
// pop char out of lexer
lex_next(lexer);
if (len + 1 == MAX_LEX_LENGTH) {
ERROR_POS(pos, "ident has max length of %d",
MAX_LEX_LENGTH);
return M_ERROR;
}
*ptr++ = c;
len++;
}
if (len == 0) {
ERROR_POS(pos, "attempted to lex empty ident %d",
MAX_LEX_LENGTH);
return M_ERROR;
}
*ptr = '\0';
return M_SUCCESS;
}
/* lexes a string until closing quote /* lexes a string until closing quote
* returns error if string is too long or hit newline */ * returns error if string is too long or hit newline */
static int lex_string(struct lexer *lexer,char text[MAX_LEX_LENGTH]) static int lex_string(struct lexer *lexer, struct string *string)
{ {
int len = 0; char c;
char *ptr = text; string_init(string);
int c;
while (1) { while (1) {
c = lex_next(lexer); c = lex_next(lexer);
// stop on ending quote
if (c == '"') if (c == '"')
break; break;
// strings cannot span multiple lines // strings cannot span multiple lines
if (c == '\n') { if (c == '\n') {
ERROR_POS(pos, "reached newline before end of string"); ERROR_POS(pos, "reached newline before end of string");
string_free(string);
return M_ERROR; return M_ERROR;
} }
@ -129,20 +93,73 @@ static int lex_string(struct lexer *lexer,char text[MAX_LEX_LENGTH])
} }
} }
if (len + 1 == MAX_LEX_LENGTH) { // push char into string
ERROR_POS(pos, "string has max length of %d", if (string_push(string, c)) {
MAX_LEX_LENGTH); string_free(string);
return M_ERROR;
}
}
// null terminate string
if (string_push(string, '\0')) {
free(string->str);
return M_ERROR; return M_ERROR;
} }
*ptr++ = c;
len++;
}
*ptr = '\0';
return M_SUCCESS; return M_SUCCESS;
} }
/* lexes an identifier: the longest run of [a-zA-Z0-9_] characters
 *
 * if prefix is not '\0' it is stored in front of the lexed characters.
 * the resulting string is null terminated, and the terminator is
 * counted in its length.
 *
 * returns error if nothing was stored or on allocation failure; the
 * string is released on every error path */
static int lex_ident(struct lexer *lexer, struct string *string,
		char prefix)
{
	char c;
	string_init(string);

	if (prefix != '\0' && string_push(string, prefix)) {
		string_free(string);
		return M_ERROR;
	}

	while (1) {
		c = lex_peek(lexer);
		if (!(
			(c >= 'a' && c <= 'z') ||
			(c >= 'A' && c <= 'Z') ||
			(c >= '0' && c <= '9') ||
			(c == '_')
		)) {
			break;
		}

		// pop char out of lexer
		lex_next(lexer);

		// push char into string
		if (string_push(string, c)) {
			// release through string_free like every other
			// error path in this function
			string_free(string);
			return M_ERROR;
		}
	}

	// empty idents are not allowed
	if (string->len < 1) {
		string_free(string);
		ERROR("empty ident tokens are not allowed");
		return M_ERROR;
	}

	// null terminate string
	if (string_push(string, '\0')) {
		string_free(string);
		return M_ERROR;
	}

	return M_SUCCESS;
}
/* lexes a integer number in base 2,8,10, or 16, /* lexes a integer number in base 2,8,10, or 16,
* uses base 10 by default but chan be changed by 0b, 0o, and 0x */ * uses base 10 by default but chan be changed by 0b, 0o, and 0x */
static int lex_number(struct lexer *lexer, int64_t *n) static int lex_number(struct lexer *lexer, int64_t *n)
@ -221,6 +238,7 @@ int lexer_next(struct lexer *lexer, struct token *token)
again: // use label to avoid whitespace recursion again: // use label to avoid whitespace recursion
token->x = lexer->x; token->x = lexer->x;
token->y = lexer->y; token->y = lexer->y;
token->off = ftell(lexer->file);
pos.x = lexer->x; pos.x = lexer->x;
pos.y = lexer->y; pos.y = lexer->y;
token->type = TOK_EOF; token->type = TOK_EOF;
@ -231,54 +249,80 @@ again: // use label to avoid whitespace recursion
switch (c) { switch (c) {
case EOF: case EOF:
// return a EOF token
case '\0': case '\0':
token->type = TOK_EOF; token->type = TOK_EOF;
break; break;
// skip the comment
// .. and return a NL token
case ';': case ';':
case '#': case '#':
skip_comment(lexer); skip_comment(lexer);
token->type = TOK_NL; token->type = TOK_NL;
break; break;
// skip the whitespace and
// try to parse the next character
case ' ': case ' ':
case '\t': case '\t':
// skip white space // skip white space
lex_next(lexer); lex_next(lexer);
goto again; goto again;
// return a NL token
case '\n': case '\n':
lex_next(lexer); lex_next(lexer);
token->type = TOK_NL; token->type = TOK_NL;
break; break;
// return a comma token
case ',': case ',':
lex_next(lexer); lex_next(lexer);
token->type = TOK_COMMA; token->type = TOK_COMMA;
break; break;
// return a equal token
case '=': case '=':
lex_next(lexer); lex_next(lexer);
token->type = TOK_EQUAL; token->type = TOK_EQUAL;
break; break;
// return a left paren token
case '(': case '(':
lex_next(lexer); lex_next(lexer);
token->type = TOK_LPAREN; token->type = TOK_LPAREN;
break; break;
// return a right paren token
case ')': case ')':
token->type = TOK_RPAREN; token->type = TOK_RPAREN;
lex_next(lexer); lex_next(lexer);
break; break;
// return a register token
case '$': case '$':
token->type = TOK_REG; token->type = TOK_REG;
lex_next(lexer); lex_next(lexer);
res = lex_ident(lexer, token->text); res = lex_ident(lexer, &token->string, '\0');
break; break;
// return a directive token
case '.': case '.':
token->type = TOK_DIRECTIVE; token->type = TOK_DIRECTIVE;
lex_next(lexer); lex_next(lexer);
res = lex_ident(lexer, token->text); res = lex_ident(lexer, &token->string, '.');
break; break;
// return a string token
case '"': case '"':
token->type = TOK_STRING; token->type = TOK_STRING;
lex_next(lexer); lex_next(lexer);
res = lex_string(lexer, token->text); res = lex_string(lexer, &token->string);
break; break;
// return a number token
case '-': case '-':
case '0': case '0':
case '1': case '1':
@ -293,35 +337,44 @@ again: // use label to avoid whitespace recursion
token->type = TOK_NUMBER; token->type = TOK_NUMBER;
res = lex_number(lexer, &token->number); res = lex_number(lexer, &token->number);
break; break;
// return a ident or label token depending
// if it ends with a colon
default: default:
token->type = TOK_IDENT; token->type = TOK_IDENT;
res = lex_ident(lexer, token->text); res = lex_ident(lexer, &token->string, '\0');
if (lex_peek(lexer) == ':') { if (lex_peek(lexer) == ':') {
lex_next(lexer); lex_next(lexer);
token->type = TOK_LABEL; token->type = TOK_LABEL;
} }
break; break;
} }
return res; return res;
} }
int lexer_init(const char *path, struct lexer *lexer) int lexer_init(const char *path, struct lexer *lexer)
{ {
FILE *file = fopen(path, "r"); /// defaults
if (file == NULL) { lexer->file = NULL;
PERROR("cannot read '%s'", path);
return M_ERROR;
}
lexer->file = file;
lexer->peek = EOF; lexer->peek = EOF;
lexer->x = 1; lexer->x = 1;
lexer->y = 1; lexer->y = 1;
/// load file
lexer->file = fopen(path, "r");
if (lexer->file == NULL) {
PERROR("cannot read");
return M_ERROR;
}
return M_SUCCESS; return M_SUCCESS;
} }
int lexer_free(struct lexer *lexer) void lexer_free(struct lexer *lexer)
{ {
return fclose(lexer->file); if (lexer->file)
fclose(lexer->file);
} }
char *token_str(enum token_type type) char *token_str(enum token_type type)
@ -355,6 +408,7 @@ char *token_str(enum token_type type)
return "unknown"; return "unknown";
} }
/* save the current state from the lexer */
void lexer_save(struct lexer *lexer, struct lexer_state *state) void lexer_save(struct lexer *lexer, struct lexer_state *state)
{ {
state->x = lexer->x; state->x = lexer->x;
@ -371,3 +425,18 @@ void lexer_load(struct lexer *lexer, const struct lexer_state *state)
lexer->peek = state->peek; lexer->peek = state->peek;
fseek(lexer->file, state->offset, SEEK_SET); fseek(lexer->file, state->offset, SEEK_SET);
} }
/* frees the data held by a token
 *
 * only the token types that carry a string own memory; every other
 * type has nothing to release */
void token_free(struct token *token)
{
	switch (token->type) {
	case TOK_REG:
	case TOK_IDENT:
	case TOK_LABEL:
	case TOK_STRING:
	case TOK_DIRECTIVE:
		// free(NULL) is a no-op, so no guard is needed
		free(token->string.str);
		break;
	default:
		// a label must be followed by a statement
		break;
	}
}

View file

@ -7,13 +7,86 @@
#include <stdio.h> #include <stdio.h>
#include <stdint.h> #include <stdint.h>
struct lexer { /// represents a non null
FILE *file; /// terminated string
int peek; struct string {
int x; char *str;
int y; uint32_t len;
uint32_t size;
bool allocated;
}; };
/* initialize a string */
void string_init(struct string *string);
/* free a string */
void string_free(struct string *string);
/* clone a string, leave the old one */
int string_clone(struct string *dst, const struct string *const src);
/* move a string, delete the old one */
void string_move(struct string *dst, struct string *src);
/* pushes a char onto a string */
int string_push(struct string *string, char c);
/* load a string from the bss (not allocated) */
void string_bss(struct string *string, char *src);
/// the kind of a token; it also decides which member of the
/// token data union is in use
enum token_type {
/// has no associated
/// data
// ','
TOK_COMMA,
// '='
TOK_EQUAL,
// '('
TOK_LPAREN,
// ')'
TOK_RPAREN,
// end of input
TOK_EOF,
// a newline, or a comment
TOK_NL,
/// uses number
TOK_NUMBER,
/// uses string
// introduced by '$'
TOK_REG,
TOK_IDENT,
// an ident directly followed by ':'
TOK_LABEL,
// text between double quotes
TOK_STRING,
// introduced by '.', which is kept in the string
TOK_DIRECTIVE,
};
/// represents a token
/// returned from the lexer
struct token {
/// type
enum token_type type;
/// position
int x, y;
/// pos in bytes
/// (the file position reported by ftell when lexing of
/// the token began)
int off;
/// data
/// which member is valid depends on type, see enum token_type
union {
int64_t number;
struct string string;
};
};
/* frees a token*/
void token_free(struct token *token);
/// holds the data
/// for the current lexer
struct lexer {
// the currently
// open file
// (NULL when no file is open)
FILE *file;
// the last character peeked
int peek;
// the current position
// (both start at 1)
int x, y;
};
/// holds a previous state of a
/// lexer, which allows rebounding
struct lexer_state { struct lexer_state {
long offset; long offset;
int peek; int peek;
@ -21,36 +94,11 @@ struct lexer_state {
int y; int y;
}; };
enum token_type {
TOK_IDENT,
TOK_REG,
TOK_LABEL,
TOK_STRING,
TOK_COMMA,
TOK_EQUAL,
TOK_LPAREN,
TOK_RPAREN,
TOK_NUMBER,
TOK_EOF,
TOK_NL,
TOK_DIRECTIVE,
};
struct token {
enum token_type type;
union {
int64_t number;
char text[MAX_LEX_LENGTH];
};
int x;
int y;
};
/* initalize a lexer */ /* initalize a lexer */
int lexer_init(const char *file, struct lexer *lexer); int lexer_init(const char *file, struct lexer *lexer);
/* free the lxer */ /* free the lexer */
int lexer_free(struct lexer *lexer); void lexer_free(struct lexer *lexer);
/* lexes the next token, returns M_ERROR on error, /* lexes the next token, returns M_ERROR on error,
* and TOK_EOF on EOF */ * and TOK_EOF on EOF */

BIN
masm/out.o Normal file

Binary file not shown.

File diff suppressed because it is too large Load diff

View file

@ -6,89 +6,147 @@
#include "lex.h" #include "lex.h"
#include <mlimits.h> #include <mlimits.h>
#include <mips.h>
#include <stdint.h> #include <stdint.h>
/* mips directive types */ /// the type to a direcive
enum mips_directive_type { enum expr_directive_type {
MIPS_DIRECTIVE_ALIGN, EXPR_DIRECTIVE_ALIGN,
MIPS_DIRECTIVE_SPACE, EXPR_DIRECTIVE_SPACE,
MIPS_DIRECTIVE_WORD, EXPR_DIRECTIVE_WORD,
MIPS_DIRECTIVE_HALF, EXPR_DIRECTIVE_HALF,
MIPS_DIRECTIVE_BYTE, EXPR_DIRECTIVE_BYTE,
MIPS_DIRECTIVE_SECTION, EXPR_DIRECTIVE_SECTION,
MIPS_DIRECTIVE_EXTERN, EXPR_DIRECTIVE_EXTERN,
MIPS_DIRECTIVE_GLOBL, EXPR_DIRECTIVE_GLOBL,
MIPS_DIRECTIVE_ASCII, EXPR_DIRECTIVE_ASCII,
MIPS_DIRECTIVE_ASCIIZ, EXPR_DIRECTIVE_ASCIIZ,
}; };
/* mip32 directive */ /// holds a directive
struct mips_directive { struct expr_directive {
enum mips_directive_type type; // the type of the directive
uint32_t len; // used for words, halfs, bytes enum expr_directive_type type;
// lengh of .word, .half, or .byte directive
uint32_t len;
// directive data
union { union {
// e.g. align 2
uint16_t align; uint16_t align;
// e.g. space 4096
uint16_t space; uint16_t space;
// e.g. .word 0x1 0x2
uint32_t words[MAX_ARG_LENGTH]; uint32_t words[MAX_ARG_LENGTH];
uint16_t halfs[MAX_ARG_LENGTH]; uint16_t halfs[MAX_ARG_LENGTH];
uint8_t bytes[MAX_ARG_LENGTH]; uint8_t bytes[MAX_ARG_LENGTH];
char name[MAX_ARG_LENGTH]; // e.g. .ascii "hello world!"
struct string string;
// e.g. .globl main
struct string label;
// e.g. .text
struct string section;
}; };
}; };
struct reference { /// holds a constant expression
// ELF relocate type struct expr_const {
unsigned char type; // the name of the constant
struct string name;
/// symbol name // the value of the constant
char name[MAX_LEX_LENGTH]; uint32_t num;
/// integer addend
int64_t addend;
}; };
struct const_expr { /// the type to a right
char name[MAX_LEX_LENGTH]; /// hand side argument to an
uint32_t value; /// instruction
// selects which member of the union in struct expr_ins_arg is in use
enum expr_ins_arg_type {
// e.g. $ra
EXPR_INS_ARG_REGISTER,
// e.g. 0x80
EXPR_INS_ARG_IMMEDIATE,
// e.g. main
EXPR_INS_ARG_LABEL,
// e.g. 4($sp)
EXPR_INS_ARG_OFFSET,
};
struct ins_expr { /// a right hand argument
/// to an instruction
struct expr_ins_arg {
// decides which union member below is valid
enum expr_ins_arg_type type;
union {
// register
// (the name as written; it is resolved to a register
// number during code generation)
struct string reg;
// immediate
uint64_t immd;
// label
struct string label;
// offset
// e.g. 4($sp): immd is the 4, reg is the sp
struct expr_ins_offset {
// immediate
uint64_t immd;
// register
struct string reg;
} offset;
};
};
/// holds a instruction
struct expr_ins {
/// pesudo instructions can return /// pesudo instructions can return
/// more than one instruction /// more than one instruction
size_t ins_len; struct string name;
struct mips_instruction ins[2];
/// instructions can reference symbols. // the arguments of the instruction
/// instruction `n` will be paried with reference `n` uint32_t args_len;
struct reference ref[2]; struct expr_ins_arg args[MAX_ARG_LENGTH];
}; };
enum expr_type { enum expr_type {
// e.g. .align 2
EXPR_DIRECTIVE, EXPR_DIRECTIVE,
// e.g. SIZE = 8
EXPR_CONSTANT, EXPR_CONSTANT,
// e.g. li $t0, 17
EXPR_INS, EXPR_INS,
// e.g. _start:
EXPR_LABEL, EXPR_LABEL,
}; };
struct expr { struct expr {
enum expr_type type; enum expr_type type;
uint32_t line_no;
uint32_t byte_start;
uint32_t byte_end;
union { union {
// directive // directive
struct mips_directive directive; struct expr_directive directive;
// constant // constant
struct const_expr constant; struct expr_const constant;
// instruction // instruction
struct ins_expr ins; struct expr_ins instruction;
// label // label
char label[MAX_LEX_LENGTH]; struct string label;
}; };
}; };
void expr_free(struct expr *expr);
struct parser { struct parser {
// the lexer // the lexer
// *weak* ponter, we do not own this // *weak* ponter, we do not own this
struct lexer *lexer; struct lexer lexer;
// the last token peeked // the last token peeked
struct token peek; struct token peek;
}; };
@ -97,7 +155,7 @@ struct parser {
int parser_next(struct parser *parser, struct expr *expr); int parser_next(struct parser *parser, struct expr *expr);
/* initalize the base parser */ /* initalize the base parser */
int parser_init(struct lexer *lexer, struct parser *parser); int parser_init(const char *file, struct parser *parser);
/* free the base parser */ /* free the base parser */
void parser_free(struct parser *parser); void parser_free(struct parser *parser);

43
masm/reftab.c Normal file
View file

@ -0,0 +1,43 @@
#include <stdlib.h>
#include <merror.h>
#include "tab.h"
#define REFTAB_INIT_LEN 8
/* Initialize an empty reference table with room for REFTAB_INIT_LEN
 * references.
 *
 * Returns M_SUCCESS, or M_ERROR if the allocation failed. */
int reftab_init(struct reference_table *reftab)
{
	reftab->size = REFTAB_INIT_LEN;
	reftab->len = 0;
	reftab->references = malloc(REFTAB_INIT_LEN
			* sizeof(struct reference));

	if (reftab->references != NULL)
		return M_SUCCESS;

	PERROR("cannot alloc");
	return M_ERROR;
}
/* Release the reference array of a table.
 *
 * The fields of `reftab` are left untouched, so the table must be
 * initialized again with reftab_init() before it is reused. */
void reftab_free(struct reference_table *reftab)
{
free(reftab->references);
}
int reftab_push(struct reference_table *reftab, struct reference *ref)
{
if (reftab->len >= reftab->size) {
reftab->size *= 2;
reftab->references = realloc(reftab->references,
sizeof(struct reference) * reftab->size);
if (reftab->references == NULL) {
PERROR("cannot realloc");
return M_ERROR;
}
}
reftab->references[reftab->len++] = *ref;
return M_SUCCESS;
}

View file

@ -1,43 +0,0 @@
#include <elf.h>
#include <stdlib.h>
#include <merror.h>
#include "asm.h"
#define RELTAB_INIT_LEN 8
/* Initialize an empty relocation table with room for RELTAB_INIT_LEN
 * entries.
 *
 * Returns M_SUCCESS, or M_ERROR if the allocation failed. */
int reltab_init(struct relocation_table *reltab)
{
	reltab->size = RELTAB_INIT_LEN;
	reltab->len = 0;
	reltab->data = malloc(RELTAB_INIT_LEN * sizeof(Elf32_Rela));

	if (reltab->data != NULL)
		return M_SUCCESS;

	PERROR("cannot alloc");
	return M_ERROR;
}
{
free(reltab->data);
}
int reltab_push(struct relocation_table *reltab, const Elf32_Rela rel)
{
if (reltab->len >= reltab->size) {
reltab->size *= 2;
reltab->data = realloc(reltab->data, sizeof(Elf32_Rela)
* reltab->size);
if (reltab->data == NULL) {
PERROR("cannot realloc");
return M_ERROR;
}
}
reltab->data[reltab->len++] = rel;
return M_SUCCESS;
}

View file

@ -1,166 +0,0 @@
#include <string.h>
#include <stdlib.h>
#include <mips.h>
#include <merror.h>
#include <mlimits.h>
#include "asm.h"
#define SECTBL_INIT_LEN 8
static const char inital_section[MAX_LEX_LENGTH] = "data";
/* Initialize a section table and create the initial section, which becomes
 * the current one.
 *
 * Returns M_SUCCESS, or M_ERROR on allocation failure. On failure the section
 * array is released and the table is left empty, so a later sectab_free() is
 * harmless. */
int sectab_init(struct section_table *sectab)
{
	sectab->size = SECTBL_INIT_LEN;
	sectab->len = 0;
	sectab->sections = malloc(sizeof(struct section) * SECTBL_INIT_LEN);

	if (sectab->sections == NULL) {
		PERROR("cannot alloc");
		return M_ERROR;
	}

	if (sectab_alloc(sectab, &sectab->current, inital_section)) {
		// do not leak the array allocated above
		free(sectab->sections);
		sectab->sections = NULL;
		sectab->size = 0;
		return M_ERROR;
	}

	return M_SUCCESS;
}
void sectab_free(struct section_table *sectab)
{
for (size_t i = 0; i < sectab->len; i++) {
reltab_free(&sectab->sections[i].reltab);
free(sectab->sections[i].entries);
}
free(sectab->sections);
}
/* Default attributes applied to a section with a well known name. */
struct section_settings {
/* section name the defaults apply to */
const char *name;
/* permissions */
bool read;
bool write;
bool execute;
/* alignment of the section */
size_t align;
};
/* Defaults for the well known sections; sectab_alloc() applies the entry
 * whose name equals the name of the section being created. */
static struct section_settings default_section_settings[] = {
{"data", true, true, false, 1},
{"bss", true, true, false, 1},
{"rodata", true, false, false, 1},
{"text", true, false, true, 4},
};
int sectab_alloc(struct section_table *sectab, struct section **res,
const char name[MAX_LEX_LENGTH])
{
if (sectab->len >= sectab->size) {
sectab->size *= 2;
sectab->sections = realloc(sectab->sections,
sizeof(struct section) * sectab->size);
if (sectab->sections == NULL) {
PERROR("cannot realloc");
return M_ERROR;
}
}
/* set the sectio defaults */
struct section *sec;
sec = &sectab->sections[sectab->len];
strcpy(sec->name,name);
sec->len = 0;
sec->size = SECTBL_INIT_LEN;
sec->alignment = 1;
sec->read = true;
sec->write = true;
sec->execute = false;
sec->index = sectab->len;
sec->entries = malloc(sizeof(struct section_entry) * SECTBL_INIT_LEN);
if (reltab_init(&sec->reltab))
return M_ERROR;
/* overwrite the default if the given name has their own
* defaults */
for (int i = 0; i < 4; i++) {
struct section_settings *set = &default_section_settings[i];
if (strcmp(set->name, name) == 0) {
sec->read = set->read;
sec->write = set->write;
sec->execute = set->execute;
sec->alignment = set->align;
break;
}
}
if (sec->entries == NULL) {
PERROR("cannot alloc");
return M_ERROR;
}
sectab->len++;
*res = sec;
return M_SUCCESS;
}
/* Look up a section by exact name.
 *
 * On a match M_SUCCESS is returned and, if `sec` is not NULL, `*sec` is set to
 * the section. M_ERROR is returned when no section has that name. */
int sectab_get(struct section_table *sectab, struct section **sec,
	       const char name[MAX_LEX_LENGTH])
{
	size_t idx = 0;

	while (idx < sectab->len) {
		struct section *candidate = &sectab->sections[idx++];

		if (strcmp(name, candidate->name) != 0)
			continue;

		if (sec != NULL)
			*sec = candidate;
		return M_SUCCESS;
	}

	return M_ERROR;
}
int sec_push(struct section *section, struct section_entry entry)
{
if (section->len >= section->size) {
section->size *= 2;
void *new = realloc(section->entries,
sizeof(struct section_entry) * section->size);
if (new == NULL) {
PERROR("cannot realloc");
return M_ERROR;
}
section->entries = new;
}
section->entries[section->len++] = entry;
return M_SUCCESS;
}
/* Total size of a section: the sum of all entry sizes, each rounded up to a
 * multiple of the section alignment. */
size_t sec_size(struct section *sec)
{
	size_t total = 0;
	size_t idx = 0;

	while (idx < sec->len) {
		size_t padded = sec->entries[idx].size;
		size_t rem = padded % sec->alignment;

		if (rem != 0)
			padded += sec->alignment - rem;

		total += padded;
		idx++;
	}

	return total;
}
/* Offset of entry `idx` within a section: the sum of the sizes of all
 * entries before it, each rounded up to a multiple of the section
 * alignment. */
size_t sec_index(struct section *sec, size_t idx)
{
	size_t offset = 0;
	size_t cur = 0;

	while (cur < idx) {
		size_t padded = sec->entries[cur].size;
		size_t rem = padded % sec->alignment;

		if (rem != 0)
			padded += sec->alignment - rem;

		offset += padded;
		cur++;
	}

	return offset;
}

81
masm/string.c Normal file
View file

@ -0,0 +1,81 @@
#include <merror.h>
#include <stdlib.h>
#include <string.h>
#include "lex.h"
/* init a empty string buffer */
inline void string_init(struct string *string)
{
string->len = 0;
string->size = 0;
string->allocated = true;
string->str = NULL;
}
/* free a string buffer; strings that do not own their buffer are left
 * alone */
inline void string_free(struct string *string)
{
	if (!string->allocated)
		return;

	// free(NULL) is a no-op, so an empty string is fine here
	free(string->str);
}
/* clone a string buffer
 *
 * a string that does not own its buffer is cloned by sharing the pointer;
 * an owning string gets a fresh copy of exactly `len` characters. an owning
 * string with no characters (or no buffer) clones to an empty string.
 *
 * returns M_SUCCESS, or M_ERROR if the copy could not be allocated */
inline int string_clone(struct string *dst, const struct string *const src)
{
	dst->len = src->len;
	dst->size = src->len;
	dst->allocated = src->allocated;

	/// bss strings do not need to be
	/// malloced or copied
	if (src->allocated == false) {
		dst->str = src->str;
		return M_SUCCESS;
	}

	/// nothing to copy: do not ask malloc for zero bytes (it may
	/// return NULL) and do not hand memcpy a NULL source
	if (src->len == 0 || src->str == NULL) {
		dst->str = NULL;
		dst->len = 0;
		dst->size = 0;
		return M_SUCCESS;
	}

	dst->str = malloc(sizeof(char) * src->len);
	if (dst->str == NULL) {
		PERROR("cannot alloc");
		return M_ERROR;
	}

	memcpy(dst->str, src->str, sizeof(char) * src->len);
	return M_SUCCESS;
}
/* moves a string
 *
 * dst takes over the buffer, length, capacity and ownership flag of src.
 * src is left as an empty string without a buffer, so freeing it or
 * pushing onto it afterwards is safe. */
inline void string_move(struct string *dst, struct string *src)
{
	dst->len = src->len;
	dst->size = src->size;
	dst->allocated = src->allocated;
	dst->str = src->str;

	// leave src consistent: no buffer means no length and no capacity
	src->str = NULL;
	src->len = 0;
	src->size = 0;
}
/* pushes a char onto a string, growing the buffer when it is full
 *
 * returns M_SUCCESS, or M_ERROR if the buffer cannot be grown, in which
 * case the string is left unchanged */
int string_push(struct string *string, char c)
{
	if (string->len >= string->size) {
		/* double the capacity, starting at 8 chars for an empty buffer */
		size_t len = string->size ? (size_t)string->size * 2 : 8;
		/* the capacity is a count of chars, so scale it by the
		 * element size rather than adding the two together */
		char *new = realloc(string->str, sizeof(char) * len);

		if (new == NULL) {
			PERROR("cannot realloc");
			return M_ERROR;
		}

		string->size = len;
		string->str = new;
	}

	string->str[string->len++] = c;
	return M_SUCCESS;
}
/* wrap an existing NUL-terminated C string without copying it
 *
 * the buffer is marked as not allocated, so string_free will not free src
 * and string_clone will share the pointer instead of copying */
void string_bss(struct string *string, char *src)
{
	/* length excludes the terminating NUL */
	int count = strlen(src);

	string->allocated = false;
	string->size = count;
	string->len = count;
	string->str = src;
}

View file

@ -4,7 +4,7 @@
#include "asm.h" #include "asm.h"
int strtab_get_str(struct str_table *strtab, const char *str, size_t *res) int strtab_get_str(struct elf_str_table *strtab, const char *str, size_t *res)
{ {
for (size_t i = 0; i < strtab->size; i ++) { for (size_t i = 0; i < strtab->size; i ++) {
if (strcmp(strtab->ptr + i, str) == 0) { if (strcmp(strtab->ptr + i, str) == 0) {
@ -17,7 +17,7 @@ int strtab_get_str(struct str_table *strtab, const char *str, size_t *res)
return M_ERROR; return M_ERROR;
} }
int strtab_write_str(struct str_table *strtab, const char *str, size_t *res) int strtab_write_str(struct elf_str_table *strtab, const char *str, size_t *res)
{ {
if (strtab_get_str(strtab, str, res) == M_SUCCESS) if (strtab_get_str(strtab, str, res) == M_SUCCESS)
return M_SUCCESS; return M_SUCCESS;
@ -36,7 +36,7 @@ int strtab_write_str(struct str_table *strtab, const char *str, size_t *res)
return M_SUCCESS; return M_SUCCESS;
} }
int strtab_init(struct str_table *strtab) int strtab_init(struct elf_str_table *strtab)
{ {
strtab->size = 1; strtab->size = 1;
strtab->ptr = malloc(1); strtab->ptr = malloc(1);
@ -48,7 +48,7 @@ int strtab_init(struct str_table *strtab)
return M_SUCCESS; return M_SUCCESS;
} }
void strtab_free(struct str_table *strtab) void strtab_free(struct elf_str_table *strtab)
{ {
free(strtab->ptr); free(strtab->ptr);
} }

View file

@ -1,4 +1,3 @@
#include <elf.h>
#include <merror.h> #include <merror.h>
#include <netinet/in.h> #include <netinet/in.h>
#include <stddef.h> #include <stddef.h>
@ -6,7 +5,8 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include "asm.h" #include "lex.h"
#include "tab.h"
#define SYMTBL_INIT_LEN 24 #define SYMTBL_INIT_LEN 24
@ -14,62 +14,76 @@ int symtab_init(struct symbol_table *symtab)
{ {
symtab->size = SYMTBL_INIT_LEN; symtab->size = SYMTBL_INIT_LEN;
symtab->len = 0; symtab->len = 0;
symtab->symbols = malloc(sizeof(Elf32_Sym) * SYMTBL_INIT_LEN); symtab->symbols = malloc(sizeof(struct symbol) * SYMTBL_INIT_LEN);
symtab->sections = malloc(sizeof(ssize_t) * SYMTBL_INIT_LEN);
if (symtab->symbols == NULL || symtab->sections == NULL) { if (symtab->symbols == NULL) {
PERROR("cannot alloc"); PERROR("cannot alloc");
return M_ERROR; return M_ERROR;
} }
Elf32_Sym null = {0};
if (symtab_push(symtab, null, -1))
return M_ERROR;
return M_SUCCESS; return M_SUCCESS;
} }
void symtab_free(struct symbol_table *symtab) void symtab_free(struct symbol_table *symtab)
{ {
for (uint32_t i = 0; i < symtab->len; i++)
string_free(&symtab->symbols[i].name);
free(symtab->symbols); free(symtab->symbols);
free(symtab->sections);
} }
int symtab_push(struct symbol_table *symtab, Elf32_Sym sym, ssize_t sec_idx) int symtab_push(struct symbol_table *symtab, struct symbol *sym)
{ {
if (symtab->len >= symtab->size) { if (symtab->len >= symtab->size) {
symtab->size *= 2; symtab->size *= 2;
symtab->symbols = realloc(symtab->symbols, symtab->symbols = realloc(symtab->symbols,
sizeof(Elf32_Sym) * symtab->size); sizeof(struct symbol) * symtab->size);
symtab->sections = realloc(symtab->sections, if (symtab->symbols == NULL) {
sizeof(ssize_t) * symtab->size);
if (symtab->symbols == NULL || symtab->sections == NULL) {
PERROR("cannot realloc"); PERROR("cannot realloc");
return M_ERROR; return M_ERROR;
} }
} }
symtab->symbols[symtab->len] = sym; sym->tabidx = symtab->len;
symtab->sections[symtab->len++] = sec_idx; symtab->symbols[symtab->len++] = *sym;
return M_SUCCESS; return M_SUCCESS;
} }
int symtab_find(struct symbol_table *symtab, Elf32_Sym **ptr, int symtab_find(struct symbol_table *symtab, struct symbol **res,
size_t *idx, const char name[MAX_LEX_LENGTH]) const char *name)
{ {
for (uint32_t i = 0; i < symtab->len; i++) { for (uint32_t i = 0; i < symtab->len; i++) {
Elf32_Sym *sym = &symtab->symbols[i]; struct symbol *sym = &symtab->symbols[i];
const char *str = &symtab->strtab->ptr[ntohl(sym->st_name)]; if (strcmp(sym->name.str, name) == 0) {
if (strcmp(str, name) == 0) { if (res != NULL)
if (ptr != NULL) *res = sym;
*ptr = sym;
ptrdiff_t diff = sym - symtab->symbols;
if (idx != NULL)
*idx = diff;
return M_SUCCESS; return M_SUCCESS;
} }
} }
return M_ERROR; return M_ERROR;
} }
int symtab_find_or_stub(struct symbol_table *symtab, struct symbol **res,
const struct string *const name)
{
if (symtab_find(symtab, res, name->str) == M_SUCCESS)
return M_SUCCESS;
struct symbol temp = {
.offset = 0,
.secidx = SYM_SEC_STUB,
.type = SYM_LOCAL,
};
if (string_clone(&temp.name, name))
return M_ERROR;
if (symtab_push(symtab, &temp)) {
string_free(&temp.name);
return M_ERROR;
}
if (res != NULL)
*res = &symtab->symbols[symtab->len - 1];
return M_SUCCESS;
}

98
masm/tab.h Normal file
View file

@ -0,0 +1,98 @@
/* Copyright (c) 2024 Freya Murphy */
#ifndef __TAB_H__
#define __TAB_H__
#include <stdint.h>
#include <stddef.h>
#include "lex.h"
///
/// Symbol table
///
// section index stored in the placeholder symbols that
// symtab_find_or_stub creates for names not yet in the table
#define SYM_SEC_STUB (UINT32_MAX)
// kind of a symbol; placeholders made by symtab_find_or_stub
// start out as SYM_LOCAL
// NOTE(review): GLOBAL/EXTERN presumably reflect assembler
// directives seen for the symbol - confirm against the parser
enum symbol_type {
SYM_LOCAL,
SYM_GLOBAL,
SYM_EXTERN,
};
// one entry of the symbol table
struct symbol {
// the offset of the symbol in a section
uint32_t offset;
// the index of the section the symbol is in, or SYM_SEC_STUB
// for a placeholder created by symtab_find_or_stub
uint32_t secidx;
// index of this symbol in the table; assigned by symtab_push
uint32_t tabidx;
// the name of the symbol; released by symtab_free
struct string name;
// type
enum symbol_type type;
};
// growable array of symbols
struct symbol_table {
// len is the number of symbols stored, size is the number of
// symbols the array currently has room for
size_t len;
size_t size;
// symbols
struct symbol *symbols;
};
/* initialize a symbol table with room for an initial batch of symbols;
 * returns M_SUCCESS, or M_ERROR if the allocation fails */
int symtab_init(struct symbol_table *symtab);
/* free the symbol table, including the name of every stored symbol */
void symtab_free(struct symbol_table *symtab);
/* add a copy of *sym to the symbol table, growing it when full; the index
 * the symbol was stored at is written back to sym->tabidx.
 * returns M_SUCCESS, or M_ERROR if the table cannot be grown */
int symtab_push(struct symbol_table *symtab, struct symbol *sym);
/* find a symbol by name in the symbol table; if sym is not NULL it
 * receives a pointer to the match. returns M_SUCCESS when found and
 * M_ERROR otherwise */
int symtab_find(struct symbol_table *symtab, struct symbol **sym,
const char *name);
/* find an existing symbol with a name or stub a temp one (section index
 * SYM_SEC_STUB); returns M_ERROR only if the stub cannot be created */
int symtab_find_or_stub(struct symbol_table *symtab, struct symbol **sym,
const struct string *const name);
///
/// Reference table
///
// kind of a reference to a symbol
// NOTE(review): the names look like the ELF R_MIPS_* relocation types
// (16, 26, PC16, LO16, HI16) - confirm where they are translated
enum reference_type {
REF_NONE,
REF_MIPS_16,
REF_MIPS_26,
REF_MIPS_PC16,
REF_MIPS_LO16,
REF_MIPS_HI16,
};
// one entry of the reference table
struct reference {
// how the symbol is referenced
enum reference_type type;
// the symbol being referenced
// NOTE(review): if this points into symbol_table.symbols, the realloc
// in symtab_push can move that array and leave it dangling - confirm
struct symbol *symbol;
// NOTE(review): presumably the offset within the section at which the
// reference is applied - confirm
uint32_t offset;
};
// array of references
struct reference_table {
// size
// NOTE(review): presumably len = references stored and size = capacity,
// as in struct symbol_table - confirm against reftab_push
size_t len;
size_t size;
// references
struct reference *references;
};
/* initialize a reference table
 * NOTE(review): return value is presumably M_SUCCESS / M_ERROR like
 * symtab_init - confirm */
int reftab_init(struct reference_table *reftab);
/* free the reference table */
void reftab_free(struct reference_table *reftab);
/* add a reference to the reference tbl
 * NOTE(review): return value is presumably M_SUCCESS / M_ERROR like
 * symtab_push - confirm */
int reftab_push(struct reference_table *reftab, struct reference *ref);
#endif /* __TAB_H__ */