summary refs log tree commit diff
path: root/masm/parse.c
diff options
context:
space:
mode:
Diffstat (limited to 'masm/parse.c')
-rw-r--r-- masm/parse.c 1459
1 files changed, 333 insertions, 1126 deletions
diff --git a/masm/parse.c b/masm/parse.c
index dbe6ade..b36aa1e 100644
--- a/masm/parse.c
+++ b/masm/parse.c
@@ -1,1326 +1,533 @@
#include <mlimits.h>
-#include <merror.h>
#include <stdint.h>
-#include <stdio.h>
-#include <string.h>
-#include <elf.h>
+#include <merror.h>
+#include <stddef.h>
-#include "parse.h"
#include "lex.h"
-#include "mips.h"
+#include "parse.h"
-#define B16(x) (x)
-#define B32(x) (x)
+///
+/// Token Functions
+/// either get a token, peek a token,
+/// or assert a token was returned
+///
+/* get the next token from the lexer */
static int next_token(struct parser *parser, struct token *tok)
{
+ // return peeked first
if (parser->peek.type != TOK_EOF) {
if (tok != NULL)
*tok = parser->peek;
+ else
+ token_free(&parser->peek);
+
parser->peek.type = TOK_EOF;
return M_SUCCESS;
}
+
+ // get next token
struct token token;
- if (lexer_next(parser->lexer, &token))
+ if (lexer_next(&parser->lexer, &token))
return M_ERROR;
- if (tok != NULL)
+
+ // return value if given pointer
+ // else free
+ if (tok != NULL) {
*tok = token;
+ } else {
+ token_free(&token);
+ }
+
return M_SUCCESS;
}
-
+/* peek the next token from the lexer */
static int peek_token(struct parser *parser, struct token *tok)
{
+ // if we don't have a saved token,
+ // get the next one
if (parser->peek.type == TOK_EOF) {
if (next_token(parser, &parser->peek))
return M_ERROR;
}
+
+ // return it if we were given
+ // a pointer
if (tok != NULL)
*tok = parser->peek;
+
return M_SUCCESS;
}
-
+/* get the next token from the lexer, and assert it is of type <type> */
static int assert_token(struct parser *parser, enum token_type type,
struct token *tok)
{
+ // get next token
struct token token;
if (next_token(parser, &token))
return M_ERROR;
+
+ // assert its of type <type>
if (token.type != type) {
ERROR_POS(token, "expected a token of type '%s', got '%s'",
token_str(type), token_str(token.type));
+ token_free(&token);
return M_ERROR;
}
- if (tok != NULL)
+
+ // return value if given pointer
+ // else free
+ if (tok != NULL) {
*tok = token;
+ } else {
+ token_free(&token);
+ }
+
return M_SUCCESS;
}
+/* get the next token from the lexer, and assert it is a new line or end of file */
static int assert_eol(struct parser *parser)
{
struct token token;
if (next_token(parser, &token))
return M_ERROR;
if (token.type != TOK_NL && token.type != TOK_EOF) {
- ERROR_POS(token, "expected a new line or end of file");
+ ERROR_POS(token, "expected a new line or end of file, got '%s'",
+ token_str(token.type));
return M_ERROR;
}
+ token_free(&token);
return M_SUCCESS;
}
-/* each instruction has a given parse format
- * internal to the parser */
-enum mips_parse_format {
- // register type: rs, rt, td
- MIPS_PARSE_R,
- // register type: rs, rt
- MIPS_PARSE_R2,
- // register type: rd
- MIPS_PARSE_RD,
- // register type: rs
- MIPS_PARSE_RS,
- // imeediate type: rs, rt, immd
- MIPS_PARSE_I,
- // jump type: offset
- MIPS_PARSE_J,
- // offset 16b type: offset
- MIPS_PARSE_O16,
- // offset 26b type: offset
- MIPS_PARSE_O26,
- // breanch equal type: rs, rt, offset
- MIPS_PARSE_BE,
- // branch zero type: rs, offset
- MIPS_PARSE_BZ,
- // store and load: rt, offset(base)
- MIPS_PARSE_SL,
- // store and load immediate: rt, immediate
- MIPS_PARSE_SLI,
- // shift: rd, rt, sa
- MIPS_PARSE_S,
- // shift variable: rd, rt, rs
- MIPS_PARSE_SV,
- // none:
- MIPS_PARSE_NONE,
-};
-
-#define FORMAT(ins, format) \
- [MIPS_INS_##ins] = MIPS_PARSE_##format, \
-
-const enum mips_parse_format mips_parse_formats[] = {
- FORMAT(ADD, R)
- FORMAT(ADDI, I)
- FORMAT(ADDIU, I)
- FORMAT(ADDU, R)
- FORMAT(AND, R)
- FORMAT(ANDI, I)
- FORMAT(BAL, O16)
- FORMAT(BALC, O26)
- FORMAT(BC, O26)
- FORMAT(BEQ, BE)
- FORMAT(BEQL, BE)
- FORMAT(BGEZ, BZ)
- FORMAT(BGEZAL, BZ)
- FORMAT(BGEZALL, BZ)
- FORMAT(BGEZL, BZ)
- FORMAT(BGTZ, BZ)
- FORMAT(BGTZL, BZ)
- FORMAT(BLEZ, BZ)
- FORMAT(BLEZL, BZ)
- FORMAT(BLTZ, BZ)
- FORMAT(BLTZAL, BZ)
- FORMAT(BLTZALL, BZ)
- FORMAT(BLTZL, BZ)
- FORMAT(BNE, BE)
- FORMAT(BNEL, BE)
- FORMAT(DIV, R)
- FORMAT(MOD, R)
- FORMAT(DIVU, R)
- FORMAT(MODU, R)
- FORMAT(J, J)
- FORMAT(JAL, J)
- FORMAT(JALR, RS) // TODO: handle rd
- FORMAT(JALX, J)
- FORMAT(JR, RS)
- FORMAT(LB, SL)
- FORMAT(LBU, SL)
- FORMAT(LH, SL)
- FORMAT(LHU, SL)
- FORMAT(LUI, SLI)
- FORMAT(LW, SL)
- FORMAT(MFHI, RD)
- FORMAT(MFLO, RD)
- FORMAT(MTHI, RS)
- FORMAT(MTLO, RS)
- FORMAT(MUL, R)
- FORMAT(MUH, R)
- FORMAT(MULU, R)
- FORMAT(MUHU, R)
- FORMAT(SB, SL)
- FORMAT(SH, SL)
- FORMAT(SW, SL)
- FORMAT(SLL, S)
- FORMAT(SLLV, SV)
- FORMAT(SLT, R)
- FORMAT(SLTI, I)
- FORMAT(SLTIU, I)
- FORMAT(SLTU, R)
- FORMAT(SRA, S)
- FORMAT(SRAV, SV)
- FORMAT(SRL, S)
- FORMAT(SRLV, SV)
- FORMAT(SYSCALL, NONE)
- FORMAT(OR, R)
- FORMAT(ORI, I)
- FORMAT(NOR, R)
- FORMAT(SUB, R)
- FORMAT(SUBU, R)
- FORMAT(XOR, R)
- FORMAT(XORI, I)
-};
-
-#undef FORMAT
-
-#define MAX5 (1 << 5)
-#define MAX16 (1 << 16)
-#define MAX26 (1 << 25)
-#define MAX32 (1 << 31)
-
-static int get_reference(struct parser *parser, uint64_t *offset,
- struct reference *ref, unsigned char type)
+/* peek the next token and return SUCCESS on eol */
+static int peek_eol(struct parser *parser)
{
struct token token;
-
- if (next_token(parser, &token))
- return M_ERROR;
-
- if (token.type == TOK_NUMBER) {
-
- *offset = token.number;
- return M_SUCCESS;
- }
-
- if (token.type != TOK_IDENT) {
- ERROR_POS(token, "unexpected token of type '%s'",
- token_str(token.type));
- return M_ERROR;
- }
-
- strcpy(ref->name, token.text);
- ref->type = type;
- ref->addend = 0;
-
- // return zero for now
- *offset = 0;
- return M_SUCCESS;
-}
-
-static int get_offset(struct parser *parser, int32_t *offset,
- struct reference *ref)
-{
- uint64_t off;
- if (get_reference(parser, &off, ref, R_MIPS_PC16))
- return M_ERROR;
-
- if (off % 4) {
- ERROR_POS((*parser->lexer), "cannot use offset of '%ld', must "
- "be divisble by four", off);
- return M_ERROR;
- }
-
- if (off > MAX16) {
- ERROR("offset '%d' cannot be larger than 16 bits", off);
- return M_ERROR;
- }
-
- *offset = off;
- return M_SUCCESS;
-}
-
-static int get_offset_26(struct parser *parser, int32_t *offset,
- struct reference *ref)
-{
- uint64_t off;
- if (get_reference(parser, &off, ref, R_MIPS_PC26_S2))
- return M_ERROR;
-
- if (off % 4) {
- ERROR_POS((*parser->lexer), "cannot use offset of '%ld', must "
- "be divisble by four", off);
- return M_ERROR;
- }
-
- if (off > MAX26) {
- ERROR("offset '%d' cannot be larger than 26 bits", off);
- return M_ERROR;
- }
-
- *offset = off;
- return M_SUCCESS;
-}
-
-static int get_target(struct parser *parser, uint32_t *offset,
- struct reference *ref)
-{
- uint64_t off;
- if (get_reference(parser, &off, ref, R_MIPS_26))
- return M_ERROR;
-
- if (off > MAX26) {
- ERROR("target '%d' cannot be larger than 26 bits", off);
+ if (peek_token(parser, &token))
return M_ERROR;
- }
-
- *offset = off;
- return M_SUCCESS;
+ int res = (token.type == TOK_NL || token.type == TOK_EOF) ?
+ M_SUCCESS : M_ERROR;
+ return res;
}
-static int get_instruction(const char *ident, struct mips_instruction *res)
-{
- for (int i = 0; i < __MIPS_INS_LEN; i++) {
- struct mips_instruction ins =
- mips_instructions[i];
- if (strcasecmp(ident, ins.name) == 0) {
- if (res != NULL)
- *res = ins;
- return M_SUCCESS;
- }
- }
- return M_ERROR;
-}
+///
+/// PARSER FUNCTIONS
+/// parses each type of expression
+///
-static int parse_register(struct parser *parser, enum mips_register *reg)
+static int parse_directive_whb(struct parser *parser, void *data, uint32_t *res,
+ size_t length, size_t max_size)
{
struct token token;
- if (assert_token(parser, TOK_REG, &token))
- return M_ERROR;
-
- int len = strlen(token.text);
- int c0 = len > 0 ? token.text[0] : '\0',
- c1 = len > 1 ? token.text[1] : '\0',
- c2 = len > 2 ? token.text[2] : '\0',
- c3 = len > 3 ? token.text[3] : '\0';
-
- // $zero
- if (c0 == 'z') {
- if (c1 == 'e' && c2 == 'r' && c3 == 'o') {
- *reg = MIPS_REG_ZERO;
- return M_SUCCESS;
- }
- }
-
- // $a0-a3 $at
- else if (c0 == 'a') {
- if (c1 == 't') {
- *reg = MIPS_REG_AT;
- return M_SUCCESS;
- }
- if (c1 >= '0' && c1 <= '3') {
- *reg = MIPS_REG_A0;
- *reg += c1 - '0';
- return M_SUCCESS;
- }
- }
-
- // $v0-v1
- else if (c0 == 'v') {
- if (c1 >= '0' && c1 <= '1') {
- *reg = MIPS_REG_V0;
- *reg += c1 - '0';
- return M_SUCCESS;
- }
- }
-
- // $t0-t9
- else if (c0 == 't') {
- if (c1 >= '0' && c1 <= '7') {
- *reg = MIPS_REG_T0;
- *reg += c1 - '0';
- return M_SUCCESS;
- }
- // reg T8-T9 are not in order with T0-T7
- if (c1 >= '8' && c1 <= '9') {
- *reg = MIPS_REG_T8;
- *reg += c1 - '8';
- return M_SUCCESS;
- }
- }
-
- // $s0-s7 $sp
- else if (c0 == 's') {
- if (c1 >= '0' && c1 <= '7') {
- *reg = MIPS_REG_S0;
- *reg += c1 - '0';
- return M_SUCCESS;
- }
- if (c1 == 'p') {
- *reg = MIPS_REG_SP;
- return M_SUCCESS;
- }
- }
+ int len = 0;
- // $k0-k1
- else if (c0 == 'k') {
- if (c1 >= '0' && c1 <= '1') {
- *reg = MIPS_REG_K0;
- *reg += c1 - '0';
- return M_SUCCESS;
- }
- }
+ while (1) {
+ if (peek_eol(parser) == M_SUCCESS)
+ break;
- // $gp
- else if (c0 == 'g') {
- if (c1 == 'p') {
- *reg = MIPS_REG_GP;
- return M_SUCCESS;
- }
- }
+ if (assert_token(parser, TOK_NUMBER, &token))
+ return M_ERROR;
- // $fp
- else if (c0 == 'f') {
- if (c1 == 'p') {
- *reg = MIPS_REG_FP;
- return M_SUCCESS;
+ if ((uint64_t)token.number > max_size) {
+ ERROR_POS(token, "number cannot exceed max size of %zu",
+ max_size);
+ return M_ERROR;
}
- }
- // $rp
- else if (c0 == 'r') {
- if (c1 == 'a') {
- *reg = MIPS_REG_RA;
- return M_SUCCESS;
+ if (len >= MAX_ARG_LENGTH) {
+ ERROR_POS(token, "exceeded max argument length for "
+ "directives");
+ return M_ERROR;
}
- }
- // $0-31 (non aliased register names)
- else if (c0 >= '0' && c0 <= '9') {
- int i = c0 - '0';
- if (c1 >= '0' && c1 <= '9') {
- i *= 10;
- i += c1 - '0';
- }
- if (i <= 31) {
- *reg = i;
- return M_SUCCESS;
- }
+ // BUG: does this only work on little endian???
+ memcpy((uint8_t *) data + (len++ * length), &token.number,
+ max_size);
}
- ERROR_POS(token, "unknown register $%s", token.text);
- return M_ERROR;
-}
-
-static int get_reg_offset(struct parser *parser,
- struct ins_expr *expr)
-{
- struct token token;
- enum mips_register reg;
-
- struct mips_instruction *fi = &expr->ins[0];
- struct mips_instruction *si = &expr->ins[1]; // possibly pseudo
- struct reference *fr = &expr->ref[0];
- struct reference *sr = &expr->ref[1];
-
- expr->ins_len = 1;
- fr->type = R_MIPS_NONE;
-
-// =============================================
-
- // defaults
- fi->data.rs = MIPS_REG_ZERO;
- fi->data.immd = 0;
-
- if (peek_token(parser, &token))
- return M_ERROR;
-
- if (token.type == TOK_IDENT)
- goto label;
- else if (token.type == TOK_LPAREN)
- goto reg;
- else
- goto off;
-
-// =============================================
-
-label:
-
- next_token(parser, &token);
-
- expr->ins_len = 2;
-
- // move over first instruction to add in a LUI
- *si = *fi;
- si->data.rs = MIPS_REG_AT;
- si->data.offset = 0;
-
- // update LUI
- *fi = mips_instructions[MIPS_INS_LUI];
- fi->data.rt = MIPS_REG_AT;
- fi->data.immd = 0;
-
- // update references
- strcpy(fr->name, token.text);
- fr->type = R_MIPS_HI16;
- fr->addend = 0;
- strcpy(sr->name, token.text);
- sr->type = R_MIPS_LO16;
- sr->addend = 0;
-
- goto end;
-
-// =============================================
-
-off:
-
- if (assert_token(parser, TOK_NUMBER, &token))
- return M_ERROR;
- fi->data.immd = B16(token.number);
-
- if (peek_token(parser, &token))
- return M_ERROR;
-
- if (token.type == TOK_LPAREN)
- goto reg;
- else
- goto end;
-
-// =============================================
-
-reg:
- if (assert_token(parser, TOK_LPAREN, NULL))
- return M_ERROR;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- fi->data.rs = reg;
-
- if (assert_token(parser, TOK_RPAREN, NULL))
- return M_ERROR;
-
-// =============================================
-end:
- if (peek_token(parser, &token))
- return M_ERROR;
-
+ *res = len;
return M_SUCCESS;
}
-static int parse_number(struct parser *parser, uint32_t *n, uint32_t max)
+static int parse_immd(struct parser *parser, uint16_t *num)
{
struct token token;
if (assert_token(parser, TOK_NUMBER, &token))
return M_ERROR;
- if (max && token.number > max) {
- ERROR_POS(token, "number cannot be larger than '%d'", max);
- return M_ERROR;
- }
- *n = token.number;
- return M_SUCCESS;
-}
-
-static int parse_instruction_r(struct parser *parser,
- struct mips_instruction *ins)
-{
- // format: rs, rt, rd
- enum mips_register reg;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rd = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rs = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rt = reg;
-
- return M_SUCCESS;
-}
-
-static int parse_instruction_r2(struct parser *parser,
- struct mips_instruction *ins)
-{
- // format: rs, rt
- enum mips_register reg;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rs = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rt = reg;
-
- return M_SUCCESS;
-}
-
-static int parse_instruction_rs(struct parser *parser,
- struct mips_instruction *ins)
-{
- // format: rs
- enum mips_register reg;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rs = reg;
+ // TOK_NUMBER does not need to be freed
+ *num = token.number;
return M_SUCCESS;
}
-static int parse_instruction_rd(struct parser *parser,
- struct mips_instruction *ins)
+static int parse_ident(struct parser *parser, struct string *ident)
{
- // format: rd
- enum mips_register reg;
-
- if (parse_register(parser, &reg))
+ struct token token;
+ if (assert_token(parser, TOK_IDENT, &token))
return M_ERROR;
- ins->data.rd = reg;
-
+ string_move(ident, &token.string);
return M_SUCCESS;
}
-static int parse_instruction_i(struct parser *parser,
- struct mips_instruction *ins)
+static int parse_string(struct parser *parser, struct string *string)
{
- // format: rs, rt, immd
- enum mips_register reg;
struct token token;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rt = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rs = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- if (assert_token(parser, TOK_NUMBER, &token))
- return M_ERROR;
-
- if (token.number >= MAX16)
+ if (assert_token(parser, TOK_STRING, &token))
return M_ERROR;
- ins->data.immd = B16(token.number);
-
+ string_move(string, &token.string);
return M_SUCCESS;
}
-static int parse_instruction_offset(struct parser *parser,
- uint32_t max,
- struct mips_instruction *ins,
- struct reference *ref)
+/* parses a directive */
+static int parse_directive(struct parser *parser, struct string *name,
+ struct expr_directive *expr)
{
- int32_t n;
+ #define CHK(n) if (strcmp(name->str, #n) == 0)
- switch (max) {
- case MAX26:
- if (get_offset_26(parser, &n, ref))
- return M_ERROR;
- ins->data.offs26 = B32(n);
- break;
- case MAX16:
- if (get_offset(parser, &n, ref))
- return M_ERROR;
- ins->data.offset = B16(n);
- break;
- default:
- return M_ERROR;
+ CHK(.align) {
+ expr->type = EXPR_DIRECTIVE_ALIGN;
+ return parse_immd(parser, &expr->align);
+ } else CHK(.space) {
+ expr->type = EXPR_DIRECTIVE_SPACE;
+ return parse_immd(parser, &expr->space);
+ } else CHK(.word) {
+ expr->type = EXPR_DIRECTIVE_WORD;
+ return parse_directive_whb(parser, expr->words, &expr->len,
+ sizeof(uint32_t), UINT32_MAX);
+ } else CHK(.half) {
+ expr->type = EXPR_DIRECTIVE_HALF;
+ return parse_directive_whb(parser, expr->halfs, &expr->len,
+ sizeof(uint16_t), UINT16_MAX);
+ } else CHK(.byte) {
+ expr->type = EXPR_DIRECTIVE_BYTE;
+ return parse_directive_whb(parser, expr->bytes, &expr->len,
+ sizeof(uint8_t), UINT8_MAX);
+ } else CHK(.extern) {
+ expr->type = EXPR_DIRECTIVE_EXTERN;
+ return parse_ident(parser, &expr->label);
+ } else CHK(.globl) {
+ expr->type = EXPR_DIRECTIVE_GLOBL;
+ return parse_ident(parser, &expr->label);
+ } else CHK(.ascii) {
+ expr->type = EXPR_DIRECTIVE_ASCII;
+ return parse_string(parser, &expr->string);
+ } else CHK(.asciiz) {
+ expr->type = EXPR_DIRECTIVE_ASCIIZ;
+ return parse_string(parser, &expr->string);
+ } else {
+ expr->type = EXPR_DIRECTIVE_SECTION;
+ string_move(&expr->section, name);
+ return M_SUCCESS;
}
- return M_SUCCESS;
-}
-
-static int parse_instruction_j(struct parser *parser,
- struct mips_instruction *ins,
- struct reference *ref)
-{
- uint32_t n;
- if (get_target(parser, &n, ref) || n > MAX26)
- return M_ERROR;
- ins->data.target = n;
-
- return M_SUCCESS;
-}
-
-static int parse_instruction_branch_equal(struct parser *parser,
- struct mips_instruction *ins,
- struct reference *ref)
-{
- enum mips_register reg;
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rs = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rt = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- int32_t off;
- if (get_offset(parser, &off, ref))
- return M_ERROR;
- ins->data.offset = B16(off);
-
- return M_SUCCESS;
-}
-
-static int parse_instruction_branch(struct parser *parser,
- struct mips_instruction *ins,
- struct reference *ref)
-{
- enum mips_register reg;
- int32_t n;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rs = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- if (get_offset(parser, &n, ref))
- return M_ERROR;
- ins->data.offset = B16(n);
-
- return M_SUCCESS;
-}
-
-static int parse_instruction_sl(struct parser *parser,
- struct ins_expr *expr)
-{
- enum mips_register reg;
- struct mips_instruction *ins = &expr->ins[0];
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rt = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- if (get_reg_offset(parser, expr))
- return M_ERROR;
-
- return M_SUCCESS;
+ #undef CHK
}
-static int parse_instruction_sli(struct parser *parser,
- struct mips_instruction *ins)
+static int parse_constant(struct parser *parser, struct string *name,
+ struct expr_const *constant)
{
- enum mips_register reg;
- struct token token;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rt = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX16)
+ if (assert_token(parser, TOK_EQUAL, NULL))
return M_ERROR;
- ins->data.immd = B16(token.number);
- return M_SUCCESS;
-}
-
-static int parse_instruction_s(struct parser *parser,
- struct mips_instruction *ins)
-{
- enum mips_register reg;
struct token token;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rd = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rt = reg;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
-
- if (assert_token(parser, TOK_NUMBER, &token) || token.number > MAX5)
+ if (assert_token(parser, TOK_NUMBER, &token))
return M_ERROR;
- ins->data.shamt = token.number;
+ string_move(&constant->name, name);
+ constant->num = token.number;
return M_SUCCESS;
}
-static int parse_instruction_sv(struct parser *parser,
- struct mips_instruction *ins)
+static int parse_offset(struct parser *parser,
+ struct expr_ins_arg *arg,
+ uint64_t immd)
{
- enum mips_register reg;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
- ins->data.rd = reg;
+ // the immediate has already been parsed
+ // now parse (REG)
- if (assert_token(parser, TOK_COMMA, NULL))
+ if (assert_token(parser, TOK_LPAREN, NULL))
return M_ERROR;
- if (parse_register(parser, &reg))
+ struct token token;
+ if (assert_token(parser, TOK_REG, &token))
return M_ERROR;
- ins->data.rt = reg;
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
+ // set values
+ string_move(&arg->offset.reg, &token.string);
+ arg->offset.immd = immd;
- if (parse_register(parser, &reg))
+ if (assert_token(parser, TOK_RPAREN, NULL)) {
+ string_free(&arg->offset.reg);
return M_ERROR;
- ins->data.rs = reg;
+ }
return M_SUCCESS;
}
-static int parse_pseudo_li(struct parser *parser, struct ins_expr *expr)
+static int parse_instruction_arg(struct parser *parser,
+ struct expr_ins_arg *arg)
{
- enum mips_register reg;
- uint32_t immd;
-
- if (parse_register(parser, &reg))
- return M_ERROR;
-
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
+ // allowed token matches:
+ //
+ // register:
+ // REG
+ //
+ // label:
+ // IDENT
+ //
+ // immediate:
+ // IMMD
+ //
+ // offset:
+ // (REG)
+ // IMMD(REG)
- if (parse_number(parser, &immd, MAX16))
- return M_ERROR;
-
- expr->ins_len = 1;
- expr->ins[0] = mips_instructions[MIPS_INS_ORI];
- expr->ins[0].data.rt = reg;
- expr->ins[0].data.rs = MIPS_REG_ZERO;
- expr->ins[0].data.immd = B16(immd);
- expr->ref[0].type = R_MIPS_NONE;
-
- return M_SUCCESS;
-}
-
-static int parse_pseudo_la(struct parser *parser, struct ins_expr *expr)
-{
- enum mips_register reg;
struct token token;
-
- uint16_t hi = 0, lo = 0;
-
- if (parse_register(parser, &reg))
+ if (peek_token(parser, &token))
return M_ERROR;
- if (assert_token(parser, TOK_COMMA, NULL))
- return M_ERROR;
+ // if it's a left paren, we're parsing
+ // an offset
+ if (token.type == TOK_LPAREN) {
+ arg->type = EXPR_INS_ARG_OFFSET;
+ return parse_offset(parser, arg, 0);
+ }
+ // token must now be either a number (immediate),
+ // register, or label...
+ // ... take ownership of the next token
if (next_token(parser, &token))
return M_ERROR;
- if (token.type == TOK_IDENT) {
- expr->ref[0].type = R_MIPS_HI16;
- expr->ref[0].addend = 0;
- strcpy(expr->ref[0].name, token.text);
- expr->ref[1].type = R_MIPS_LO16;
- expr->ref[1].addend = 0;
- strcpy(expr->ref[1].name, token.text);
- } else if (token.type == TOK_NUMBER && token.number > MAX32) {
- hi = token.number >> 16;
- lo = token.number & 0x0000ffff;
- expr->ref[0].type = R_MIPS_NONE;
- expr->ref[1].type = R_MIPS_NONE;
- } else {
- return M_ERROR;
+ // if it's a register... return
+ // there are no other pathways
+ if (token.type == TOK_REG) {
+ arg->type = EXPR_INS_ARG_REGISTER;
+ string_move(&arg->reg, &token.string);
+ return M_SUCCESS;
}
- expr->ins_len = 2;
- expr->ins[0] = mips_instructions[MIPS_INS_LUI];
- expr->ins[0].data.rt = reg;
- expr->ins[0].data.immd = B16(hi);
- expr->ins[1] = mips_instructions[MIPS_INS_ADDI];
- expr->ins[1].data.rt = reg;
- expr->ins[1].data.rs = reg;
- expr->ins[1].data.immd = B16(lo);
-
- return M_SUCCESS;
-}
-
-static int parse_pseudo_move(struct parser *parser, struct ins_expr *expr)
-{
- enum mips_register rd, rs;
-
- if (parse_register(parser, &rd))
- return M_ERROR;
+ // if it is a label... return
+ // there are no other pathways
+ if (token.type == TOK_IDENT) {
+ arg->type = EXPR_INS_ARG_LABEL;
+ string_move(&arg->label, &token.string);
+ return M_SUCCESS;
+ }
- if (assert_token(parser, TOK_COMMA, NULL))
+ // now it must be a number...
+ // throw an error if it's not
+ if (token.type != TOK_NUMBER) {
+ ERROR_POS(token, "expected number, got %s",
+ token_str(token.type));
+ token_free(&token);
return M_ERROR;
+ }
- if (parse_register(parser, &rs))
+ uint64_t immd = (uint64_t)token.number;
+ // now if the next token is a lparen
+ // parse offset, else return immd
+ if (peek_token(parser, &token))
return M_ERROR;
- expr->ins_len = 1;
- expr->ins[0] = mips_instructions[MIPS_INS_OR];
- expr->ins[0].data.rs = rs;
- expr->ins[0].data.rt = MIPS_REG_ZERO;
- expr->ins[0].data.rd = rd;
- expr->ref[0].type = R_MIPS_NONE;
-
- return M_SUCCESS;
-}
-
-static int parse_pseudo_nop(struct parser *parser, struct ins_expr *expr)
-{
- (void) parser;
-
- expr->ins_len = 1;
- expr->ins[0] = mips_instructions[MIPS_INS_SLL];
- expr->ref[0].type = R_MIPS_NONE;
-
- return M_SUCCESS;
-}
-
-static int parse_pseudo_instruction(struct parser *parser,
- struct ins_expr *expr,
- struct token ident)
-{
- // disablle logging in the logging
- // module
- extern int log_disabled;
- log_disabled = 1;
-
- int res = M_ERROR;
-
- struct lexer_state state;
- lexer_save(parser->lexer, &state);
-
- #define CHK(name) if (strcmp(ident.text, #name) == 0)
-
- CHK(li)
- res = parse_pseudo_li(parser, expr);
- else CHK(la)
- res = parse_pseudo_la(parser, expr);
- else CHK(move)
- res = parse_pseudo_move(parser, expr);
- else CHK(nop)
- res = parse_pseudo_nop(parser, expr);
-
- #undef CHK
-
- if (res) {
- // reset on fail
- lexer_load(parser->lexer, &state);
- expr->ins[0].data.raw = 0;
- expr->ins[1].data.raw = 0;
- expr->ref[0] = (struct reference) {0};
- expr->ref[1] = (struct reference) {0};
+ if (token.type == TOK_LPAREN) {
+ arg->type = EXPR_INS_ARG_OFFSET;
+ return parse_offset(parser, arg, immd);
+ } else {
+ arg->type = EXPR_INS_ARG_IMMEDIATE;
+ arg->immd = immd;
+ return M_SUCCESS;
}
- log_disabled = 0;
- return res;
}
-static int parse_instruction(struct parser *parser,
- struct ins_expr *expr,
- struct token ident)
+static int parse_instruction(struct parser *parser, struct string *name,
+ struct expr_ins *ins)
{
- struct mips_instruction instruction;
- enum mips_parse_format format;
- int res = M_SUCCESS;
-
- if (parse_pseudo_instruction(parser, expr, ident) == M_SUCCESS)
- return M_SUCCESS;
-
- if (get_instruction(ident.text, &instruction)) {
- ERROR_POS(ident, "unknown instruction '%s'", ident.text);
- return M_ERROR;
- }
-
- struct mips_instruction *ins = &expr->ins[0];
- struct reference *ref = &expr->ref[0];
-
- // this will only ever generate one instruction
- expr->ins_len = 1;
- *ins = instruction;
- ref->type = R_MIPS_NONE;
-
- format = mips_parse_formats[instruction.type];
- switch (format) {
- case MIPS_PARSE_R:
- res = parse_instruction_r(parser, ins);
- break;
- case MIPS_PARSE_R2:
- res = parse_instruction_r2(parser, ins);
- break;
- case MIPS_PARSE_RS:
- res = parse_instruction_rs(parser, ins);
- break;
- case MIPS_PARSE_RD:
- res = parse_instruction_rd(parser, ins);
- break;
- case MIPS_PARSE_I:
- res = parse_instruction_i(parser, ins);
- break;
- case MIPS_PARSE_J:
- res = parse_instruction_j(parser, ins, ref);
- break;
- case MIPS_PARSE_O16:
- res = parse_instruction_offset(parser, MAX16, ins, ref);
- break;
- case MIPS_PARSE_O26:
- res = parse_instruction_offset(parser, MAX26, ins, ref);
- break;
- case MIPS_PARSE_BE:
- res = parse_instruction_branch_equal(parser, ins, ref);
- break;
- case MIPS_PARSE_BZ:
- res = parse_instruction_branch(parser, ins, ref);
- break;
- case MIPS_PARSE_SL:
- res = parse_instruction_sl(parser, expr);
- break;
- case MIPS_PARSE_SLI:
- res = parse_instruction_sli(parser, ins);
- break;
- case MIPS_PARSE_S:
- res = parse_instruction_s(parser, ins);
- break;
- case MIPS_PARSE_SV:
- res = parse_instruction_sv(parser, ins);
- break;
- case MIPS_PARSE_NONE:
- res = M_SUCCESS;
- break;
- }
+ int len = 0;
- if (res == M_SUCCESS && assert_eol(parser))
- return M_ERROR;
+ if (peek_eol(parser) == M_SUCCESS)
+ goto skip_args;
- return res;
-}
+ while (1) {
+ if (len >= MAX_ARG_LENGTH) {
+ ERROR_POS(parser->lexer,
+ "reached max argument length");
+ return M_ERROR;
+ }
-static int parse_directive_align(struct parser *parser,
- struct mips_directive *directive)
-{
- struct token token;
- if (assert_token(parser, TOK_NUMBER, &token))
- return M_ERROR;
+ if (parse_instruction_arg(parser, &ins->args[len++]))
+ return M_ERROR;
- if (token.number < 0) {
- ERROR_POS(token, "cannot align negative");
- return M_ERROR;
- }
+ if (peek_eol(parser) == M_SUCCESS)
+ break;
- if (token.number > MAX16) {
- ERROR_POS(token, "cannot align more than 65kb");
- return M_ERROR;
+ if (assert_token(parser, TOK_COMMA, NULL))
+ return M_ERROR;
}
- directive->type = MIPS_DIRECTIVE_ALIGN;
- directive->align = token.number;
+skip_args:
+ string_move(&ins->name, name);
+ ins->args_len = len;
return M_SUCCESS;
}
-static int parse_directive_space(struct parser *parser,
- struct mips_directive *directive)
+/* gets the next value from the parser */
+int parser_next(struct parser *parser, struct expr *expr)
{
- struct token token;
- if (assert_token(parser, TOK_NUMBER, &token))
- return M_ERROR;
+ // the next token being looked at
+ struct token token = {
+ .type = TOK_NL
+ };
- if (token.number < 0) {
- ERROR_POS(token, "cannot reserve negative");
- return M_ERROR;
- }
+ // the result to return
+ int res = M_SUCCESS;
- if (token.number > MAX16) {
- ERROR_POS(token, "cannot reserve more than 65kb");
- return M_ERROR;
+ // skip all new lines
+ while (1) {
+ if (next_token(parser, &token))
+ return M_ERROR;
+ if (token.type != TOK_NL)
+ break;
+ token_free(&token);
}
- directive->type = MIPS_DIRECTIVE_SPACE;
- directive->space = token.number;
-
- return M_SUCCESS;
-}
+ expr->line_no = parser->lexer.y;
+ expr->byte_start = token.off;
+ expr->byte_end = token.off;
-static int parse_directive_whb(struct parser *parser,
- struct mips_directive *directive,
- enum mips_directive_type type)
-{
- struct token token;
- uint32_t size = 0;
- uint32_t len = 0;
+ // if EOF, return M_EOF
+ if (token.type == TOK_EOF)
+ return M_EOF;
- switch (type) {
- case MIPS_DIRECTIVE_WORD:
- size = UINT32_MAX;
- break;
- case MIPS_DIRECTIVE_HALF:
- size = UINT16_MAX;
- break;
- case MIPS_DIRECTIVE_BYTE:
- size = UINT8_MAX;
- break;
- default:
+ // when an ident ends with a colon
+ // parse a label
+ else if (token.type == TOK_LABEL) {
+ expr->type = EXPR_LABEL;
+ // label now owns string
+ string_move(&expr->label, &token.string);
}
- directive->type = type;
-
- while (1) {
- if (assert_token(parser, TOK_NUMBER, &token))
- return M_ERROR;
+ // when an ident starts with a dot
+ // parse a directive
+ else if (token.type == TOK_DIRECTIVE) {
+ expr->type = EXPR_DIRECTIVE;
+ res = parse_directive(parser, &token.string, &expr->directive);
+ }
- if (len >= MAX_ARG_LENGTH) {
- ERROR_POS(token, "directives cannot be longer than "
- "%d arguments", MAX_ARG_LENGTH);
+ // peek the next token:
+ // 1. = means parse constant
+ // 2. else parse instruction
+ else {
+ if (token.type != TOK_IDENT) {
+ ERROR_POS(token, "expected ident, got %s",
+ token_str(token.type));
+ token_free(&token);
return M_ERROR;
}
- if (token.number > size) {
- ERROR_POS(token, "number cannot execede max size of: "
- "%d", size);
+ struct token peek;
+ if (peek_token(parser, &peek)) {
+ token_free(&token);
return M_ERROR;
}
- switch (type) {
- case MIPS_DIRECTIVE_WORD:
- directive->words[len++] = token.number;
-
- break;
- case MIPS_DIRECTIVE_HALF:
- directive->halfs[len++] = token.number;
- break;
- case MIPS_DIRECTIVE_BYTE:
- directive->bytes[len++] = token.number;
- break;
- default:
- }
-
- if (peek_token(parser, &token))
- return M_ERROR;
-
- if (token.type == TOK_COMMA) {
- next_token(parser, NULL);
- continue;
+ if (peek.type == TOK_EQUAL) {
+ expr->type = EXPR_CONSTANT;
+ res = parse_constant(parser, &token.string,
+ &expr->constant);
+ } else {
+ expr->type = EXPR_INS;
+ res = parse_instruction(parser, &token.string,
+ &expr->instruction);
}
-
- break;
}
- directive->len = len;
+ // update byte end for expr
+ expr->byte_end = ftell(parser->lexer.file);
- return M_SUCCESS;
-}
-
-static int parse_directive_extern(struct parser *parser,
- struct mips_directive *directive)
-{
- struct token token;
- if (assert_token(parser, TOK_IDENT, &token))
- return M_ERROR;
-
- directive->type = MIPS_DIRECTIVE_EXTERN;
- strcpy(directive->name, token.text);
+ // free tokens
+ token_free(&token);
- return M_SUCCESS;
-}
-
-static int parse_directive_globl(struct parser *parser,
- struct mips_directive *directive)
-{
- struct token token;
- if (assert_token(parser, TOK_IDENT, &token))
+ // everything must end in a new line
+ if (res == M_SUCCESS && assert_eol(parser))
return M_ERROR;
- directive->type = MIPS_DIRECTIVE_GLOBL;
- strcpy(directive->name, token.text);
-
- return M_SUCCESS;
+ return res;
}
-static int parse_directive_ascii(struct parser *parser,
- struct mips_directive *directive,
- enum mips_directive_type type)
+int parser_init(const char *file, struct parser *parser)
{
- struct token token;
- if (assert_token(parser, TOK_STRING, &token))
+ parser->peek.type = TOK_EOF;
+ if (lexer_init(file, &parser->lexer))
return M_ERROR;
-
- directive->type = type;
- strcpy(directive->name, token.text);
-
- return M_SUCCESS;
-}
-
-static int parse_section(struct mips_directive *directive,
- char name[MAX_LEX_LENGTH])
-{
- directive->type = MIPS_DIRECTIVE_SECTION;
- strcpy(directive->name, name);
-
return M_SUCCESS;
}
-static int parse_directive(struct parser *parser,
- struct mips_directive *directive)
-{
- struct token token;
- if (assert_token(parser, TOK_DIRECTIVE, &token))
- return M_ERROR;
-
- // .align n
- if (strcmp(token.text, "align") == 0)
- return parse_directive_align(parser, directive);
- else if (strcmp(token.text, "space") == 0)
- return parse_directive_space(parser, directive);
- else if (strcmp(token.text, "word") == 0)
- return parse_directive_whb(parser, directive,
- MIPS_DIRECTIVE_WORD);
- else if (strcmp(token.text, "half") == 0)
- return parse_directive_whb(parser, directive,
- MIPS_DIRECTIVE_HALF);
- else if (strcmp(token.text, "byte") == 0)
- return parse_directive_whb(parser, directive,
- MIPS_DIRECTIVE_BYTE);
- else if (strcmp(token.text, "extern") == 0)
- return parse_directive_extern(parser, directive);
- else if (strcmp(token.text, "globl") == 0)
- return parse_directive_globl(parser, directive);
- else if (strcmp(token.text, "ascii") == 0)
- return parse_directive_ascii(parser, directive,
- MIPS_DIRECTIVE_ASCII);
- else if (strcmp(token.text, "asciiz") == 0)
- return parse_directive_ascii(parser, directive,
- MIPS_DIRECTIVE_ASCIIZ);
- else
- return parse_section(directive, token.text);
-}
-static int parse_constant(struct parser *parser, struct const_expr *expr,
- struct token ident)
+void parser_free(struct parser *parser)
{
- struct token number;
-
- if (assert_token(parser, TOK_EQUAL, NULL))
- return M_ERROR;
-
- if (assert_token(parser, TOK_NUMBER, &number))
- return M_ERROR;
-
- strcpy(expr->name,ident.text);
- expr->value = number.number;
-
- return M_SUCCESS;
+ token_free(&parser->peek);
+ lexer_free(&parser->lexer);
}
-static int parser_handle_ident(struct parser *parser, struct expr *expr)
+static inline void expr_directive_free(struct expr_directive *dir)
{
- struct token ident;
- struct token peek;
-
- if (assert_token(parser, TOK_IDENT, &ident))
- return M_ERROR;
-
- if (peek_token(parser, &peek))
- return M_ERROR;
-
- if (peek.type == TOK_EQUAL) {
- expr->type = EXPR_CONSTANT;
- return parse_constant(parser, &expr->constant, ident);
- } else {
- expr->type = EXPR_INS;
- return parse_instruction(parser, &expr->ins, ident);
+ switch (dir->type) {
+ case EXPR_DIRECTIVE_SECTION:
+ string_free(&dir->section);
+ break;
+ case EXPR_DIRECTIVE_EXTERN:
+ case EXPR_DIRECTIVE_GLOBL:
+ string_free(&dir->label);
+ break;
+ case EXPR_DIRECTIVE_ASCII:
+ case EXPR_DIRECTIVE_ASCIIZ:
+ string_free(&dir->string);
+ break;
+ default:
}
}
-
-static int parse_label(struct parser *parser,
- struct expr *expr)
-{
- struct token token;
-
- if (assert_token(parser, TOK_LABEL, &token))
- return M_ERROR;
- strcpy(expr->label, token.text);
-
- return M_SUCCESS;
-}
-
-
-int parser_next(struct parser *parser, struct expr *expr)
+static inline void expr_ins_arg_free(struct expr_ins_arg *arg)
{
- struct token token;
- int res = M_SUCCESS;
-
-again:
- if (peek_token(parser, &token))
- return M_ERROR;
-
- switch (token.type) {
- case TOK_NL:
- next_token(parser, NULL);
- goto again;
-
- case TOK_EOF:
- res = M_EOF;
- break;
-
- case TOK_LABEL:
- expr->type = EXPR_LABEL;
- res = parse_label(parser, expr);
- break;
-
- case TOK_DIRECTIVE:
- expr->type = EXPR_DIRECTIVE;
- res = parse_directive(parser, &expr->directive);
- break;
-
- case TOK_IDENT:
- res = parser_handle_ident(parser, expr);
- break;
-
- default:
- ERROR_POS(token, "unexpected token '%s'",
- token_str(token.type));
- return M_ERROR;
-
+ switch (arg->type) {
+ case EXPR_INS_ARG_REGISTER:
+ string_free(&arg->reg);
+ break;
+ case EXPR_INS_ARG_IMMEDIATE:
+ break;
+ case EXPR_INS_ARG_LABEL:
+ string_free(&arg->label);
+ break;
+ case EXPR_INS_ARG_OFFSET:
+ string_free(&arg->offset.reg);
+ break;
}
-
- return res;
-}
-
-int parser_init(struct lexer *lexer, struct parser *parser)
-{
- parser->lexer = lexer;
- parser->peek.type = TOK_EOF;
- return M_SUCCESS;
}
-
-void parser_free(struct parser *parser)
+void expr_free(struct expr *expr)
{
- (void) parser;
+ switch (expr->type) {
+ case EXPR_DIRECTIVE:
+ expr_directive_free(&expr->directive);
+ break;
+ case EXPR_CONSTANT:
+ string_free(&expr->constant.name);
+ break;
+ case EXPR_INS:
+ string_free(&expr->instruction.name);
+ for (uint32_t i = 0; i < expr->instruction.args_len; i++)
+ expr_ins_arg_free(&expr->instruction.args[i]);
+ break;
+ case EXPR_LABEL:
+ string_free(&expr->label);
+ break;
+ }
}