mips/masm/parse.c

534 lines
11 KiB
C
Raw Permalink Normal View History

#include <mlimits.h>
2024-09-13 15:11:18 +00:00
#include <stdint.h>
2024-10-04 23:41:10 +00:00
#include <merror.h>
#include <stddef.h>
#include "lex.h"
2024-10-04 23:41:10 +00:00
#include "parse.h"
2024-10-04 23:41:10 +00:00
///
/// Token Functions
/// either get a token, peek a token,
/// or assert a token was returned
///
2024-09-22 20:02:42 +00:00
2024-10-04 23:41:10 +00:00
/* get the next token from the lexer */
2024-09-11 16:06:09 +00:00
static int next_token(struct parser *parser, struct token *tok)
{
2024-10-04 23:41:10 +00:00
// return peeked first
if (parser->peek.type != TOK_EOF) {
if (tok != NULL)
*tok = parser->peek;
2024-10-04 23:41:10 +00:00
else
token_free(&parser->peek);
parser->peek.type = TOK_EOF;
return M_SUCCESS;
}
2024-10-04 23:41:10 +00:00
// get next token
struct token token;
2024-10-04 23:41:10 +00:00
if (lexer_next(&parser->lexer, &token))
return M_ERROR;
2024-10-04 23:41:10 +00:00
// return value if given pointer
// else free
if (tok != NULL) {
*tok = token;
2024-10-04 23:41:10 +00:00
} else {
token_free(&token);
}
return M_SUCCESS;
}
2024-10-04 23:41:10 +00:00
/* peek the next token from the lexer */
2024-09-11 16:06:09 +00:00
static int peek_token(struct parser *parser, struct token *tok)
{
2024-10-04 23:41:10 +00:00
// if we dont have a saved token
// get the next one
if (parser->peek.type == TOK_EOF) {
if (next_token(parser, &parser->peek))
return M_ERROR;
}
2024-10-04 23:41:10 +00:00
// return it if we were given
// a pointer
if (tok != NULL)
*tok = parser->peek;
2024-10-04 23:41:10 +00:00
return M_SUCCESS;
}
2024-10-04 23:41:10 +00:00
/* get the next token from the lexer, and assert its of type <type> */
2024-09-11 16:06:09 +00:00
static int assert_token(struct parser *parser, enum token_type type,
struct token *tok)
{
2024-10-04 23:41:10 +00:00
// get next token
struct token token;
if (next_token(parser, &token))
return M_ERROR;
2024-10-04 23:41:10 +00:00
// assert its of type <type>
if (token.type != type) {
ERROR_POS(token, "expected a token of type '%s', got '%s'",
token_str(type), token_str(token.type));
2024-10-04 23:41:10 +00:00
token_free(&token);
return M_ERROR;
}
2024-10-04 23:41:10 +00:00
// return value if given pointer
// else free
if (tok != NULL) {
*tok = token;
2024-10-04 23:41:10 +00:00
} else {
token_free(&token);
}
return M_SUCCESS;
}
2024-10-04 23:41:10 +00:00
/* get the next token from the lexer, and assert its of type NL */
2024-09-11 16:06:09 +00:00
static int assert_eol(struct parser *parser)
{
struct token token;
if (next_token(parser, &token))
return M_ERROR;
if (token.type != TOK_NL && token.type != TOK_EOF) {
2024-10-04 23:41:10 +00:00
ERROR_POS(token, "expected a new line or end of file, got '%s'",
token_str(token.type));
return M_ERROR;
}
2024-10-04 23:41:10 +00:00
token_free(&token);
return M_SUCCESS;
}
2024-10-04 23:41:10 +00:00
/* peek the next token and return SUCCESS on eol */
static int peek_eol(struct parser *parser)
2024-09-11 16:06:09 +00:00
{
struct token token;
2024-10-04 23:41:10 +00:00
if (peek_token(parser, &token))
2024-09-11 16:06:09 +00:00
return M_ERROR;
2024-10-04 23:41:10 +00:00
int res = (token.type == TOK_NL || token.type == TOK_EOF) ?
M_SUCCESS : M_ERROR;
return res;
}
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
///
/// PARSER FUNCTIONS
/// parses each type of expression
///
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
static int parse_directive_whb(struct parser *parser, void *data, uint32_t *res,
size_t length, size_t max_size)
{
struct token token;
int len = 0;
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
while (1) {
if (peek_eol(parser) == M_SUCCESS)
break;
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
if (assert_token(parser, TOK_NUMBER, &token))
return M_ERROR;
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
if ((uint64_t)token.number > max_size) {
ERROR_POS(token, "number cannot exceed max size of %zu",
max_size);
return M_ERROR;
}
2024-09-13 15:11:18 +00:00
2024-10-04 23:41:10 +00:00
if (len >= MAX_ARG_LENGTH) {
ERROR_POS(token, "exceeded max argument length for "
"directives");
return M_ERROR;
}
2024-09-13 15:11:18 +00:00
2024-10-04 23:41:10 +00:00
// BUG: does this only work on little endian???
memcpy((uint8_t *) data + (len++ * length), &token.number,
max_size);
2024-09-13 15:11:18 +00:00
}
2024-10-04 23:41:10 +00:00
*res = len;
2024-09-13 15:11:18 +00:00
return M_SUCCESS;
}
2024-10-04 23:41:10 +00:00
static int parse_immd(struct parser *parser, uint16_t *num)
2024-09-13 15:11:18 +00:00
{
2024-10-04 23:41:10 +00:00
struct token token;
if (assert_token(parser, TOK_NUMBER, &token))
2024-09-13 15:11:18 +00:00
return M_ERROR;
2024-10-04 23:41:10 +00:00
// TOK_NUMBER does not need to be freed
*num = token.number;
2024-09-13 15:11:18 +00:00
return M_SUCCESS;
2024-09-11 16:06:09 +00:00
}
2024-10-04 23:41:10 +00:00
static int parse_ident(struct parser *parser, struct string *ident)
2024-09-11 16:06:09 +00:00
{
2024-10-04 23:41:10 +00:00
struct token token;
if (assert_token(parser, TOK_IDENT, &token))
2024-09-13 15:11:18 +00:00
return M_ERROR;
2024-10-04 23:41:10 +00:00
string_move(ident, &token.string);
2024-09-13 15:11:18 +00:00
return M_SUCCESS;
2024-09-11 16:06:09 +00:00
}
2024-10-04 23:41:10 +00:00
static int parse_string(struct parser *parser, struct string *string)
2024-09-11 16:06:09 +00:00
{
struct token token;
2024-10-04 23:41:10 +00:00
if (assert_token(parser, TOK_STRING, &token))
2024-09-11 16:06:09 +00:00
return M_ERROR;
2024-10-04 23:41:10 +00:00
string_move(string, &token.string);
return M_SUCCESS;
}
/* parses a directive */
static int parse_directive(struct parser *parser, struct string *name,
struct expr_directive *expr)
{
#define CHK(n) if (strcmp(name->str, #n) == 0)
CHK(.align) {
expr->type = EXPR_DIRECTIVE_ALIGN;
return parse_immd(parser, &expr->align);
} else CHK(.space) {
expr->type = EXPR_DIRECTIVE_SPACE;
return parse_immd(parser, &expr->space);
} else CHK(.word) {
expr->type = EXPR_DIRECTIVE_WORD;
return parse_directive_whb(parser, expr->words, &expr->len,
sizeof(uint32_t), UINT32_MAX);
} else CHK(.half) {
expr->type = EXPR_DIRECTIVE_HALF;
return parse_directive_whb(parser, expr->halfs, &expr->len,
sizeof(uint16_t), UINT16_MAX);
} else CHK(.byte) {
expr->type = EXPR_DIRECTIVE_BYTE;
return parse_directive_whb(parser, expr->bytes, &expr->len,
sizeof(uint8_t), UINT8_MAX);
} else CHK(.extern) {
expr->type = EXPR_DIRECTIVE_EXTERN;
return parse_ident(parser, &expr->label);
} else CHK(.globl) {
expr->type = EXPR_DIRECTIVE_GLOBL;
return parse_ident(parser, &expr->label);
} else CHK(.ascii) {
expr->type = EXPR_DIRECTIVE_ASCII;
return parse_string(parser, &expr->string);
} else CHK(.asciiz) {
expr->type = EXPR_DIRECTIVE_ASCIIZ;
return parse_string(parser, &expr->string);
} else {
expr->type = EXPR_DIRECTIVE_SECTION;
string_move(&expr->section, name);
return M_SUCCESS;
2024-09-11 16:06:09 +00:00
}
2024-10-04 23:41:10 +00:00
#undef CHK
2024-09-11 16:06:09 +00:00
}
2024-10-04 23:41:10 +00:00
static int parse_constant(struct parser *parser, struct string *name,
struct expr_const *constant)
2024-09-13 15:11:18 +00:00
{
2024-10-04 23:41:10 +00:00
if (assert_token(parser, TOK_EQUAL, NULL))
2024-09-13 15:11:18 +00:00
return M_ERROR;
2024-10-04 23:41:10 +00:00
struct token token;
2024-09-14 00:15:56 +00:00
if (assert_token(parser, TOK_NUMBER, &token))
2024-09-13 15:11:18 +00:00
return M_ERROR;
2024-10-04 23:41:10 +00:00
string_move(&constant->name, name);
constant->num = token.number;
return M_SUCCESS;
}
2024-09-13 15:11:18 +00:00
2024-10-04 23:41:10 +00:00
static int parse_offset(struct parser *parser,
struct expr_ins_arg *arg,
uint64_t immd)
{
// the immediate has already been parsed
// now parse (REG)
2024-09-13 15:11:18 +00:00
if (assert_token(parser, TOK_LPAREN, NULL))
return M_ERROR;
2024-10-04 23:41:10 +00:00
struct token token;
if (assert_token(parser, TOK_REG, &token))
2024-09-13 15:11:18 +00:00
return M_ERROR;
2024-10-04 23:41:10 +00:00
// set values
string_move(&arg->offset.reg, &token.string);
arg->offset.immd = immd;
2024-09-13 15:11:18 +00:00
2024-10-04 23:41:10 +00:00
if (assert_token(parser, TOK_RPAREN, NULL)) {
string_free(&arg->offset.reg);
2024-09-13 15:11:18 +00:00
return M_ERROR;
2024-10-04 23:41:10 +00:00
}
2024-09-13 15:11:18 +00:00
return M_SUCCESS;
}
2024-10-04 23:41:10 +00:00
static int parse_instruction_arg(struct parser *parser,
struct expr_ins_arg *arg)
2024-09-13 15:11:18 +00:00
{
2024-10-04 23:41:10 +00:00
// allowed token matches:
//
// register:
// REG
//
// label:
// IDENT
//
// immediate:
// IMMD
//
// offset:
// (REG)
// IMMD(REG)
2024-09-13 15:11:18 +00:00
struct token token;
2024-10-04 23:41:10 +00:00
if (peek_token(parser, &token))
2024-09-13 15:11:18 +00:00
return M_ERROR;
2024-10-04 23:41:10 +00:00
// if its a left paren, were parsing
// an offset
if (token.type == TOK_LPAREN) {
arg->type = EXPR_INS_ARG_OFFSET;
return parse_offset(parser, arg, 0);
}
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
// token must now be either a number (immediate)
// register, or label,,,
// ... take ownership of the next token
if (next_token(parser, &token))
2024-09-11 16:06:09 +00:00
return M_ERROR;
2024-10-04 23:41:10 +00:00
// if its a register... return
// there are no other pathways
if (token.type == TOK_REG) {
arg->type = EXPR_INS_ARG_REGISTER;
string_move(&arg->reg, &token.string);
return M_SUCCESS;
}
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
// if it is a label... return
// therea are no other pathways
if (token.type == TOK_IDENT) {
arg->type = EXPR_INS_ARG_LABEL;
string_move(&arg->label, &token.string);
return M_SUCCESS;
}
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
// now it must be a number...
// throw a error if its now
if (token.type != TOK_NUMBER) {
ERROR_POS(token, "expected number, got %s",
token_str(token.type));
token_free(&token);
2024-09-11 16:06:09 +00:00
return M_ERROR;
2024-10-04 23:41:10 +00:00
}
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
uint64_t immd = (uint64_t)token.number;
// now if the next token is a lparen
// parse offset, else return immd
if (peek_token(parser, &token))
2024-09-11 16:06:09 +00:00
return M_ERROR;
2024-10-04 23:41:10 +00:00
if (token.type == TOK_LPAREN) {
arg->type = EXPR_INS_ARG_OFFSET;
return parse_offset(parser, arg, immd);
} else {
arg->type = EXPR_INS_ARG_IMMEDIATE;
arg->immd = immd;
return M_SUCCESS;
}
2024-09-11 16:06:09 +00:00
}
2024-10-04 23:41:10 +00:00
static int parse_instruction(struct parser *parser, struct string *name,
struct expr_ins *ins)
2024-09-11 16:06:09 +00:00
{
2024-10-04 23:41:10 +00:00
int len = 0;
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
if (peek_eol(parser) == M_SUCCESS)
goto skip_args;
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
while (1) {
if (len >= MAX_ARG_LENGTH) {
ERROR_POS(parser->lexer,
"reached max argument length");
return M_ERROR;
}
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
if (parse_instruction_arg(parser, &ins->args[len++]))
return M_ERROR;
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
if (peek_eol(parser) == M_SUCCESS)
break;
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
if (assert_token(parser, TOK_COMMA, NULL))
return M_ERROR;
}
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
skip_args:
string_move(&ins->name, name);
ins->args_len = len;
2024-09-11 16:06:09 +00:00
return M_SUCCESS;
}
2024-10-04 23:41:10 +00:00
/* gets the next value from the parser */
int parser_next(struct parser *parser, struct expr *expr)
2024-09-11 16:06:09 +00:00
{
2024-10-04 23:41:10 +00:00
// the next token being looked at
struct token token = {
.type = TOK_NL
};
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
// the result to return
int res = M_SUCCESS;
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
// skip all new lines
while (1) {
if (next_token(parser, &token))
return M_ERROR;
if (token.type != TOK_NL)
break;
token_free(&token);
}
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
expr->line_no = parser->lexer.y;
expr->byte_start = token.off;
expr->byte_end = token.off;
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
// if EOF, return M_EOF
if (token.type == TOK_EOF)
return M_EOF;
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
// when a ident ends with a colon
// parse a lebel
else if (token.type == TOK_LABEL) {
expr->type = EXPR_LABEL;
// label now owns string
string_move(&expr->label, &token.string);
}
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
// when a ident starts with a dot
// parse a directive
else if (token.type == TOK_DIRECTIVE) {
expr->type = EXPR_DIRECTIVE;
res = parse_directive(parser, &token.string, &expr->directive);
}
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
// peek the next token:
// 1. = means parse constant
// 2. else parse instruction
else {
if (token.type != TOK_IDENT) {
ERROR_POS(token, "expected ident, got %s",
token_str(token.type));
token_free(&token);
return M_ERROR;
}
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
struct token peek;
if (peek_token(parser, &peek)) {
token_free(&token);
return M_ERROR;
}
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
if (peek.type == TOK_EQUAL) {
expr->type = EXPR_CONSTANT;
res = parse_constant(parser, &token.string,
&expr->constant);
} else {
expr->type = EXPR_INS;
res = parse_instruction(parser, &token.string,
&expr->instruction);
}
}
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
// update byte end for expr
expr->byte_end = ftell(parser->lexer.file);
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
// free tokens
token_free(&token);
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
// everything must end in a new line
if (res == M_SUCCESS && assert_eol(parser))
2024-09-13 15:11:18 +00:00
return M_ERROR;
2024-09-11 16:06:09 +00:00
2024-10-04 23:41:10 +00:00
return res;
2024-09-11 16:06:09 +00:00
}
2024-10-04 23:41:10 +00:00
int parser_init(const char *file, struct parser *parser)
2024-09-11 16:06:09 +00:00
{
2024-10-04 23:41:10 +00:00
parser->peek.type = TOK_EOF;
if (lexer_init(file, &parser->lexer))
2024-09-11 16:06:09 +00:00
return M_ERROR;
return M_SUCCESS;
}
2024-10-04 23:41:10 +00:00
/*
 * Release everything held by `parser`: the token still sitting in the
 * peek buffer, then the lexer.
 */
void parser_free(struct parser *parser)
{
	struct token *pending = &parser->peek;

	token_free(pending);
	lexer_free(&parser->lexer);
}
2024-10-04 23:41:10 +00:00
static inline void expr_directive_free(struct expr_directive *dir)
2024-09-11 16:06:09 +00:00
{
2024-10-04 23:41:10 +00:00
switch (dir->type) {
case EXPR_DIRECTIVE_SECTION:
string_free(&dir->section);
break;
case EXPR_DIRECTIVE_EXTERN:
case EXPR_DIRECTIVE_GLOBL:
string_free(&dir->label);
break;
case EXPR_DIRECTIVE_ASCII:
case EXPR_DIRECTIVE_ASCIIZ:
string_free(&dir->string);
break;
default:
}
2024-09-11 16:06:09 +00:00
}
2024-10-04 23:41:10 +00:00
static inline void expr_ins_arg_free(struct expr_ins_arg *arg)
2024-09-11 16:06:09 +00:00
{
2024-10-04 23:41:10 +00:00
switch (arg->type) {
case EXPR_INS_ARG_REGISTER:
string_free(&arg->reg);
break;
case EXPR_INS_ARG_IMMEDIATE:
break;
case EXPR_INS_ARG_LABEL:
string_free(&arg->label);
break;
case EXPR_INS_ARG_OFFSET:
string_free(&arg->offset.reg);
break;
}
2024-09-11 16:06:09 +00:00
}
2024-10-04 23:41:10 +00:00
void expr_free(struct expr *expr)
2024-09-11 16:06:09 +00:00
{
2024-10-04 23:41:10 +00:00
switch (expr->type) {
case EXPR_DIRECTIVE:
expr_directive_free(&expr->directive);
2024-09-11 16:06:09 +00:00
break;
2024-10-04 23:41:10 +00:00
case EXPR_CONSTANT:
string_free(&expr->constant.name);
2024-09-11 16:06:09 +00:00
break;
2024-10-04 23:41:10 +00:00
case EXPR_INS:
string_free(&expr->instruction.name);
for (uint32_t i = 0; i < expr->instruction.args_len; i++)
expr_ins_arg_free(&expr->instruction.args[i]);
2024-09-11 16:06:09 +00:00
break;
2024-10-04 23:41:10 +00:00
case EXPR_LABEL:
string_free(&expr->label);
2024-09-11 16:06:09 +00:00
break;
}
}