533 lines
11 KiB
C
533 lines
11 KiB
C
#include <mlimits.h>
|
|
#include <stdint.h>
|
|
#include <merror.h>
|
|
#include <stddef.h>
|
|
|
|
#include "lex.h"
|
|
#include "parse.h"
|
|
|
|
///
|
|
/// Token Functions
|
|
/// either get a token, peek a token,
|
|
/// or assert a token was returned
|
|
///
|
|
|
|
/* get the next token from the lexer */
|
|
static int next_token(struct parser *parser, struct token *tok)
|
|
{
|
|
// return peeked first
|
|
if (parser->peek.type != TOK_EOF) {
|
|
if (tok != NULL)
|
|
*tok = parser->peek;
|
|
else
|
|
token_free(&parser->peek);
|
|
|
|
parser->peek.type = TOK_EOF;
|
|
return M_SUCCESS;
|
|
}
|
|
|
|
// get next token
|
|
struct token token;
|
|
if (lexer_next(&parser->lexer, &token))
|
|
return M_ERROR;
|
|
|
|
// return value if given pointer
|
|
// else free
|
|
if (tok != NULL) {
|
|
*tok = token;
|
|
} else {
|
|
token_free(&token);
|
|
}
|
|
|
|
return M_SUCCESS;
|
|
}
|
|
|
|
/* peek the next token from the lexer */
|
|
static int peek_token(struct parser *parser, struct token *tok)
|
|
{
|
|
// if we dont have a saved token
|
|
// get the next one
|
|
if (parser->peek.type == TOK_EOF) {
|
|
if (next_token(parser, &parser->peek))
|
|
return M_ERROR;
|
|
}
|
|
|
|
// return it if we were given
|
|
// a pointer
|
|
if (tok != NULL)
|
|
*tok = parser->peek;
|
|
|
|
return M_SUCCESS;
|
|
}
|
|
|
|
/* get the next token from the lexer, and assert its of type <type> */
|
|
static int assert_token(struct parser *parser, enum token_type type,
|
|
struct token *tok)
|
|
{
|
|
// get next token
|
|
struct token token;
|
|
if (next_token(parser, &token))
|
|
return M_ERROR;
|
|
|
|
// assert its of type <type>
|
|
if (token.type != type) {
|
|
ERROR_POS(token, "expected a token of type '%s', got '%s'",
|
|
token_str(type), token_str(token.type));
|
|
token_free(&token);
|
|
return M_ERROR;
|
|
}
|
|
|
|
// return value if given pointer
|
|
// else free
|
|
if (tok != NULL) {
|
|
*tok = token;
|
|
} else {
|
|
token_free(&token);
|
|
}
|
|
|
|
return M_SUCCESS;
|
|
}
|
|
|
|
/* get the next token from the lexer, and assert its of type NL */
|
|
static int assert_eol(struct parser *parser)
|
|
{
|
|
struct token token;
|
|
if (next_token(parser, &token))
|
|
return M_ERROR;
|
|
if (token.type != TOK_NL && token.type != TOK_EOF) {
|
|
ERROR_POS(token, "expected a new line or end of file, got '%s'",
|
|
token_str(token.type));
|
|
return M_ERROR;
|
|
}
|
|
token_free(&token);
|
|
return M_SUCCESS;
|
|
}
|
|
|
|
/* peek the next token and return SUCCESS on eol */
|
|
static int peek_eol(struct parser *parser)
|
|
{
|
|
struct token token;
|
|
if (peek_token(parser, &token))
|
|
return M_ERROR;
|
|
int res = (token.type == TOK_NL || token.type == TOK_EOF) ?
|
|
M_SUCCESS : M_ERROR;
|
|
return res;
|
|
}
|
|
|
|
///
|
|
/// PARSER FUNCTIONS
|
|
/// parses each type of expression
|
|
///
|
|
|
|
static int parse_directive_whb(struct parser *parser, void *data, uint32_t *res,
|
|
size_t length, size_t max_size)
|
|
{
|
|
struct token token;
|
|
int len = 0;
|
|
|
|
while (1) {
|
|
if (peek_eol(parser) == M_SUCCESS)
|
|
break;
|
|
|
|
if (assert_token(parser, TOK_NUMBER, &token))
|
|
return M_ERROR;
|
|
|
|
if ((uint64_t)token.number > max_size) {
|
|
ERROR_POS(token, "number cannot exceed max size of %zu",
|
|
max_size);
|
|
return M_ERROR;
|
|
}
|
|
|
|
if (len >= MAX_ARG_LENGTH) {
|
|
ERROR_POS(token, "exceeded max argument length for "
|
|
"directives");
|
|
return M_ERROR;
|
|
}
|
|
|
|
// BUG: does this only work on little endian???
|
|
memcpy((uint8_t *) data + (len++ * length), &token.number,
|
|
max_size);
|
|
}
|
|
|
|
*res = len;
|
|
return M_SUCCESS;
|
|
}
|
|
|
|
static int parse_immd(struct parser *parser, uint16_t *num)
|
|
{
|
|
struct token token;
|
|
if (assert_token(parser, TOK_NUMBER, &token))
|
|
return M_ERROR;
|
|
|
|
// TOK_NUMBER does not need to be freed
|
|
*num = token.number;
|
|
return M_SUCCESS;
|
|
}
|
|
|
|
static int parse_ident(struct parser *parser, struct string *ident)
|
|
{
|
|
struct token token;
|
|
if (assert_token(parser, TOK_IDENT, &token))
|
|
return M_ERROR;
|
|
string_move(ident, &token.string);
|
|
return M_SUCCESS;
|
|
}
|
|
|
|
static int parse_string(struct parser *parser, struct string *string)
|
|
{
|
|
struct token token;
|
|
if (assert_token(parser, TOK_STRING, &token))
|
|
return M_ERROR;
|
|
string_move(string, &token.string);
|
|
return M_SUCCESS;
|
|
}
|
|
|
|
/* parses a directive */
|
|
static int parse_directive(struct parser *parser, struct string *name,
|
|
struct expr_directive *expr)
|
|
{
|
|
#define CHK(n) if (strcmp(name->str, #n) == 0)
|
|
|
|
CHK(.align) {
|
|
expr->type = EXPR_DIRECTIVE_ALIGN;
|
|
return parse_immd(parser, &expr->align);
|
|
} else CHK(.space) {
|
|
expr->type = EXPR_DIRECTIVE_SPACE;
|
|
return parse_immd(parser, &expr->space);
|
|
} else CHK(.word) {
|
|
expr->type = EXPR_DIRECTIVE_WORD;
|
|
return parse_directive_whb(parser, expr->words, &expr->len,
|
|
sizeof(uint32_t), UINT32_MAX);
|
|
} else CHK(.half) {
|
|
expr->type = EXPR_DIRECTIVE_HALF;
|
|
return parse_directive_whb(parser, expr->halfs, &expr->len,
|
|
sizeof(uint16_t), UINT16_MAX);
|
|
} else CHK(.byte) {
|
|
expr->type = EXPR_DIRECTIVE_BYTE;
|
|
return parse_directive_whb(parser, expr->bytes, &expr->len,
|
|
sizeof(uint8_t), UINT8_MAX);
|
|
} else CHK(.extern) {
|
|
expr->type = EXPR_DIRECTIVE_EXTERN;
|
|
return parse_ident(parser, &expr->label);
|
|
} else CHK(.globl) {
|
|
expr->type = EXPR_DIRECTIVE_GLOBL;
|
|
return parse_ident(parser, &expr->label);
|
|
} else CHK(.ascii) {
|
|
expr->type = EXPR_DIRECTIVE_ASCII;
|
|
return parse_string(parser, &expr->string);
|
|
} else CHK(.asciiz) {
|
|
expr->type = EXPR_DIRECTIVE_ASCIIZ;
|
|
return parse_string(parser, &expr->string);
|
|
} else {
|
|
expr->type = EXPR_DIRECTIVE_SECTION;
|
|
string_move(&expr->section, name);
|
|
return M_SUCCESS;
|
|
}
|
|
|
|
#undef CHK
|
|
}
|
|
|
|
static int parse_constant(struct parser *parser, struct string *name,
|
|
struct expr_const *constant)
|
|
{
|
|
if (assert_token(parser, TOK_EQUAL, NULL))
|
|
return M_ERROR;
|
|
|
|
struct token token;
|
|
if (assert_token(parser, TOK_NUMBER, &token))
|
|
return M_ERROR;
|
|
|
|
string_move(&constant->name, name);
|
|
constant->num = token.number;
|
|
return M_SUCCESS;
|
|
}
|
|
|
|
static int parse_offset(struct parser *parser,
|
|
struct expr_ins_arg *arg,
|
|
uint64_t immd)
|
|
{
|
|
// the immediate has already been parsed
|
|
// now parse (REG)
|
|
|
|
if (assert_token(parser, TOK_LPAREN, NULL))
|
|
return M_ERROR;
|
|
|
|
struct token token;
|
|
if (assert_token(parser, TOK_REG, &token))
|
|
return M_ERROR;
|
|
|
|
// set values
|
|
string_move(&arg->offset.reg, &token.string);
|
|
arg->offset.immd = immd;
|
|
|
|
if (assert_token(parser, TOK_RPAREN, NULL)) {
|
|
string_free(&arg->offset.reg);
|
|
return M_ERROR;
|
|
}
|
|
|
|
return M_SUCCESS;
|
|
}
|
|
|
|
static int parse_instruction_arg(struct parser *parser,
|
|
struct expr_ins_arg *arg)
|
|
{
|
|
// allowed token matches:
|
|
//
|
|
// register:
|
|
// REG
|
|
//
|
|
// label:
|
|
// IDENT
|
|
//
|
|
// immediate:
|
|
// IMMD
|
|
//
|
|
// offset:
|
|
// (REG)
|
|
// IMMD(REG)
|
|
|
|
struct token token;
|
|
if (peek_token(parser, &token))
|
|
return M_ERROR;
|
|
|
|
// if its a left paren, were parsing
|
|
// an offset
|
|
if (token.type == TOK_LPAREN) {
|
|
arg->type = EXPR_INS_ARG_OFFSET;
|
|
return parse_offset(parser, arg, 0);
|
|
}
|
|
|
|
// token must now be either a number (immediate)
|
|
// register, or label,,,
|
|
// ... take ownership of the next token
|
|
if (next_token(parser, &token))
|
|
return M_ERROR;
|
|
|
|
// if its a register... return
|
|
// there are no other pathways
|
|
if (token.type == TOK_REG) {
|
|
arg->type = EXPR_INS_ARG_REGISTER;
|
|
string_move(&arg->reg, &token.string);
|
|
return M_SUCCESS;
|
|
}
|
|
|
|
// if it is a label... return
|
|
// therea are no other pathways
|
|
if (token.type == TOK_IDENT) {
|
|
arg->type = EXPR_INS_ARG_LABEL;
|
|
string_move(&arg->label, &token.string);
|
|
return M_SUCCESS;
|
|
}
|
|
|
|
// now it must be a number...
|
|
// throw a error if its now
|
|
if (token.type != TOK_NUMBER) {
|
|
ERROR_POS(token, "expected number, got %s",
|
|
token_str(token.type));
|
|
token_free(&token);
|
|
return M_ERROR;
|
|
}
|
|
|
|
uint64_t immd = (uint64_t)token.number;
|
|
// now if the next token is a lparen
|
|
// parse offset, else return immd
|
|
if (peek_token(parser, &token))
|
|
return M_ERROR;
|
|
|
|
if (token.type == TOK_LPAREN) {
|
|
arg->type = EXPR_INS_ARG_OFFSET;
|
|
return parse_offset(parser, arg, immd);
|
|
} else {
|
|
arg->type = EXPR_INS_ARG_IMMEDIATE;
|
|
arg->immd = immd;
|
|
return M_SUCCESS;
|
|
}
|
|
|
|
}
|
|
|
|
static int parse_instruction(struct parser *parser, struct string *name,
|
|
struct expr_ins *ins)
|
|
{
|
|
int len = 0;
|
|
|
|
if (peek_eol(parser) == M_SUCCESS)
|
|
goto skip_args;
|
|
|
|
while (1) {
|
|
if (len >= MAX_ARG_LENGTH) {
|
|
ERROR_POS(parser->lexer,
|
|
"reached max argument length");
|
|
return M_ERROR;
|
|
}
|
|
|
|
|
|
if (parse_instruction_arg(parser, &ins->args[len++]))
|
|
return M_ERROR;
|
|
|
|
if (peek_eol(parser) == M_SUCCESS)
|
|
break;
|
|
|
|
if (assert_token(parser, TOK_COMMA, NULL))
|
|
return M_ERROR;
|
|
}
|
|
|
|
skip_args:
|
|
|
|
string_move(&ins->name, name);
|
|
ins->args_len = len;
|
|
return M_SUCCESS;
|
|
}
|
|
|
|
/* gets the next value from the parser */
|
|
int parser_next(struct parser *parser, struct expr *expr)
|
|
{
|
|
// the next token being looked at
|
|
struct token token = {
|
|
.type = TOK_NL
|
|
};
|
|
|
|
// the result to return
|
|
int res = M_SUCCESS;
|
|
|
|
// skip all new lines
|
|
while (1) {
|
|
if (next_token(parser, &token))
|
|
return M_ERROR;
|
|
if (token.type != TOK_NL)
|
|
break;
|
|
token_free(&token);
|
|
}
|
|
|
|
expr->line_no = parser->lexer.y;
|
|
expr->byte_start = token.off;
|
|
expr->byte_end = token.off;
|
|
|
|
// if EOF, return M_EOF
|
|
if (token.type == TOK_EOF)
|
|
return M_EOF;
|
|
|
|
// when a ident ends with a colon
|
|
// parse a lebel
|
|
else if (token.type == TOK_LABEL) {
|
|
expr->type = EXPR_LABEL;
|
|
// label now owns string
|
|
string_move(&expr->label, &token.string);
|
|
}
|
|
|
|
// when a ident starts with a dot
|
|
// parse a directive
|
|
else if (token.type == TOK_DIRECTIVE) {
|
|
expr->type = EXPR_DIRECTIVE;
|
|
res = parse_directive(parser, &token.string, &expr->directive);
|
|
}
|
|
|
|
// peek the next token:
|
|
// 1. = means parse constant
|
|
// 2. else parse instruction
|
|
else {
|
|
if (token.type != TOK_IDENT) {
|
|
ERROR_POS(token, "expected ident, got %s",
|
|
token_str(token.type));
|
|
token_free(&token);
|
|
return M_ERROR;
|
|
}
|
|
|
|
struct token peek;
|
|
if (peek_token(parser, &peek)) {
|
|
token_free(&token);
|
|
return M_ERROR;
|
|
}
|
|
|
|
if (peek.type == TOK_EQUAL) {
|
|
expr->type = EXPR_CONSTANT;
|
|
res = parse_constant(parser, &token.string,
|
|
&expr->constant);
|
|
} else {
|
|
expr->type = EXPR_INS;
|
|
res = parse_instruction(parser, &token.string,
|
|
&expr->instruction);
|
|
}
|
|
}
|
|
|
|
// update byte end for expr
|
|
expr->byte_end = ftell(parser->lexer.file);
|
|
|
|
// free tokens
|
|
token_free(&token);
|
|
|
|
// everything must end in a new line
|
|
if (res == M_SUCCESS && assert_eol(parser))
|
|
return M_ERROR;
|
|
|
|
return res;
|
|
}
|
|
|
|
int parser_init(const char *file, struct parser *parser)
|
|
{
|
|
parser->peek.type = TOK_EOF;
|
|
if (lexer_init(file, &parser->lexer))
|
|
return M_ERROR;
|
|
return M_SUCCESS;
|
|
}
|
|
|
|
|
|
void parser_free(struct parser *parser)
|
|
{
|
|
token_free(&parser->peek);
|
|
lexer_free(&parser->lexer);
|
|
}
|
|
|
|
static inline void expr_directive_free(struct expr_directive *dir)
|
|
{
|
|
switch (dir->type) {
|
|
case EXPR_DIRECTIVE_SECTION:
|
|
string_free(&dir->section);
|
|
break;
|
|
case EXPR_DIRECTIVE_EXTERN:
|
|
case EXPR_DIRECTIVE_GLOBL:
|
|
string_free(&dir->label);
|
|
break;
|
|
case EXPR_DIRECTIVE_ASCII:
|
|
case EXPR_DIRECTIVE_ASCIIZ:
|
|
string_free(&dir->string);
|
|
break;
|
|
default:
|
|
}
|
|
}
|
|
|
|
static inline void expr_ins_arg_free(struct expr_ins_arg *arg)
|
|
{
|
|
switch (arg->type) {
|
|
case EXPR_INS_ARG_REGISTER:
|
|
string_free(&arg->reg);
|
|
break;
|
|
case EXPR_INS_ARG_IMMEDIATE:
|
|
break;
|
|
case EXPR_INS_ARG_LABEL:
|
|
string_free(&arg->label);
|
|
break;
|
|
case EXPR_INS_ARG_OFFSET:
|
|
string_free(&arg->offset.reg);
|
|
break;
|
|
}
|
|
}
|
|
|
|
void expr_free(struct expr *expr)
|
|
{
|
|
switch (expr->type) {
|
|
case EXPR_DIRECTIVE:
|
|
expr_directive_free(&expr->directive);
|
|
break;
|
|
case EXPR_CONSTANT:
|
|
string_free(&expr->constant.name);
|
|
break;
|
|
case EXPR_INS:
|
|
string_free(&expr->instruction.name);
|
|
for (uint32_t i = 0; i < expr->instruction.args_len; i++)
|
|
expr_ins_arg_free(&expr->instruction.args[i]);
|
|
break;
|
|
case EXPR_LABEL:
|
|
string_free(&expr->label);
|
|
break;
|
|
}
|
|
}
|