nbtvis/nbt/json/read.c
2023-12-17 11:10:04 -05:00

633 lines
11 KiB
C

#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "nbt.h"
#include "lib.h"
// One-character pushback shared by the tokenizer: json_parse_number stores
// the character it read past the end of a number here, and json_next_token
// consumes it before reading from the stream again. '\0' means nothing is
// pending.
static char ret = '\0';
// Kinds of lexical tokens produced by json_next_token.
typedef enum {
    TOK_LBRACE,  // '{'
    TOK_RBRACE,  // '}'
    TOK_LBRACK,  // '['
    TOK_RBRACK,  // ']'
    TOK_COLON,   // ':'
    TOK_COMMA,   // ','
    TOK_STRING,  // quoted string, payload in tokendata_t.string
    TOK_NUMBER,  // integer literal, payload in tokendata_t.number
    TOK_DOUBLE,  // literal with a fraction or exponent, payload in tokendata_t.decimal
    TOK_BOOL,    // true / false, payload in tokendata_t.b
    TOK_NULL     // null, no payload
} tokentype_t;
// Payload attached to a token; the token type selects the valid member.
typedef union {
    bool b;          // value of a TOK_BOOL
    int64_t number;  // value of a TOK_NUMBER
    double decimal;  // value of a TOK_DOUBLE
    struct {
        uint16_t len;  // number of bytes stored in data
        char *data;    // heap buffer holding the bytes, not NUL-terminated
    } string;        // value of a TOK_STRING
} tokendata_t;
// A single lexical token: its kind plus the payload that kind carries.
typedef struct {
    tokentype_t type;  // which kind of token this is
    tokendata_t data;  // payload; only meaningful for kinds that carry one
} token_t;
// Releases the heap memory owned by a token. Only string tokens own any;
// every other kind is left untouched.
static void json_token_free(token_t *token) {
    switch (token->type) {
    case TOK_STRING:
        free(token->data.string.data);
        break;
    default:
        break;
    }
}
// Reads the four hexadecimal digits that follow a "\u" escape and writes the
// UTF-8 encoding of that 16-bit code point into buf.
//
// buf    - receives 1 to 3 encoded bytes
// read   - set to the number of bytes written to buf
// stream - source of the four digit characters
//
// Returns false if the stream cannot supply four characters or if any of
// them is not a hexadecimal digit; true otherwise.
static bool json_parse_unicode(char buf[4], int *read, const stream_t *stream) {
    char digits[4];

    if (stream_read(stream, digits, 4) == false)
        return false;

    // Decode by hand rather than with strtol: strtol also accepts leading
    // whitespace and a sign, neither of which is valid inside an escape.
    uint16_t code_point = 0;
    for (int i = 0; i < 4; i++) {
        unsigned char ch = (unsigned char) digits[i];
        if (!isxdigit(ch))
            return false;
        int nibble = isdigit(ch) ? ch - '0' : tolower(ch) - 'a' + 10;
        code_point = (uint16_t) ((code_point << 4) | nibble);
    }

    if (code_point < 0x0080) {
        // 0xxxxxxx
        buf[0] = (char) code_point;
        *read = 1;
    } else if (code_point < 0x0800) {
        // 110xxxxx 10xxxxxx
        buf[0] = (char) (0xC0 | (code_point >> 6));
        buf[1] = (char) (0x80 | (code_point & 0x3F));
        *read = 2;
    } else {
        // 1110xxxx 10xxxxxx 10xxxxxx
        buf[0] = (char) (0xE0 | (code_point >> 12));
        buf[1] = (char) (0x80 | ((code_point >> 6) & 0x3F));
        buf[2] = (char) (0x80 | (code_point & 0x3F));
        *read = 3;
    }

    return true;
}
// Decodes the escape sequence that follows a backslash inside a string.
//
// buf    - receives the decoded byte(s)
// read   - set to the number of bytes placed in buf (1 unless the escape is
//          a \u sequence, which json_parse_unicode sets itself)
// stream - source of the character(s) after the backslash
//
// Returns false if the stream cannot be read or the escape is not recognised.
static bool json_parse_escape(char buf[4], int *read, const stream_t *stream) {
    char esc;
    char decoded;

    *read = 1;

    if (!stream_read(stream, &esc, 1))
        return false;

    // \uXXXX is the only escape that can produce more than one byte
    if (esc == 'u')
        return json_parse_unicode(buf, read, stream);

    switch (esc) {
    case '"':
    case '\\':
    case '/':
        decoded = esc;
        break;
    case 'b':
        decoded = '\b';
        break;
    case 'f':
        decoded = '\f';
        break;
    case 'n':
        decoded = '\n';
        break;
    case 'r':
        decoded = '\r';
        break;
    case 't':
        decoded = '\t';
        break;
    default:
        // invalid escape
        return false;
    }

    buf[0] = decoded;
    return true;
}
// Reads the body of a JSON string; the opening quote has already been
// consumed by the caller. Reading stops at the closing quote.
//
// token  - on success, string.data receives a heap buffer with the decoded
//          bytes (not NUL-terminated) and string.len their count
// stream - source of characters
//
// Returns false if the stream ends before the closing quote, a byte below 32
// or above 127 appears, an escape is invalid, or the decoded string does not
// fit in the 16-bit length field. Nothing is left allocated on failure.
static bool json_parse_string(tokendata_t *token, const stream_t *stream) {
    int capacity = 8;
    int len = 0;
    char *buf = xalloc(capacity * sizeof(char));

    while (1) {
        char tmp[4];
        int read = 1;

        if (stream_read(stream, tmp, 1) == false)
            goto fail;

        uint8_t c = tmp[0];

        if (c == '"')
            break;

        // control characters and non-ascii bytes are rejected
        if (c < 32 || c > 127)
            goto fail;

        // an escape: tmp and read are overwritten with the decoded byte(s)
        if (c == '\\' && json_parse_escape(tmp, &read, stream) == false)
            goto fail;

        // string.len is a uint16_t; refuse anything it cannot represent
        // instead of silently truncating the length
        if (len + read > UINT16_MAX)
            goto fail;

        if (len + read >= capacity) {
            capacity *= 2;
            buf = xrealloc(buf, capacity * sizeof(char));
        }

        memcpy(buf + len, tmp, read);
        len += read;
    }

    // Hand the buffer over directly. It is trimmed to the exact size when
    // non-empty; an empty string keeps the initial allocation so no
    // zero-byte allocation is ever requested.
    if (len > 0)
        buf = xrealloc(buf, len * sizeof(char));

    token->string.data = buf;
    token->string.len = (uint16_t) len;
    return true;

fail:
    free(buf);
    return false;
}
// Consumes characters from the stream one at a time and checks that they
// spell out `rest` exactly.
//
// Returns true once every character of `rest` has been matched, false on
// the first mismatch or if the stream cannot be read.
static bool json_ident_assert(const stream_t *stream, const char *rest) {
    const char *expected = rest;

    do {
        char got;

        if (!stream_read(stream, &got, 1))
            return false;

        if (got != *expected)
            return false;

        expected++;
    } while (*expected != '\0');

    return true;
}
// Parses one of the keyword literals true, false or null.
//
// token  - receives TOK_BOOL (with data.b set) or TOK_NULL on success
// stream - source of the remaining characters of the keyword
// first  - the keyword's first character, already consumed by the caller
//
// Because `first` has been consumed, only the rest of each keyword is
// matched against the stream. Returns false if the characters that follow
// do not complete a keyword.
static bool json_parse_ident(token_t *token, const stream_t *stream, char first) {
    if (first == 't' && json_ident_assert(stream, "rue")) {
        token->type = TOK_BOOL;
        token->data.b = true;
    } else if (first == 'f' && json_ident_assert(stream, "alse")) {
        token->type = TOK_BOOL;
        token->data.b = false;
    } else if (first == 'n' && json_ident_assert(stream, "ull")) {
        token->type = TOK_NULL;
    } else {
        return false;
    }
    return true;
}
// Appends one character to a growable buffer.
//
// buf - pointer to the buffer; updated if the buffer is reallocated
// len - number of characters currently stored; incremented on return
// cap - allocated size of the buffer in characters; doubled when full
// c   - character to append
static void push_char(char **buf, int *len, int *cap, char c) {
    if (*len == *cap) {
        // grow geometrically: double, do not square
        *cap *= 2;
        *buf = xrealloc(*buf, *cap * sizeof(char));
    }
    (*buf)[(*len)++] = c;
}
// Parses a JSON number: optional '-', integer digits, optional fraction,
// optional exponent.
//
// token  - receives TOK_NUMBER (data.number) for plain integers, or
//          TOK_DOUBLE (data.decimal) when a fraction or exponent is present
// stream - source of the remaining characters
// first  - the number's first character, already consumed by the caller
//
// The end of a number is only detected by reading the character after it.
// That character is stored in the file-level `ret` so json_next_token sees
// it next. As a consequence, a number that is the very last thing in the
// stream fails to parse, because that look-ahead read fails.
//
// Returns false on malformed input or a failed read; the scratch buffer is
// released on every path.
static bool json_parse_number(token_t *token, const stream_t *stream, char first) {
    int capacity = 8;
    int len = 0;
    int ndigits = 0; // integer-part digits seen so far (the sign is not counted)
    char *buf = xalloc(capacity * sizeof(char));
    char *end = NULL;
    bool isdec = false;
    bool isneg = false;
    char c = first;

    // PARSE SIGN AND INTEGER DIGITS
    // c == '\0' means "the next character has not been read yet"
    while (1) {
        if (c == '\0' && stream_read(stream, &c, 1) == false)
            goto fail;

        if (c == '-' && ndigits == 0) {
            // cannot negate twice
            if (isneg)
                goto fail;
            isneg = true;
            // keep the sign in the buffer so the conversion below sees it
            push_char(&buf, &len, &capacity, c);
            c = '\0';
        } else if (c == '0' && ndigits == 0) {
            // a leading 0 cannot be followed by further integer digits
            push_char(&buf, &len, &capacity, c);
            c = '\0';
            break;
        } else if (c >= '0' && c <= '9') {
            push_char(&buf, &len, &capacity, c);
            ndigits++;
            c = '\0';
        } else if (ndigits == 0) {
            // invalid start of digits
            goto fail;
        } else {
            // end of integer digits; c is the look-ahead character
            break;
        }
    }

    // SET NEXT CHAR C IF NOT READ YET
    if (c == '\0' && stream_read(stream, &c, 1) == false)
        goto fail;

    // PARSE FRACTION
    if (c == '.') {
        isdec = true;
        push_char(&buf, &len, &capacity, c);
        int declen = 0;
        while (1) {
            if (stream_read(stream, &c, 1) == false)
                goto fail;
            if (c >= '0' && c <= '9') {
                push_char(&buf, &len, &capacity, c);
                declen++;
            } else if (declen == 0) {
                // '.' must be followed by at least one digit
                goto fail;
            } else {
                // end of fraction
                break;
            }
        }
    }

    // PARSE EXPONENT
    if (c == 'e' || c == 'E') {
        isdec = true;
        push_char(&buf, &len, &capacity, 'E');
        int explen = 0; // number of exponent digits

        if (stream_read(stream, &c, 1) == false)
            goto fail;

        if (c == '+' || c == '-') {
            push_char(&buf, &len, &capacity, c);
            c = '\0';
        }

        while (1) {
            if (c == '\0' && stream_read(stream, &c, 1) == false)
                goto fail;
            if (c >= '0' && c <= '9') {
                push_char(&buf, &len, &capacity, c);
                explen++;
                c = '\0';
            } else if (explen == 0) {
                // the exponent needs at least one digit
                goto fail;
            } else {
                break;
            }
        }
    }

    push_char(&buf, &len, &capacity, '\0');

    if (isdec) {
        token->type = TOK_DOUBLE;
        token->data.decimal = strtod(buf, &end);
    } else {
        token->type = TOK_NUMBER;
        // strtoll: long may be narrower than the int64_t being filled
        token->data.number = strtoll(buf, &end, 10);
    }

    // the whole buffer must have been consumed by the conversion
    if (end == buf || *end != '\0')
        goto fail;

    free(buf);
    ret = c; // hand the look-ahead character back to the tokenizer
    return true;

fail:
    free(buf);
    return false;
}
// Reads the next token, skipping any whitespace in front of it.
//
// token  - zeroed, then filled in with the token's type and payload
// stream - source of characters
//
// A character left in the file-level `ret` pushback is consumed before the
// stream is read. Returns false if the stream cannot be read, the character
// cannot start a token, or the string / keyword / number sub-parser rejects
// the input.
static bool json_next_token(token_t *token, const stream_t *stream) {
    memset(token, 0, sizeof(token_t));

    char c;

    // fetch characters until one that is not whitespace turns up
    do {
        if (ret != '\0') {
            c = ret;
            ret = '\0';
        } else if (stream_read(stream, &c, 1) == false) {
            return false;
        }
    } while (c == ' ' || c == '\n' || c == '\t' || c == '\r');

    bool ok = true;

    switch (c) {
    case '{':
        token->type = TOK_LBRACE;
        break;
    case '}':
        token->type = TOK_RBRACE;
        break;
    case '[':
        token->type = TOK_LBRACK;
        break;
    case ']':
        token->type = TOK_RBRACK;
        break;
    case ':':
        token->type = TOK_COLON;
        break;
    case ',':
        token->type = TOK_COMMA;
        break;
    case '"':
        token->type = TOK_STRING;
        ok = json_parse_string(&token->data, stream);
        break;
    case 't':
    case 'f':
    case 'n':
        // parse null or bool
        ok = json_parse_ident(token, stream, c);
        break;
    default:
        // <ctype.h> functions need a value representable as unsigned char;
        // a plain char may be negative
        if (isdigit((unsigned char) c) || c == '-') {
            // parse number
            ok = json_parse_number(token, stream, c);
        } else {
            // disallowed symbol
            ok = false;
        }
        break;
    }

    return ok;
}
// Determines the element type shared by every tag of a list.
//
// type - receives TAG_END for an empty list, otherwise the type of the
//        first element
// tags - the list elements
// len  - number of elements in tags
//
// Returns false if any element's type differs from the first element's.
static bool json_get_list_type(tagtype_t *type, const tag_t *tags, int len) {
    if (len < 1) {
        *type = TAG_END;
        return true;
    }

    const tagtype_t expected = tags[0].type;
    *type = expected;

    for (const tag_t *it = tags; it != tags + len; ++it) {
        if (it->type != expected)
            return false;
    }

    return true;
}
// Defined further down; declared here because lists and values are mutually
// recursive.
static bool json_read_value(tag_t *tag, const stream_t *stream, token_t *first);

// Reads the elements of a JSON array; the opening '[' has already been
// consumed by the caller.
//
// data   - on success, list.tags / list.size / list.type describe the
//          elements; an empty array yields NULL, 0 and TAG_END
// stream - source of tokens
//
// Returns false if a token cannot be read, an element fails to parse, the
// separator is neither ',' nor ']', or the elements do not share one type.
//
// NOTE(review): the error paths free only the element array itself, not
// whatever the already-parsed elements own - confirm whether a tag
// destructor should be called here.
static bool json_read_list(tagdata_t *data, const stream_t *stream) {
    token_t tok = {0};

    if (!json_next_token(&tok, stream)) {
        json_token_free(&tok);
        return false;
    }

    // "[]"
    if (tok.type == TOK_RBRACK) {
        data->list.tags = NULL;
        data->list.size = 0;
        data->list.type = TAG_END;
        return true;
    }

    int cap = 8;
    int count = 0;
    tag_t *items = xalloc(cap * sizeof(tag_t));

    // the token fetched above is the start of the first element
    token_t *prefetched = &tok;
    bool closed = false;

    while (!closed) {
        tag_t elem;
        elem.name = "";
        elem.name_len = 0;

        if (!json_read_value(&elem, stream, prefetched)) {
            free(items);
            return false;
        }
        prefetched = NULL;

        if (count == cap) {
            cap *= 2;
            items = xrealloc(items, cap * sizeof(tag_t));
        }
        items[count++] = elem;

        if (!json_next_token(&tok, stream)) {
            free(items);
            json_token_free(&tok);
            return false;
        }

        switch (tok.type) {
        case TOK_COMMA:
            // another element follows
            break;
        case TOK_RBRACK:
            closed = true;
            break;
        default:
            free(items);
            json_token_free(&tok);
            return false;
        }
    }

    tagtype_t elem_type;
    if (!json_get_list_type(&elem_type, items, count)) {
        free(items);
        return false;
    }

    // copy into an exactly-sized array and drop the scratch one
    data->list.tags = xalloc(count * sizeof(tag_t));
    memcpy(data->list.tags, items, count * sizeof(tag_t));
    data->list.size = count;
    data->list.type = elem_type;

    free(items);
    return true;
}
// Reads the members of a JSON object; the opening '{' has already been
// consumed by the caller.
//
// data   - on success, compound receives the populated map
// stream - source of tokens
//
// Each member must be a non-empty string key, a ':', and a value. Members
// are separated by ',' and the object ends at '}'. Returns false on any
// deviation or failed read; on failure the map, the pending key and the
// current token are all released.
static bool json_read_compound(tagdata_t *data, const stream_t *stream) {
    map_t map;
    token_t next = {0};
    char *name = NULL; // key read but not yet stored in the map

    map_init(&map);

    if (json_next_token(&next, stream) == false)
        goto fail;

    // "{}"
    if (next.type == TOK_RBRACE) {
        data->compound = map;
        return true;
    }

    while (1) {
        if (next.type != TOK_STRING)
            goto fail;

        // Take the key out of the token so that releasing the token later
        // cannot free the same buffer a second time.
        name = next.data.string.data;
        int name_len = next.data.string.len;
        next.data.string.data = NULL;

        if (name_len < 1)
            goto fail;

        if (json_next_token(&next, stream) == false || next.type != TOK_COLON)
            goto fail;

        tag_t value;
        if (json_read_value(&value, stream, NULL) == false)
            goto fail;

        value.name = name;
        value.name_len = name_len;
        map_put(&map, &value);
        // presumably the map is now responsible for the key - confirm
        // against map_put; it must not be freed from here on
        name = NULL;

        if (json_next_token(&next, stream) == false)
            goto fail;

        if (next.type == TOK_RBRACE)
            break;

        if (next.type != TOK_COMMA)
            goto fail;

        // after a comma, fetch the token that should be the next key
        if (json_next_token(&next, stream) == false)
            goto fail;
    }

    data->compound = map;
    return true;

fail:
    json_token_free(&next);
    free(name);
    map_free(&map);
    return false;
}
// Reads one JSON value and converts it to a tag.
//
// tag    - receives the value; its name is set to "" with length 0
// stream - source of tokens
// first  - token that starts the value if the caller already fetched it,
//          or NULL to read one from the stream
//
// Mapping: array -> TAG_LIST, object -> TAG_COMPOUND, string -> TAG_STRING
// (the token's buffer is handed to the tag), integer -> TAG_LONG,
// decimal -> TAG_DOUBLE, bool -> TAG_BYTE (1 or 0).
// Returns false for null, for a token that cannot start a value, or when a
// nested list / compound fails to parse.
static bool json_read_value(tag_t *tag, const stream_t *stream, token_t *first) {
    token_t tok;

    if (first == NULL) {
        if (!json_next_token(&tok, stream))
            return false;
    } else {
        tok = *first;
    }

    tag->name = "";
    tag->name_len = 0;

    switch (tok.type) {
    case TOK_LBRACK:
        tag->type = TAG_LIST;
        return json_read_list(&tag->data, stream);

    case TOK_LBRACE:
        tag->type = TAG_COMPOUND;
        return json_read_compound(&tag->data, stream);

    case TOK_STRING:
        tag->type = TAG_STRING;
        tag->data.string.data = tok.data.string.data;
        tag->data.string.size = tok.data.string.len;
        return true;

    case TOK_NUMBER:
        tag->type = TAG_LONG;
        tag->data.l = tok.data.number;
        return true;

    case TOK_DOUBLE:
        tag->type = TAG_DOUBLE;
        tag->data.d = tok.data.decimal;
        return true;

    case TOK_BOOL:
        tag->type = TAG_BYTE;
        tag->data.b = tok.data.b ? 1 : 0;
        return true;

    case TOK_RBRACK:
    case TOK_RBRACE:
    case TOK_COLON:
    case TOK_COMMA:
    case TOK_NULL:
        // none of these can be turned into a tag
        return false;
    }

    return true;
}
// Public entry point: reads a single JSON value from the stream into tag.
// Returns whatever json_read_value reports.
bool json_read(tag_t *tag, const stream_t *stream) {
    // no token has been fetched yet at the root
    token_t *prefetched = NULL;

    return json_read_value(tag, stream, prefetched);
}