summaryrefslogtreecommitdiff
path: root/nbt/json/read.c
diff options
context:
space:
mode:
Diffstat (limited to 'nbt/json/read.c')
-rw-r--r--nbt/json/read.c632
1 files changed, 632 insertions, 0 deletions
diff --git a/nbt/json/read.c b/nbt/json/read.c
new file mode 100644
index 0000000..3316b55
--- /dev/null
+++ b/nbt/json/read.c
@@ -0,0 +1,632 @@
+#include <ctype.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "nbt.h"
+#include "lib.h"
+
+// One-character pushback slot for the tokenizer. json_parse_number stores
+// here the character it read just past the end of a number, and
+// json_next_token consumes it before reading from the stream again.
+// '\0' means the slot is empty.
+static char ret = '\0';
+
+// Kinds of lexical token produced by json_next_token.
+typedef enum {
+ TOK_LBRACE, // '{'
+ TOK_RBRACE, // '}'
+ TOK_LBRACK, // '['
+ TOK_RBRACK, // ']'
+ TOK_COLON, // ':'
+ TOK_COMMA, // ','
+ TOK_STRING, // quoted string, payload in tokendata_t.string
+ TOK_NUMBER, // number without fraction or exponent, payload in .number
+ TOK_DOUBLE, // number with a fraction and/or exponent, payload in .decimal
+ TOK_BOOL, // true / false, payload in .b
+ TOK_NULL // null, no payload
+} tokentype_t ;
+
+// Payload of a token; which member is meaningful depends on the token type.
+typedef union {
+ bool b;
+ int64_t number;
+ double decimal;
+ struct {
+ // number of bytes in data
+ uint16_t len;
+ // heap buffer filled by json_parse_string; it holds exactly len bytes
+ // and is not NUL-terminated
+ char *data;
+ } string;
+} tokendata_t;
+
+// A single token: its kind plus the payload belonging to that kind.
+typedef struct {
+ tokentype_t type;
+ tokendata_t data;
+} token_t;
+
+// Release the heap memory held by a token. Only string tokens carry an
+// allocation; every other token type is left untouched.
+static void json_token_free(token_t *token) {
+    switch (token->type) {
+    case TOK_STRING:
+        free(token->data.string.data);
+        break;
+    default:
+        break;
+    }
+}
+
+// Parse the four hex digits that follow "\u" and write the code point to buf
+// encoded as UTF-8.
+//
+// buf    - receives 1 to 3 bytes (code points up to U+FFFF)
+// read   - set to the number of bytes written to buf
+// stream - source of the four hex digits
+//
+// Returns false when the stream ends early or any of the four characters is
+// not a hexadecimal digit. Each escape is encoded on its own; surrogate
+// halves are not combined into a single code point.
+static bool json_parse_unicode(char buf[4], int *read, const stream_t *stream) {
+    char digits[5];
+
+    if (stream_read(stream, digits, 4) == false)
+        return false;
+    digits[4] = '\0';
+
+    // strtol alone would also accept whitespace, a sign or a "0x" prefix,
+    // so insist on exactly four hex digits before converting
+    for (int i = 0; i < 4; i++) {
+        if (!isxdigit((unsigned char) digits[i]))
+            return false;
+    }
+
+    unsigned int code_point = (unsigned int) strtol(digits, NULL, 16);
+
+    if (code_point < 0x0080) {
+        // 0xxxxxxx
+        buf[0] = (char) code_point;
+        *read = 1;
+    } else if (code_point < 0x0800) {
+        // 110xxxxx 10xxxxxx
+        buf[0] = (char) (0xC0 | (code_point >> 6));
+        buf[1] = (char) (0x80 | (code_point & 0x3F));
+        *read = 2;
+    } else {
+        // 1110xxxx 10xxxxxx 10xxxxxx
+        buf[0] = (char) (0xE0 | (code_point >> 12));
+        buf[1] = (char) (0x80 | ((code_point >> 6) & 0x3F));
+        buf[2] = (char) (0x80 | (code_point & 0x3F));
+        *read = 3;
+    }
+
+    return true;
+}
+
+// Decode the escape sequence that follows a backslash inside a string.
+//
+// buf    - receives the decoded byte(s)
+// read   - set to the number of bytes written (1, or whatever
+//          json_parse_unicode reports for a \u escape)
+// stream - source of the escape character(s)
+//
+// Returns false if the stream ends or the escape character is not recognised.
+static bool json_parse_escape(char buf[4], int *read, const stream_t *stream) {
+    char esc;
+    char decoded;
+
+    *read = 1;
+
+    if (stream_read(stream, &esc, 1) == false)
+        return false;
+
+    // \uXXXX is the only escape that can expand to more than one byte
+    if (esc == 'u')
+        return json_parse_unicode(buf, read, stream);
+
+    switch (esc) {
+    case '"':
+    case '\\':
+    case '/':
+        decoded = esc;
+        break;
+    case 'b':
+        decoded = '\b';
+        break;
+    case 'f':
+        decoded = '\f';
+        break;
+    case 'n':
+        decoded = '\n';
+        break;
+    case 'r':
+        decoded = '\r';
+        break;
+    case 't':
+        decoded = '\t';
+        break;
+    default:
+        // invalid escape
+        return false;
+    }
+
+    buf[0] = decoded;
+    return true;
+}
+
+// Read the body of a JSON string; the opening quote has already been
+// consumed by the caller. Reads up to and including the closing quote.
+//
+// token  - on success, string.data is a fresh allocation holding the decoded
+//          bytes (not NUL-terminated) and string.len is their count
+// stream - source of the characters
+//
+// Returns false when the stream ends, a raw byte below 32 or above 127 is
+// seen, an escape is invalid, or the decoded string would not fit in the
+// 16-bit length field. Nothing is stored in token on failure.
+static bool json_parse_string(tokendata_t *token, const stream_t *stream) {
+
+    int capacity = 8;
+    int len = 0;
+    char *buf = xalloc(capacity * sizeof(char));
+
+    while (1) {
+
+        char tmp[4];
+        int read = 1;
+
+        if (stream_read(stream, tmp, 1) == false)
+            goto fail;
+
+        uint8_t c = tmp[0];
+
+        if (c == '"')
+            break;
+
+        // non printable ascii character
+        if (c < 32 || c > 127)
+            goto fail;
+
+        // an escape: tmp is overwritten with the decoded bytes
+        if (c == '\\' && json_parse_escape(tmp, &read, stream) == false)
+            goto fail;
+
+        // string.len is a uint16_t; refuse anything that would be truncated
+        if (len + read > UINT16_MAX)
+            goto fail;
+
+        if (len + read >= capacity) {
+            capacity *= 2;
+            buf = xrealloc(buf, capacity);
+        }
+
+        memcpy(buf + len, tmp, read);
+        len += read;
+
+    }
+
+    // hand out an allocation of exactly len bytes
+    token->string.data = xalloc(len * sizeof(char));
+    token->string.len = len;
+    memcpy(token->string.data, buf, len);
+    free(buf);
+
+    return true;
+
+fail:
+    free(buf);
+    return false;
+}
+
+// Check that the next characters of the stream spell out `rest` exactly.
+// Characters are consumed one at a time and reading stops at the first
+// mismatch or read failure. `rest` is expected to be non-empty.
+static bool json_ident_assert(const stream_t *stream, const char *rest) {
+    const char *expected = rest;
+
+    do {
+        char got;
+
+        if (stream_read(stream, &got, 1) == false)
+            return false;
+        if (got != *expected)
+            return false;
+        expected++;
+    } while (*expected != '\0');
+
+    return true;
+}
+
+// Parse one of the bare literals `true`, `false` or `null`.
+//
+// token  - receives TOK_BOOL (with data.b set) or TOK_NULL
+// stream - source of the remaining characters
+// first  - the literal's first character, already consumed by the caller
+//
+// Returns false if the remaining characters do not complete a literal.
+static bool json_parse_ident(token_t *token, const stream_t *stream, char first) {
+    // `first` is already consumed, so only the tail of each literal is matched
+    if (first == 't' && json_ident_assert(stream, "rue")) {
+        token->type = TOK_BOOL;
+        token->data.b = true;
+    } else if (first == 'f' && json_ident_assert(stream, "alse")) {
+        token->type = TOK_BOOL;
+        token->data.b = false;
+    } else if (first == 'n' && json_ident_assert(stream, "ull")) {
+        token->type = TOK_NULL;
+    } else {
+        return false;
+    }
+    return true;
+}
+
+// Append one character to a growable heap buffer.
+//
+// buf - pointer to the buffer; updated when the buffer is reallocated
+// len - number of characters currently stored; incremented by one
+// cap - current capacity in characters; doubled whenever the buffer is full
+// c   - character to append
+static void push_char(char **buf, int *len, int *cap, char c) {
+    if (*len == *cap) {
+        // double, not square: the capacity must grow geometrically
+        *cap *= 2;
+        *buf = xrealloc(*buf, *cap * sizeof(char));
+    }
+    (*buf)[(*len)++] = c;
+}
+
+// Parse a JSON number: optional '-', an integer part (a single '0' or a run
+// of digits not starting with '0'), an optional fraction and an optional
+// exponent.
+//
+// token  - receives TOK_NUMBER (data.number) when there is neither fraction
+//          nor exponent, otherwise TOK_DOUBLE (data.decimal)
+// stream - source of the remaining characters
+// first  - the number's first character, already consumed by the caller
+//
+// The character that terminates the number has to be read to detect its end;
+// it is stored in the file-level pushback slot `ret` so that json_next_token
+// sees it next. Returns false on malformed input, on a read failure (this
+// includes the read of the terminating character) and on integers that do
+// not fit the conversion.
+static bool json_parse_number(token_t *token, const stream_t *stream, char first) {
+
+    int capacity = 8;
+    int len = 0;
+    char *buf = xalloc(capacity * sizeof(char));
+    char *end = NULL;
+    bool isdec = false;
+    bool ok = false;
+
+    char c = first;
+
+    // SIGN: only valid as the very first character; it is kept in the buffer
+    // so the conversion below produces a negative value
+
+    if (c == '-') {
+        push_char(&buf, &len, &capacity, c);
+        if (stream_read(stream, &c, 1) == false)
+            goto done;
+    }
+
+    // INTEGER PART
+
+    if (c == '0') {
+        // a leading zero cannot be followed by more digits
+        push_char(&buf, &len, &capacity, c);
+        if (stream_read(stream, &c, 1) == false)
+            goto done;
+    } else if (c >= '1' && c <= '9') {
+        do {
+            push_char(&buf, &len, &capacity, c);
+            if (stream_read(stream, &c, 1) == false)
+                goto done;
+        } while (c >= '0' && c <= '9');
+    } else {
+        // invalid start of digits
+        goto done;
+    }
+
+    // FRACTION: '.' followed by at least one digit
+
+    if (c == '.') {
+        int declen = 0;
+
+        isdec = true;
+        push_char(&buf, &len, &capacity, c);
+
+        while (1) {
+            if (stream_read(stream, &c, 1) == false)
+                goto done;
+            if (c < '0' || c > '9')
+                break;
+            push_char(&buf, &len, &capacity, c);
+            declen++;
+        }
+
+        if (declen == 0)
+            goto done;
+    }
+
+    // EXPONENT: 'e' or 'E', optional sign, at least one digit
+
+    if (c == 'e' || c == 'E') {
+        int explen = 0;
+
+        isdec = true;
+        push_char(&buf, &len, &capacity, 'E');
+
+        if (stream_read(stream, &c, 1) == false)
+            goto done;
+
+        if (c == '+' || c == '-') {
+            push_char(&buf, &len, &capacity, c);
+            if (stream_read(stream, &c, 1) == false)
+                goto done;
+        }
+
+        while (c >= '0' && c <= '9') {
+            push_char(&buf, &len, &capacity, c);
+            explen++;
+            if (stream_read(stream, &c, 1) == false)
+                goto done;
+        }
+
+        if (explen == 0)
+            goto done;
+    }
+
+    // CONVERT THE COLLECTED TEXT
+
+    push_char(&buf, &len, &capacity, '\0');
+
+    if (isdec) {
+        token->type = TOK_DOUBLE;
+        token->data.decimal = strtod(buf, &end);
+    } else {
+        // strtoll rather than strtol: long may be narrower than 64 bits
+        errno = 0;
+        long long value = strtoll(buf, &end, 10);
+        if (errno == ERANGE)
+            goto done;
+        token->type = TOK_NUMBER;
+        token->data.number = value;
+    }
+
+    // the whole buffer must have been consumed by the conversion
+    if (end == buf || *end != '\0')
+        goto done;
+
+    // hand the terminating character back to the tokenizer
+    ret = c;
+    ok = true;
+
+done:
+    free(buf);
+    return ok;
+}
+
+// Read the next token from the stream.
+//
+// token  - zeroed, then filled with the token's type and payload
+// stream - source of characters
+//
+// A character left in the pushback slot `ret` is consumed before the stream
+// is read. Spaces, tabs, carriage returns and newlines between tokens are
+// skipped. Returns false on a read failure, on a character that cannot start
+// a token, or when the string / literal / number sub-parser fails.
+static bool json_next_token(token_t *token, const stream_t *stream) {
+
+    char c;
+
+    memset(token, 0, sizeof(token_t));
+
+    // fetch the next non-whitespace character
+    do {
+        if (ret != '\0') {
+            c = ret;
+            ret = '\0';
+        } else if (stream_read(stream, &c, 1) == false) {
+            return false;
+        }
+    } while (c == ' ' || c == '\n' || c == '\t' || c == '\r');
+
+    switch (c) {
+    case '{':
+        token->type = TOK_LBRACE;
+        return true;
+    case '}':
+        token->type = TOK_RBRACE;
+        return true;
+    case '[':
+        token->type = TOK_LBRACK;
+        return true;
+    case ']':
+        token->type = TOK_RBRACK;
+        return true;
+    case ':':
+        token->type = TOK_COLON;
+        return true;
+    case ',':
+        token->type = TOK_COMMA;
+        return true;
+    case '"':
+        token->type = TOK_STRING;
+        return json_parse_string(&token->data, stream);
+    case 't':
+    case 'f':
+    case 'n':
+        // parse null or bool
+        return json_parse_ident(token, stream, c);
+    default:
+        break;
+    }
+
+    // <ctype.h> functions need a value representable as unsigned char;
+    // passing a negative plain char would be undefined behaviour
+    if (c == '-' || isdigit((unsigned char) c))
+        return json_parse_number(token, stream, c);
+
+    // disallowed symbol
+    return false;
+}
+
+// Determine the common element type of a list.
+//
+// type - receives TAG_END for an empty list, otherwise the type of the first
+//        element (written even when the function returns false)
+// tags - the list elements
+// len  - number of elements in tags
+//
+// Returns false if any element's type differs from the first element's.
+static bool json_get_list_type(tagtype_t *type, const tag_t *tags, int len) {
+    if (len < 1) {
+        *type = TAG_END;
+        return true;
+    }
+
+    const tagtype_t expected = tags[0].type;
+    *type = expected;
+
+    for (const tag_t *it = tags + 1; it < tags + len; it++) {
+        if (it->type != expected)
+            return false;
+    }
+
+    return true;
+}
+
+// Forward declaration: json_read_list and json_read_compound call
+// json_read_value, which in turn calls both of them.
+static bool json_read_value(tag_t *tag, const stream_t *stream, token_t *first);
+
+// Read the elements of a JSON array; the opening '[' has already been
+// consumed by the caller.
+//
+// data   - on success, list.tags / list.size / list.type are filled in; an
+//          empty array yields tags == NULL, size 0 and type TAG_END
+// stream - source of tokens
+//
+// Returns false on a tokenizer failure, an invalid element, a missing ','
+// or ']' between elements, or when the elements do not all share one type.
+// NOTE: on the failure paths only the element array itself is freed here;
+// anything owned by elements already stored in it is not released.
+static bool json_read_list(tagdata_t *data, const stream_t *stream) {
+
+    token_t tok = {0};
+
+    if (!json_next_token(&tok, stream)) {
+        json_token_free(&tok);
+        return false;
+    }
+
+    if (tok.type == TOK_RBRACK) {
+        data->list.tags = NULL;
+        data->list.size = 0;
+        data->list.type = TAG_END;
+        return true;
+    }
+
+    int cap = 8;
+    int count = 0;
+    tag_t *items = xalloc(cap * sizeof(tag_t));
+
+    // the token read above is the first token of the first element
+    token_t *pending = &tok;
+    bool more = true;
+
+    while (more) {
+
+        tag_t item;
+        item.name = "";
+        item.name_len = 0;
+
+        if (!json_read_value(&item, stream, pending)) {
+            free(items);
+            return false;
+        }
+
+        // later elements fetch their own first token
+        pending = NULL;
+
+        if (count == cap) {
+            cap *= 2;
+            items = xrealloc(items, cap * sizeof(tag_t));
+        }
+
+        items[count++] = item;
+
+        if (!json_next_token(&tok, stream)) {
+            free(items);
+            json_token_free(&tok);
+            return false;
+        }
+
+        switch (tok.type) {
+        case TOK_COMMA:
+            break;
+        case TOK_RBRACK:
+            more = false;
+            break;
+        default:
+            free(items);
+            json_token_free(&tok);
+            return false;
+        }
+
+    }
+
+    tagtype_t elem_type;
+    if (!json_get_list_type(&elem_type, items, count)) {
+        free(items);
+        return false;
+    }
+
+    // store a copy sized to exactly `count` elements
+    data->list.type = elem_type;
+    data->list.size = count;
+    data->list.tags = xalloc(count * sizeof(tag_t));
+    memcpy(data->list.tags, items, count * sizeof(tag_t));
+    free(items);
+
+    return true;
+
+}
+
+// Read the members of a JSON object; the opening '{' has already been
+// consumed by the caller.
+//
+// data   - on success, data->compound receives the map of parsed members
+// stream - source of tokens
+//
+// Each member must be a non-empty string key, a ':' and a value, with
+// members separated by ','. Returns false on any tokenizer failure or
+// grammar violation. On failure the map built so far, a key that has not yet
+// been stored in the map, and any pending string token are all released.
+static bool json_read_compound(tagdata_t *data, const stream_t *stream) {
+
+    map_t map;
+    token_t next = {0};
+    char *name = NULL;
+
+    map_init(&map);
+
+    if (json_next_token(&next, stream) == false)
+        goto fail;
+
+    if (next.type == TOK_RBRACE) {
+        data->compound = map;
+        return true;
+    }
+
+    while (1) {
+
+        if (next.type != TOK_STRING)
+            goto fail;
+
+        // take the key buffer out of the token so that it is freed exactly
+        // once, through `name`
+        name = next.data.string.data;
+        int name_len = next.data.string.len;
+        next.data.string.data = NULL;
+
+        if (name_len < 1)
+            goto fail;
+
+        if (json_next_token(&next, stream) == false || next.type != TOK_COLON)
+            goto fail;
+
+        tag_t value;
+        if (json_read_value(&value, stream, NULL) == false)
+            goto fail;
+
+        value.name = name;
+        value.name_len = name_len;
+
+        map_put(&map, &value);
+
+        // the key is now referenced by the stored entry; from here on it is
+        // left to map_free, as in every later error path
+        name = NULL;
+
+        if (json_next_token(&next, stream) == false)
+            goto fail;
+
+        if (next.type == TOK_RBRACE)
+            break;
+
+        if (next.type != TOK_COMMA)
+            goto fail;
+
+        // after a comma the next token must be the following key
+        if (json_next_token(&next, stream) == false)
+            goto fail;
+
+    }
+
+    data->compound = map;
+
+    return true;
+
+fail:
+    free(name);
+    json_token_free(&next);
+    map_free(&map);
+    return false;
+}
+
+// Read one JSON value and convert it into a tag.
+//
+// tag    - receives the value; name is set to "" and name_len to 0
+// stream - source of tokens
+// first  - if non-NULL, a token that was already read and is the start of
+//          this value; if NULL the first token is read from the stream
+//
+// Token to tag mapping: '[' -> TAG_LIST, '{' -> TAG_COMPOUND,
+// string -> TAG_STRING (the token's buffer is handed to the tag),
+// integer -> TAG_LONG, decimal -> TAG_DOUBLE, bool -> TAG_BYTE (1 or 0).
+// `null` and any token that cannot start a value make the function fail.
+static bool json_read_value(tag_t *tag, const stream_t *stream, token_t *first) {
+
+    token_t tok;
+
+    if (first == NULL) {
+        if (json_next_token(&tok, stream) == false)
+            return false;
+    } else {
+        tok = *first;
+    }
+
+    tag->name = "";
+    tag->name_len = 0;
+
+    switch (tok.type) {
+    case TOK_LBRACK:
+        tag->type = TAG_LIST;
+        return json_read_list(&tag->data, stream);
+    case TOK_LBRACE:
+        tag->type = TAG_COMPOUND;
+        return json_read_compound(&tag->data, stream);
+    case TOK_STRING:
+        tag->type = TAG_STRING;
+        tag->data.string.data = tok.data.string.data;
+        tag->data.string.size = tok.data.string.len;
+        return true;
+    case TOK_NUMBER:
+        tag->type = TAG_LONG;
+        tag->data.l = tok.data.number;
+        return true;
+    case TOK_DOUBLE:
+        tag->type = TAG_DOUBLE;
+        tag->data.d = tok.data.decimal;
+        return true;
+    case TOK_BOOL:
+        tag->type = TAG_BYTE;
+        tag->data.b = tok.data.b ? 1 : 0;
+        return true;
+    case TOK_RBRACK:
+    case TOK_RBRACE:
+    case TOK_COLON:
+    case TOK_COMMA:
+    case TOK_NULL:
+        // none of these can begin a value
+        return false;
+    }
+
+    return true;
+}
+
+// Public entry point: read a single JSON value from the stream into tag.
+// Returns the result of json_read_value with no pre-read token.
+bool json_read(tag_t *tag, const stream_t *stream) {
+    const bool ok = json_read_value(tag, stream, NULL);
+    return ok;
+}