1 files changed, 632 insertions, 0 deletions
diff --git a/nbt/json/read.c b/nbt/json/read.c
new file mode 100644
index 0000000..3316b55
--- /dev/null
+++ b/nbt/json/read.c
@@ -0,0 +1,632 @@
+#include <ctype.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "nbt.h"
+#include "lib.h"
+
+static char ret = '\0'; // one-character pushback slot: json_parse_number stores the character that ended a number here, json_next_token consumes it before reading the stream again; '\0' means "empty"
+
+typedef enum { // kinds of lexical token produced by json_next_token
+	TOK_LBRACE, // '{' (also the value a zero-filled token carries)
+	TOK_RBRACE, // '}'
+	TOK_LBRACK, // '['
+	TOK_RBRACK, // ']'
+	TOK_COLON, // ':'
+	TOK_COMMA, // ','
+	TOK_STRING, // double-quoted string; payload in tokendata_t.string
+	TOK_NUMBER, // number without fraction or exponent; payload in tokendata_t.number
+	TOK_DOUBLE, // number with a fraction and/or exponent; payload in tokendata_t.decimal
+	TOK_BOOL, // literal true / false; payload in tokendata_t.b
+	TOK_NULL // literal null; carries no payload
+} tokentype_t ;
+
+typedef union { // payload of a token; the token's type decides which member is valid
+	bool b; // value of a TOK_BOOL
+	int64_t number; // value of a TOK_NUMBER
+	double decimal; // value of a TOK_DOUBLE
+	struct {
+		uint16_t len; // number of bytes stored in data
+		char *data; // heap buffer of len bytes; no NUL terminator is stored
+	} string; // value of a TOK_STRING
+} tokendata_t;
+
+typedef struct { // one lexical token: its kind plus the matching payload
+	tokentype_t type; // selects the valid member of data
+	tokendata_t data; // only set for string, number, double and bool tokens
+} token_t;
+
+/*
+ * Release the heap storage owned by a token.
+ *
+ * Only string tokens carry an allocation; any other kind is left untouched.
+ */
+static void json_token_free(token_t *token) {
+	if (token->type != TOK_STRING)
+		return;
+
+	free(token->data.string.data);
+}
+
+/*
+ * Decode the four hex digits that follow a "\u" escape and encode the
+ * resulting code point as UTF-8.
+ *
+ * buf    - receives the encoded bytes (one to three are written)
+ * read   - set to the number of bytes stored in buf
+ * stream - source of the four hex digits
+ *
+ * Returns false when the stream cannot supply four bytes or when any of them
+ * is not a hexadecimal digit; buf and read are untouched in that case.
+ *
+ * NOTE(review): UTF-16 surrogate pairs are not combined; each half is encoded
+ * on its own as a three-byte sequence.
+ */
+static bool json_parse_unicode(char buf[4], int *read, const stream_t *stream) {
+	char digits[4];
+
+	if (stream_read(stream, digits, 4) == false)
+		return false;
+
+	// Validate and accumulate by hand: strtol would also accept leading
+	// whitespace, a sign or a "0x" prefix, none of which are legal here.
+	unsigned int code_point = 0;
+	for (int i = 0; i < 4; i++) {
+		unsigned char d = (unsigned char) digits[i];
+		unsigned int value;
+
+		if (!isxdigit(d))
+			return false;
+
+		if (d >= '0' && d <= '9')
+			value = d - '0';
+		else
+			value = (unsigned int) ((d | 0x20) - 'a') + 10; // fold A-F onto a-f
+
+		code_point = (code_point << 4) | value;
+	}
+
+	if (code_point < 0x0080) {
+		// 0xxxxxxx
+		buf[0] = (char) code_point;
+		*read = 1;
+	} else if (code_point < 0x0800) {
+		// 110xxxxx 10xxxxxx
+		buf[0] = (char) (0xC0 | (code_point >> 6));
+		buf[1] = (char) (0x80 | (code_point & 0x3F));
+		*read = 2;
+	} else {
+		// 1110xxxx 10xxxxxx 10xxxxxx
+		buf[0] = (char) (0xE0 | (code_point >> 12));
+		buf[1] = (char) (0x80 | ((code_point >> 6) & 0x3F));
+		buf[2] = (char) (0x80 | (code_point & 0x3F));
+		*read = 3;
+	}
+
+	return true;
+}
+
+/*
+ * Decode the escape sequence that follows a backslash inside a string.
+ *
+ * buf    - receives the decoded byte(s)
+ * read   - set to the number of bytes stored in buf (1 unless the escape
+ *          is a "\u" sequence, which is delegated to json_parse_unicode)
+ * stream - source of the escape character and any hex digits
+ *
+ * Returns false when the stream cannot be read or the escape is not one of
+ * \" \\ \/ \b \f \n \r \t \u.
+ */
+static bool json_parse_escape(char buf[4], int *read, const stream_t *stream) {
+	char code;
+	char decoded;
+
+	*read = 1;
+
+	if (stream_read(stream, &code, 1) == false)
+		return false;
+
+	if (code == 'u')
+		return json_parse_unicode(buf, read, stream);
+
+	switch (code) {
+		case '"':
+		case '\\':
+		case '/':
+			// these escape to themselves
+			decoded = code;
+			break;
+		case 'b':
+			decoded = '\b';
+			break;
+		case 'f':
+			decoded = '\f';
+			break;
+		case 'n':
+			decoded = '\n';
+			break;
+		case 'r':
+			decoded = '\r';
+			break;
+		case 't':
+			decoded = '\t';
+			break;
+		default:
+			// invalid escape
+			return false;
+	}
+
+	buf[0] = decoded;
+	return true;
+}
+
+/*
+ * Read the body of a JSON string up to and including the closing quote.
+ * The opening quote has already been consumed by the caller.
+ *
+ * token  - on success, string.data receives a freshly allocated buffer of
+ *          string.len bytes (no NUL terminator is stored)
+ * stream - source of the characters
+ *
+ * Returns false, leaving token untouched, when the stream ends early, a raw
+ * byte is below 32 or above 127, an escape is invalid, or the decoded string
+ * would not fit in the 16-bit length field.
+ *
+ * NOTE(review): the range check also rejects raw multi-byte UTF-8 input;
+ * confirm that is intended.
+ */
+static bool json_parse_string(tokendata_t *token, const stream_t *stream) {
+
+	int capacity = 8;
+	int len = 0;
+	char *buf = xalloc(capacity * sizeof(char));
+
+	while (1) {
+
+		char tmp[4];
+		int read = 1;
+
+		if (stream_read(stream, tmp, 1) == false)
+			goto fail;
+
+		uint8_t c = tmp[0];
+
+		if (c == '"')
+			break;
+
+		// non printable ascii character
+		if (c < 32 || c > 127)
+			goto fail;
+
+		// an escape: tmp/read are overwritten with the decoded bytes
+		if (c == '\\' && json_parse_escape(tmp, &read, stream) == false)
+			goto fail;
+
+		// string.len is a uint16_t; refuse rather than silently truncate
+		if (len + read > UINT16_MAX)
+			goto fail;
+
+		if (len + read >= capacity) {
+			capacity *= 2;
+			buf = xrealloc(buf, capacity);
+		}
+
+		memcpy(buf + len, tmp, read);
+		len += read;
+
+	}
+
+	// hand back an exactly-sized copy and drop the growth buffer
+	token->string.data = xalloc(len * sizeof(char));
+	token->string.len = len;
+	memcpy(token->string.data, buf, len);
+	free(buf);
+
+	return true;
+
+fail:
+	free(buf);
+	return false;
+}
+
+/*
+ * Check that the next characters of the stream spell out rest exactly.
+ *
+ * One character is read and compared per character of rest; the function
+ * stops at the first read failure or mismatch.
+ *
+ * Returns true only when every character of rest was matched.
+ */
+static bool json_ident_assert(const stream_t *stream, const char *rest) {
+	const char *expected = rest;
+
+	do {
+		char got;
+
+		if (stream_read(stream, &got, 1) == false)
+			return false;
+		if (got != *expected)
+			return false;
+
+		expected++;
+	} while (*expected != '\0');
+
+	return true;
+}
+
+/*
+ * Parse one of the bare literals true, false or null.
+ *
+ * token  - receives the token type and, for booleans, the value
+ * stream - source of the remaining characters
+ * first  - the literal's first character, already consumed by the caller
+ *
+ * Returns false when first is not 't', 'f' or 'n', or when the characters
+ * that follow do not complete the literal.
+ */
+static bool json_parse_ident(token_t *token, const stream_t *stream, char first) {
+	// first is already consumed, so only the tail of each literal is matched
+	if (first == 't' && json_ident_assert(stream, "rue")) {
+		token->type = TOK_BOOL;
+		token->data.b = true;
+	} else if (first == 'f' && json_ident_assert(stream, "alse")) {
+		token->type = TOK_BOOL;
+		token->data.b = false;
+	} else if (first == 'n' && json_ident_assert(stream, "ull")) {
+		token->type = TOK_NULL;
+	} else {
+		return false;
+	}
+	return true;
+}
+
+/*
+ * Append one character to a growable buffer.
+ *
+ * buf - pointer to the buffer pointer; updated when the buffer is reallocated
+ * len - pointer to the number of characters stored; incremented
+ * cap - pointer to the buffer capacity; doubled when the buffer is full
+ * c   - character to append
+ */
+static void push_char(char **buf, int *len, int *cap, char c) {
+	if (*len == *cap) {
+		// double the capacity (a zero capacity gets a small starting size)
+		*cap = (*cap > 0) ? *cap * 2 : 8;
+		*buf = xrealloc(*buf, *cap * sizeof(char));
+	}
+	(*buf)[(*len)++] = c;
+}
+
+/*
+ * Parse a JSON number: optional '-', an integer part (a single 0 or a
+ * non-zero digit followed by digits), an optional fraction and an optional
+ * exponent.
+ *
+ * token  - receives TOK_NUMBER with data.number for plain integers, or
+ *          TOK_DOUBLE with data.decimal when a fraction or exponent is present
+ * stream - source of the remaining characters
+ * first  - first character of the number, already consumed by the caller;
+ *          '\0' means "nothing consumed yet, read it from the stream"
+ *
+ * The character that terminates the number is stored in the file-level
+ * pushback slot `ret` so the next json_next_token call sees it.
+ *
+ * Returns false on a read failure, a malformed number, or an integer that
+ * does not fit in the conversion range.
+ *
+ * NOTE(review): a terminating character must always be read, so a number that
+ * is the very last byte of the stream is rejected - confirm this against
+ * stream_read's end-of-input behaviour.
+ */
+static bool json_parse_number(token_t *token, const stream_t *stream, char first) {
+
+	int capacity = 8;
+	int len = 0;
+	char *buf = xalloc(capacity * sizeof(char));
+	char *end = NULL;
+	bool isdec = false;
+	bool ok = false;
+
+	char c = first;
+
+	if (c == '\0' && stream_read(stream, &c, 1) == false)
+		goto done;
+
+	// SIGN: only allowed once, in front of the digits; keep it in the buffer
+	// so the conversion below produces a negative value
+	if (c == '-') {
+		push_char(&buf, &len, &capacity, c);
+		if (stream_read(stream, &c, 1) == false)
+			goto done;
+	}
+
+	// INTEGER PART
+	if (c == '0') {
+		// a leading 0 cannot be followed by further digits
+		push_char(&buf, &len, &capacity, c);
+		if (stream_read(stream, &c, 1) == false)
+			goto done;
+	} else if (c >= '1' && c <= '9') {
+		do {
+			push_char(&buf, &len, &capacity, c);
+			if (stream_read(stream, &c, 1) == false)
+				goto done;
+		} while (c >= '0' && c <= '9');
+	} else {
+		// invalid start of digits
+		goto done;
+	}
+
+	// FRACTION: '.' followed by at least one digit
+	if (c == '.') {
+		int declen = 0;
+
+		isdec = true;
+		push_char(&buf, &len, &capacity, c);
+
+		while (1) {
+			if (stream_read(stream, &c, 1) == false)
+				goto done;
+			if (c < '0' || c > '9')
+				break;
+			push_char(&buf, &len, &capacity, c);
+			declen++;
+		}
+
+		if (declen == 0)
+			goto done;
+	}
+
+	// EXPONENT: 'e' or 'E', optional sign, at least one digit
+	if (c == 'e' || c == 'E') {
+		int explen = 0;
+
+		isdec = true;
+		push_char(&buf, &len, &capacity, 'E');
+
+		if (stream_read(stream, &c, 1) == false)
+			goto done;
+
+		if (c == '+' || c == '-') {
+			push_char(&buf, &len, &capacity, c);
+			if (stream_read(stream, &c, 1) == false)
+				goto done;
+		}
+
+		while (c >= '0' && c <= '9') {
+			push_char(&buf, &len, &capacity, c);
+			explen++;
+			if (stream_read(stream, &c, 1) == false)
+				goto done;
+		}
+
+		if (explen == 0)
+			goto done;
+	}
+
+	// CONVERT THE COLLECTED TEXT
+	push_char(&buf, &len, &capacity, '\0');
+
+	if (isdec) {
+		token->type = TOK_DOUBLE;
+		token->data.decimal = strtod(buf, &end);
+	} else {
+		// strtoll rather than strtol: long may be narrower than int64_t
+		errno = 0;
+		long long value = strtoll(buf, &end, 10);
+		if (errno == ERANGE)
+			goto done;
+		token->type = TOK_NUMBER;
+		token->data.number = value;
+	}
+
+	// the whole buffer must have been consumed by the conversion
+	if (end == buf || *end != '\0')
+		goto done;
+
+	// hand the terminating character back to the tokenizer
+	ret = c;
+	ok = true;
+
+done:
+	free(buf);
+	return ok;
+}
+
+/*
+ * Read the next token from the stream, skipping space, tab, CR and LF.
+ *
+ * token  - zero-filled first, then given its type and, where relevant, its
+ *          payload; a string token owns a heap buffer (see json_token_free)
+ * stream - source of the characters
+ *
+ * A character left in the pushback slot `ret` by json_parse_number is used
+ * before anything is read from the stream.
+ *
+ * Returns false on a read failure, a disallowed character, or when the
+ * string/number/literal sub-parser fails.
+ */
+static bool json_next_token(token_t *token, const stream_t *stream) {
+
+	memset(token, 0, sizeof(token_t));
+
+	char c;
+
+	// fetch characters until one is not whitespace
+	for (;;) {
+		if (ret != '\0') {
+			c = ret;
+			ret = '\0';
+		} else if (stream_read(stream, &c, 1) == false) {
+			return false;
+		}
+
+		if (c != ' ' && c != '\n' && c != '\t' && c != '\r')
+			break;
+	}
+
+	bool ok = true;
+
+	switch (c) {
+		case '{':
+			token->type = TOK_LBRACE;
+			break;
+		case '}':
+			token->type = TOK_RBRACE;
+			break;
+		case '[':
+			token->type = TOK_LBRACK;
+			break;
+		case ']':
+			token->type = TOK_RBRACK;
+			break;
+		case ':':
+			token->type = TOK_COLON;
+			break;
+		case ',':
+			token->type = TOK_COMMA;
+			break;
+		case '"':
+			token->type = TOK_STRING;
+			ok = json_parse_string(&token->data, stream);
+			break;
+		case 't':
+		case 'f':
+		case 'n':
+			// parse null or bool
+			ok = json_parse_ident(token, stream, c);
+			break;
+		default:
+			// cast: passing a negative char to isdigit is undefined
+			if (isdigit((unsigned char) c) || c == '-') {
+				// parse number
+				ok = json_parse_number(token, stream, c);
+			} else {
+				// disallowed symbol
+				ok = false;
+			}
+			break;
+	}
+
+	return ok;
+}
+
+/*
+ * Determine the common element type of a list.
+ *
+ * type - receives TAG_END for an empty list, otherwise the type of tags[0]
+ * tags - the list elements
+ * len  - number of elements in tags
+ *
+ * Returns false when any element's type differs from the first element's.
+ */
+static bool json_get_list_type(tagtype_t *type, const tag_t *tags, int len) {
+	if (len < 1) {
+		*type = TAG_END;
+		return true;
+	}
+
+	*type = tags[0].type;
+
+	// walk forward while the elements agree with the first one
+	int idx = 0;
+	while (idx < len && tags[idx].type == *type)
+		idx++;
+
+	return idx == len;
+}
+
+static bool json_read_value(tag_t *tag, const stream_t *stream, token_t *first); // forward declaration: json_read_list and json_read_compound call it before its definition below
+
+/*
+ * Read the elements of a JSON array; the opening '[' has already been
+ * consumed by the caller.
+ *
+ * data   - on success, list.tags / list.size / list.type are filled in; an
+ *          empty array yields tags == NULL, size 0 and type TAG_END
+ * stream - source of the tokens
+ *
+ * Returns false when a token cannot be read, an element is invalid, the
+ * separator is neither ',' nor ']', or the elements do not share one type.
+ *
+ * NOTE(review): the failure paths free only the element array itself, not
+ * anything the already-parsed elements own - confirm whether that leaks.
+ */
+static bool json_read_list(tagdata_t *data, const stream_t *stream) {
+
+	token_t tok = {0};
+	tagtype_t type;
+
+	if (!json_next_token(&tok, stream)) {
+		json_token_free(&tok);
+		return false;
+	}
+
+	if (tok.type == TOK_RBRACK) {
+		data->list.tags = NULL;
+		data->list.size = 0;
+		data->list.type = TAG_END;
+		return true;
+	}
+
+	int capacity = 8;
+	int count = 0;
+	tag_t *items = xalloc(capacity * sizeof(tag_t));
+
+	// the token read above belongs to the first element; later elements
+	// fetch their own first token
+	token_t *pending = &tok;
+
+	for (;;) {
+
+		tag_t element;
+		element.name = "";
+		element.name_len = 0;
+
+		if (!json_read_value(&element, stream, pending))
+			goto fail;
+
+		pending = NULL;
+
+		if (count == capacity) {
+			capacity *= 2;
+			items = xrealloc(items, capacity * sizeof(tag_t));
+		}
+
+		items[count++] = element;
+
+		if (!json_next_token(&tok, stream))
+			goto fail_token;
+
+		if (tok.type == TOK_RBRACK)
+			break;
+
+		if (tok.type != TOK_COMMA)
+			goto fail_token;
+
+	}
+
+	if (!json_get_list_type(&type, items, count))
+		goto fail;
+
+	// publish an exactly-sized copy and drop the growth buffer
+	data->list.type = type;
+	data->list.size = count;
+	data->list.tags = xalloc(count * sizeof(tag_t));
+	memcpy(data->list.tags, items, count * sizeof(tag_t));
+	free(items);
+
+	return true;
+
+fail_token:
+	json_token_free(&tok);
+fail:
+	free(items);
+	return false;
+
+}
+
+/*
+ * Read the members of a JSON object; the opening '{' has already been
+ * consumed by the caller.
+ *
+ * data   - on success, compound receives the map of parsed members
+ * stream - source of the tokens
+ *
+ * Each member must be a non-empty string key, a ':' and a value, with
+ * members separated by ',' and the object closed by '}'.
+ *
+ * Returns false on a read failure or any deviation from that shape. On
+ * failure the pending token, a key not yet stored in the map, and the map
+ * itself are released.
+ */
+static bool json_read_compound(tagdata_t *data, const stream_t *stream) {
+
+	map_t map;
+	map_init(&map);
+
+	char *name = NULL; // key buffer owned by this function until map_put
+	token_t next = {0};
+
+	if (json_next_token(&next, stream) == false)
+		goto fail;
+
+	if (next.type == TOK_RBRACE) {
+		data->compound = map;
+		return true;
+	}
+
+	while (1) {
+
+		if (next.type != TOK_STRING)
+			goto fail;
+
+		// take ownership of the key; clear the token so the shared cleanup
+		// cannot free the same buffer twice
+		name = next.data.string.data;
+		int name_len = next.data.string.len;
+		next.data.string.data = NULL;
+
+		if (name_len < 1)
+			goto fail;
+
+		// a non-colon token may itself be a string; the cleanup frees it
+		if (json_next_token(&next, stream) == false || next.type != TOK_COLON)
+			goto fail;
+
+		tag_t value;
+		if (json_read_value(&value, stream, NULL) == false)
+			goto fail;
+
+		value.name = name;
+		value.name_len = name_len;
+
+		map_put(&map, &value);
+		name = NULL; // the tag stored in the map now refers to the key
+
+		if (json_next_token(&next, stream) == false)
+			goto fail;
+
+		if (next.type == TOK_RBRACE)
+			break;
+
+		if (next.type != TOK_COMMA)
+			goto fail;
+
+		// after a comma, fetch the next key
+		if (json_next_token(&next, stream) == false)
+			goto fail;
+
+	}
+
+	data->compound = map;
+
+	return true;
+
+fail:
+	json_token_free(&next);
+	free(name);
+	map_free(&map);
+	return false;
+}
+
+/*
+ * Parse one JSON value into a tag.
+ *
+ * tag    - receives an empty name, the tag type and the value
+ * stream - source of the tokens
+ * first  - when non-NULL, the already-read first token of the value;
+ *          when NULL, the first token is read from the stream
+ *
+ * Mapping: '[' -> TAG_LIST, '{' -> TAG_COMPOUND, string -> TAG_STRING (the
+ * token's buffer is handed to the tag), integer -> TAG_LONG,
+ * fraction/exponent number -> TAG_DOUBLE, true/false -> TAG_BYTE 1/0.
+ *
+ * Returns false when no token can be read, when the token cannot start a
+ * value (']', '}', ':', ',', null), or when a nested list/compound fails.
+ */
+static bool json_read_value(tag_t *tag, const stream_t *stream, token_t *first) {
+
+	token_t current;
+
+	if (first == NULL) {
+		if (json_next_token(&current, stream) == false)
+			return false;
+	} else {
+		current = *first;
+	}
+
+	tag->name = "";
+	tag->name_len = 0;
+
+	switch (current.type) {
+		case TOK_LBRACE:
+			tag->type = TAG_COMPOUND;
+			return json_read_compound(&tag->data, stream);
+		case TOK_LBRACK:
+			tag->type = TAG_LIST;
+			return json_read_list(&tag->data, stream);
+		case TOK_STRING:
+			tag->type = TAG_STRING;
+			tag->data.string.data = current.data.string.data;
+			tag->data.string.size = current.data.string.len;
+			return true;
+		case TOK_NUMBER:
+			tag->type = TAG_LONG;
+			tag->data.l = current.data.number;
+			return true;
+		case TOK_DOUBLE:
+			tag->type = TAG_DOUBLE;
+			tag->data.d = current.data.decimal;
+			return true;
+		case TOK_BOOL:
+			tag->type = TAG_BYTE;
+			tag->data.b = current.data.b ? 1 : 0;
+			return true;
+		case TOK_RBRACK:
+		case TOK_RBRACE:
+		case TOK_COLON:
+		case TOK_COMMA:
+		case TOK_NULL:
+			// none of these can begin a value
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Public entry point: parse a single JSON value from stream into tag.
+ *
+ * Returns the result of json_read_value called with no pre-read token.
+ */
+bool json_read(tag_t *tag, const stream_t *stream) {
+	const bool parsed = json_read_value(tag, stream, NULL);
+	return parsed;
+}