From 7da60a0147116130a94274e4a20ae54dd7d59dea Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Sat, 31 Mar 2018 19:53:30 +0900 Subject: Store texts as HTML --- src/common/text/parse/core/syntax-highlighter.ts | 334 +++++++++++++++++++++++ src/common/text/parse/elements/bold.ts | 14 + src/common/text/parse/elements/code.ts | 17 ++ src/common/text/parse/elements/emoji.ts | 14 + src/common/text/parse/elements/hashtag.ts | 19 ++ src/common/text/parse/elements/inline-code.ts | 17 ++ src/common/text/parse/elements/link.ts | 19 ++ src/common/text/parse/elements/mention.ts | 17 ++ src/common/text/parse/elements/quote.ts | 14 + src/common/text/parse/elements/url.ts | 14 + src/common/text/parse/index.ts | 72 +++++ 11 files changed, 551 insertions(+) create mode 100644 src/common/text/parse/core/syntax-highlighter.ts create mode 100644 src/common/text/parse/elements/bold.ts create mode 100644 src/common/text/parse/elements/code.ts create mode 100644 src/common/text/parse/elements/emoji.ts create mode 100644 src/common/text/parse/elements/hashtag.ts create mode 100644 src/common/text/parse/elements/inline-code.ts create mode 100644 src/common/text/parse/elements/link.ts create mode 100644 src/common/text/parse/elements/mention.ts create mode 100644 src/common/text/parse/elements/quote.ts create mode 100644 src/common/text/parse/elements/url.ts create mode 100644 src/common/text/parse/index.ts (limited to 'src/common/text/parse') diff --git a/src/common/text/parse/core/syntax-highlighter.ts b/src/common/text/parse/core/syntax-highlighter.ts new file mode 100644 index 0000000000..c0396b1fc6 --- /dev/null +++ b/src/common/text/parse/core/syntax-highlighter.ts @@ -0,0 +1,334 @@ +function escape(text) { + return text + .replace(/>/g, '>') + .replace(/ k[0].toUpperCase() + k.substr(1))) + .concat(_keywords.map(k => k.toUpperCase())) + .sort((a, b) => b.length - a.length); + +const symbols = [ + '=', + '+', + '-', + '*', + '/', + '%', + '~', + '^', + '&', + '|', + '>', + '<', + '!', + '?' +]; + +const elements = [ + // comment + code => { + if (code.substr(0, 2) != '//') return null; + const match = code.match(/^\/\/(.+?)(\n|$)/); + if (!match) return null; + const comment = match[0]; + return { + html: `${escape(comment)}`, + next: comment.length + }; + }, + + // block comment + code => { + const match = code.match(/^\/\*([\s\S]+?)\*\//); + if (!match) return null; + return { + html: `${escape(match[0])}`, + next: match[0].length + }; + }, + + // string + code => { + if (!/^['"`]/.test(code)) return null; + const begin = code[0]; + let str = begin; + let thisIsNotAString = false; + for (let i = 1; i < code.length; i++) { + const char = code[i]; + if (char == '\\') { + str += char; + str += code[i + 1] || ''; + i++; + continue; + } else if (char == begin) { + str += char; + break; + } else if (char == '\n' || i == (code.length - 1)) { + thisIsNotAString = true; + break; + } else { + str += char; + } + } + if (thisIsNotAString) { + return null; + } else { + return { + html: `${escape(str)}`, + next: str.length + }; + } + }, + + // regexp + code => { + if (code[0] != '/') return null; + let regexp = ''; + let thisIsNotARegexp = false; + for (let i = 1; i < code.length; i++) { + const char = code[i]; + if (char == '\\') { + regexp += char; + regexp += code[i + 1] || ''; + i++; + continue; + } else if (char == '/') { + break; + } else if (char == '\n' || i == (code.length - 1)) { + thisIsNotARegexp = true; + break; + } else { + regexp += char; + } + } + + if (thisIsNotARegexp) return null; + if (regexp == '') return null; + if (regexp[0] == ' ' && regexp[regexp.length - 1] == ' ') return null; + + return { + html: `/${escape(regexp)}/`, + next: regexp.length + 2 + }; + }, + + // label + code => { + if (code[0] != '@') return null; + const match = code.match(/^@([a-zA-Z_-]+?)\n/); + if (!match) return null; + const label = match[0]; + return { + html: `${label}`, + next: label.length + }; + }, + + // number + (code, i, source) => { + const prev = source[i - 1]; + if (prev && /[a-zA-Z]/.test(prev)) return null; + if (!/^[\-\+]?[0-9\.]+/.test(code)) return null; + const match = code.match(/^[\-\+]?[0-9\.]+/)[0]; + if (match) { + return { + html: `${match}`, + next: match.length + }; + } else { + return null; + } + }, + + // nan + (code, i, source) => { + const prev = source[i - 1]; + if (prev && /[a-zA-Z]/.test(prev)) return null; + if (code.substr(0, 3) == 'NaN') { + return { + html: `NaN`, + next: 3 + }; + } else { + return null; + } + }, + + // method + code => { + const match = code.match(/^([a-zA-Z_-]+?)\(/); + if (!match) return null; + + if (match[1] == '-') return null; + + return { + html: `${match[1]}`, + next: match[1].length + }; + }, + + // property + (code, i, source) => { + const prev = source[i - 1]; + if (prev != '.') return null; + + const match = code.match(/^[a-zA-Z0-9_-]+/); + if (!match) return null; + + return { + html: `${match[0]}`, + next: match[0].length + }; + }, + + // keyword + (code, i, source) => { + const prev = source[i - 1]; + if (prev && /[a-zA-Z]/.test(prev)) return null; + + const match = keywords.filter(k => code.substr(0, k.length) == k)[0]; + if (match) { + if (/^[a-zA-Z]/.test(code.substr(match.length))) return null; + return { + html: `${match}`, + next: match.length + }; + } else { + return null; + } + }, + + // symbol + code => { + const match = symbols.filter(s => code[0] == s)[0]; + if (match) { + return { + html: `${match}`, + next: 1 + }; + } else { + return null; + } + } +]; + +// specify lang is todo +export default (source: string, lang?: string) => { + let code = source; + let html = ''; + + let i = 0; + + function push(token) { + html += token.html; + code = code.substr(token.next); + i += token.next; + } + + while (code != '') { + const parsed = elements.some(el => { + const e = el(code, i, source); + if (e) { + push(e); + return true; + } else { + return false; + } + }); + + if (!parsed) { + push({ + html: escape(code[0]), + next: 1 + }); + } + } + + return html; +}; diff --git a/src/common/text/parse/elements/bold.ts b/src/common/text/parse/elements/bold.ts new file mode 100644 index 0000000000..ce25764457 --- /dev/null +++ b/src/common/text/parse/elements/bold.ts @@ -0,0 +1,14 @@ +/** + * Bold + */ + +module.exports = text => { + const match = text.match(/^\*\*(.+?)\*\*/); + if (!match) return null; + const bold = match[0]; + return { + type: 'bold', + content: bold, + bold: bold.substr(2, bold.length - 4) + }; +}; diff --git a/src/common/text/parse/elements/code.ts b/src/common/text/parse/elements/code.ts new file mode 100644 index 0000000000..4821e95fe2 --- /dev/null +++ b/src/common/text/parse/elements/code.ts @@ -0,0 +1,17 @@ +/** + * Code (block) + */ + +import genHtml from '../core/syntax-highlighter'; + +module.exports = text => { + const match = text.match(/^```([\s\S]+?)```/); + if (!match) return null; + const code = match[0]; + return { + type: 'code', + content: code, + code: code.substr(3, code.length - 6).trim(), + html: genHtml(code.substr(3, code.length - 6).trim()) + }; +}; diff --git a/src/common/text/parse/elements/emoji.ts b/src/common/text/parse/elements/emoji.ts new file mode 100644 index 0000000000..e24231a223 --- /dev/null +++ b/src/common/text/parse/elements/emoji.ts @@ -0,0 +1,14 @@ +/** + * Emoji + */ + +module.exports = text => { + const match = text.match(/^:[a-zA-Z0-9+-_]+:/); + if (!match) return null; + const emoji = match[0]; + return { + type: 'emoji', + content: emoji, + emoji: emoji.substr(1, emoji.length - 2) + }; +}; diff --git a/src/common/text/parse/elements/hashtag.ts b/src/common/text/parse/elements/hashtag.ts new file mode 100644 index 0000000000..ee57b140b8 --- /dev/null +++ b/src/common/text/parse/elements/hashtag.ts @@ -0,0 +1,19 @@ +/** + * Hashtag + */ + +module.exports = (text, i) => { + if (!(/^\s#[^\s]+/.test(text) || (i == 0 && /^#[^\s]+/.test(text)))) return null; + const isHead = text[0] == '#'; + const hashtag = text.match(/^\s?#[^\s]+/)[0]; + const res: any[] = !isHead ? [{ + type: 'text', + content: text[0] + }] : []; + res.push({ + type: 'hashtag', + content: isHead ? hashtag : hashtag.substr(1), + hashtag: isHead ? hashtag.substr(1) : hashtag.substr(2) + }); + return res; +}; diff --git a/src/common/text/parse/elements/inline-code.ts b/src/common/text/parse/elements/inline-code.ts new file mode 100644 index 0000000000..9f9ef51a2b --- /dev/null +++ b/src/common/text/parse/elements/inline-code.ts @@ -0,0 +1,17 @@ +/** + * Code (inline) + */ + +import genHtml from '../core/syntax-highlighter'; + +module.exports = text => { + const match = text.match(/^`(.+?)`/); + if (!match) return null; + const code = match[0]; + return { + type: 'inline-code', + content: code, + code: code.substr(1, code.length - 2).trim(), + html: genHtml(code.substr(1, code.length - 2).trim()) + }; +}; diff --git a/src/common/text/parse/elements/link.ts b/src/common/text/parse/elements/link.ts new file mode 100644 index 0000000000..35563ddc3d --- /dev/null +++ b/src/common/text/parse/elements/link.ts @@ -0,0 +1,19 @@ +/** + * Link + */ + +module.exports = text => { + const match = text.match(/^\??\[([^\[\]]+?)\]\((https?:\/\/[\w\/:%#@\$&\?!\(\)\[\]~\.=\+\-]+?)\)/); + if (!match) return null; + const silent = text[0] == '?'; + const link = match[0]; + const title = match[1]; + const url = match[2]; + return { + type: 'link', + content: link, + title: title, + url: url, + silent: silent + }; +}; diff --git a/src/common/text/parse/elements/mention.ts b/src/common/text/parse/elements/mention.ts new file mode 100644 index 0000000000..2025dfdaad --- /dev/null +++ b/src/common/text/parse/elements/mention.ts @@ -0,0 +1,17 @@ +/** + * Mention + */ +import parseAcct from '../../../../common/user/parse-acct'; + +module.exports = text => { + const match = text.match(/^(?:@[a-zA-Z0-9\-]+){1,2}/); + if (!match) return null; + const mention = match[0]; + const { username, host } = parseAcct(mention.substr(1)); + return { + type: 'mention', + content: mention, + username, + host + }; +}; diff --git a/src/common/text/parse/elements/quote.ts b/src/common/text/parse/elements/quote.ts new file mode 100644 index 0000000000..cc8cfffdc4 --- /dev/null +++ b/src/common/text/parse/elements/quote.ts @@ -0,0 +1,14 @@ +/** + * Quoted text + */ + +module.exports = text => { + const match = text.match(/^"([\s\S]+?)\n"/); + if (!match) return null; + const quote = match[0]; + return { + type: 'quote', + content: quote, + quote: quote.substr(1, quote.length - 2).trim(), + }; +}; diff --git a/src/common/text/parse/elements/url.ts b/src/common/text/parse/elements/url.ts new file mode 100644 index 0000000000..1003aff9c3 --- /dev/null +++ b/src/common/text/parse/elements/url.ts @@ -0,0 +1,14 @@ +/** + * URL + */ + +module.exports = text => { + const match = text.match(/^https?:\/\/[\w\/:%#@\$&\?!\(\)\[\]~\.=\+\-]+/); + if (!match) return null; + const url = match[0]; + return { + type: 'url', + content: url, + url: url + }; +}; diff --git a/src/common/text/parse/index.ts b/src/common/text/parse/index.ts new file mode 100644 index 0000000000..1e2398dc38 --- /dev/null +++ b/src/common/text/parse/index.ts @@ -0,0 +1,72 @@ +/** + * Misskey Text Analyzer + */ + +const elements = [ + require('./elements/bold'), + require('./elements/url'), + require('./elements/link'), + require('./elements/mention'), + require('./elements/hashtag'), + require('./elements/code'), + require('./elements/inline-code'), + require('./elements/quote'), + require('./elements/emoji') +]; + +export default (source: string) => { + + if (source == '') { + return null; + } + + const tokens = []; + + function push(token) { + if (token != null) { + tokens.push(token); + source = source.substr(token.content.length); + } + } + + let i = 0; + + // パース + while (source != '') { + const parsed = elements.some(el => { + let _tokens = el(source, i); + if (_tokens) { + if (!Array.isArray(_tokens)) { + _tokens = [_tokens]; + } + _tokens.forEach(push); + return true; + } else { + return false; + } + }); + + if (!parsed) { + push({ + type: 'text', + content: source[0] + }); + } + + i++; + } + + // テキストを纏める + tokens[0] = [tokens[0]]; + return tokens.reduce((a, b) => { + if (a[a.length - 1].type == 'text' && b.type == 'text') { + const tail = a.pop(); + return a.concat({ + type: 'text', + content: tail.content + b.content + }); + } else { + return a.concat(b); + } + }); +}; -- cgit v1.2.3-freya