summaryrefslogtreecommitdiff
path: root/packages/backend/src/core
diff options
context:
space:
mode:
authorsyuilo <4439005+syuilo@users.noreply.github.com>2025-11-29 21:19:55 +0900
committerGitHub <noreply@github.com>2025-11-29 21:19:55 +0900
commit4bdbe794a69d196dfb94ee28180b2680531b400c (patch)
tree9e7ecb33b870ecbb3c3a8d5822e781fc2c0ffa47 /packages/backend/src/core
parentRevert "chore(backend): remove jsdom" (diff)
downloadmisskey-4bdbe794a69d196dfb94ee28180b2680531b400c.tar.gz
misskey-4bdbe794a69d196dfb94ee28180b2680531b400c.tar.bz2
misskey-4bdbe794a69d196dfb94ee28180b2680531b400c.zip
perf(backend): parse5をやめて軽量な実装にし、メモリ削減・高速化 (#16892)
* wip * test * Revert "test" This reverts commit b7c5ae72144874a378cef7a13320e8b89f40d6e9. * Update MfmService.ts
Diffstat (limited to 'packages/backend/src/core')
-rw-r--r--packages/backend/src/core/MfmService.ts155
1 files changed, 79 insertions, 76 deletions
diff --git a/packages/backend/src/core/MfmService.ts b/packages/backend/src/core/MfmService.ts
index a359d5c838..b9f1c62d9d 100644
--- a/packages/backend/src/core/MfmService.ts
+++ b/packages/backend/src/core/MfmService.ts
@@ -5,7 +5,7 @@
import { URL } from 'node:url';
import { Inject, Injectable } from '@nestjs/common';
-import * as parse5 from 'parse5';
+import * as htmlParser from 'node-html-parser';
import { DI } from '@/di-symbols.js';
import type { Config } from '@/config.js';
import { intersperse } from '@/misc/prelude/array.js';
@@ -13,13 +13,8 @@ import { normalizeForSearch } from '@/misc/normalize-for-search.js';
import type { IMentionedRemoteUsers } from '@/models/Note.js';
import { bindThis } from '@/decorators.js';
import { escapeHtml } from '@/misc/escape-html.js';
-import type { DefaultTreeAdapterMap } from 'parse5';
import type * as mfm from 'mfm-js';
-const treeAdapter = parse5.defaultTreeAdapter;
-type Node = DefaultTreeAdapterMap['node'];
-type ChildNode = DefaultTreeAdapterMap['childNode'];
-
const urlRegex = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+/;
const urlRegexFull = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+$/;
@@ -38,68 +33,68 @@ export class MfmService {
const normalizedHashtagNames = hashtagNames == null ? undefined : new Set<string>(hashtagNames.map(x => normalizeForSearch(x)));
- const dom = parse5.parseFragment(html);
+ const doc = htmlParser.parse(`<div>${html}</div>`);
let text = '';
- for (const n of dom.childNodes) {
+ for (const n of doc.childNodes) {
analyze(n);
}
return text.trim();
- function getText(node: Node): string {
- if (treeAdapter.isTextNode(node)) return node.value;
- if (!treeAdapter.isElementNode(node)) return '';
- if (node.nodeName === 'br') return '\n';
+ function getText(node: htmlParser.Node): string {
+ if (node instanceof htmlParser.TextNode) return node.textContent;
+ if (!(node instanceof htmlParser.HTMLElement)) return '';
+ if (node.tagName === 'BR') return '\n';
- if (node.childNodes) {
+ if (node.childNodes != null) {
return node.childNodes.map(n => getText(n)).join('');
}
return '';
}
- function appendChildren(childNodes: ChildNode[]): void {
- if (childNodes) {
+ function analyzeChildren(childNodes: htmlParser.Node[] | null): void {
+ if (childNodes != null) {
for (const n of childNodes) {
analyze(n);
}
}
}
- function analyze(node: Node) {
- if (treeAdapter.isTextNode(node)) {
- text += node.value;
+ function analyze(node: htmlParser.Node) {
+ if (node instanceof htmlParser.TextNode) {
+ text += node.textContent;
return;
}
// Skip comment or document type node
- if (!treeAdapter.isElementNode(node)) {
+ if (!(node instanceof htmlParser.HTMLElement)) {
return;
}
- switch (node.nodeName) {
- case 'br': {
+ switch (node.tagName) {
+ case 'BR': {
text += '\n';
break;
}
- case 'a': {
+ case 'A': {
const txt = getText(node);
- const rel = node.attrs.find(x => x.name === 'rel');
- const href = node.attrs.find(x => x.name === 'href');
+ const rel = node.attributes.rel;
+ const href = node.attributes.href;
// ハッシュタグ
- if (normalizedHashtagNames && href && normalizedHashtagNames.has(normalizeForSearch(txt))) {
+ if (normalizedHashtagNames && href != null && normalizedHashtagNames.has(normalizeForSearch(txt))) {
text += txt;
// メンション
- } else if (txt.startsWith('@') && !(rel && rel.value.startsWith('me '))) {
+ } else if (txt.startsWith('@') && !(rel != null && rel.startsWith('me '))) {
const part = txt.split('@');
if (part.length === 2 && href) {
//#region ホスト名部分が省略されているので復元する
- const acct = `${txt}@${(new URL(href.value)).hostname}`;
+ const acct = `${txt}@${(new URL(href)).hostname}`;
text += acct;
//#endregion
} else if (part.length === 3) {
@@ -114,17 +109,17 @@ export class MfmService {
if (!href) {
return txt;
}
- if (!txt || txt === href.value) { // #6383: Missing text node
- if (href.value.match(urlRegexFull)) {
- return href.value;
+ if (!txt || txt === href) { // #6383: Missing text node
+ if (href.match(urlRegexFull)) {
+ return href;
} else {
- return `<${href.value}>`;
+ return `<${href}>`;
}
}
- if (href.value.match(urlRegex) && !href.value.match(urlRegexFull)) {
- return `[${txt}](<${href.value}>)`; // #6846
+ if (href.match(urlRegex) && !href.match(urlRegexFull)) {
+ return `[${txt}](<${href}>)`; // #6846
} else {
- return `[${txt}](${href.value})`;
+ return `[${txt}](${href})`;
}
};
@@ -133,60 +128,64 @@ export class MfmService {
break;
}
- case 'h1': {
+ case 'H1': {
text += '【';
- appendChildren(node.childNodes);
+ analyzeChildren(node.childNodes);
text += '】\n';
break;
}
- case 'b':
- case 'strong': {
+ case 'B':
+ case 'STRONG': {
text += '**';
- appendChildren(node.childNodes);
+ analyzeChildren(node.childNodes);
text += '**';
break;
}
- case 'small': {
+ case 'SMALL': {
text += '<small>';
- appendChildren(node.childNodes);
+ analyzeChildren(node.childNodes);
text += '</small>';
break;
}
- case 's':
- case 'del': {
+ case 'S':
+ case 'DEL': {
text += '~~';
- appendChildren(node.childNodes);
+ analyzeChildren(node.childNodes);
text += '~~';
break;
}
- case 'i':
- case 'em': {
+ case 'I':
+ case 'EM': {
text += '<i>';
- appendChildren(node.childNodes);
+ analyzeChildren(node.childNodes);
text += '</i>';
break;
}
- case 'ruby': {
+ case 'RUBY': {
let ruby: [string, string][] = [];
for (const child of node.childNodes) {
- if (child.nodeName === 'rp') {
+ if ((child instanceof htmlParser.TextNode) && !/\s|\[|\]/.test(child.textContent)) {
+ ruby.push([child.textContent, '']);
continue;
}
- if (treeAdapter.isTextNode(child) && !/\s|\[|\]/.test(child.value)) {
- ruby.push([child.value, '']);
+
+ if (!(child instanceof htmlParser.HTMLElement)) continue;
+
+ if (child.tagName === 'RP') {
continue;
}
- if (child.nodeName === 'rt' && ruby.length > 0) {
+
+ if (child.tagName === 'RT' && ruby.length > 0) {
const rt = getText(child);
if (/\s|\[|\]/.test(rt)) {
// If any space is included in rt, it is treated as a normal text
ruby = [];
- appendChildren(node.childNodes);
+ analyzeChildren(node.childNodes);
break;
} else {
ruby.at(-1)![1] = rt;
@@ -195,7 +194,7 @@ export class MfmService {
}
// If any other element is included in ruby, it is treated as a normal text
ruby = [];
- appendChildren(node.childNodes);
+ analyzeChildren(node.childNodes);
break;
}
for (const [base, rt] of ruby) {
@@ -205,26 +204,30 @@ export class MfmService {
}
// block code (<pre><code>)
- case 'pre': {
- if (node.childNodes.length === 1 && node.childNodes[0].nodeName === 'code') {
+ case 'PRE': {
+ if (node.childNodes.length === 1 && (node.childNodes[0] instanceof htmlParser.HTMLElement) && node.childNodes[0].tagName === 'CODE') {
text += '\n```\n';
text += getText(node.childNodes[0]);
text += '\n```\n';
+ } else if (node.childNodes.length === 1 && (node.childNodes[0] instanceof htmlParser.TextNode) && node.childNodes[0].textContent.startsWith('<code>') && node.childNodes[0].textContent.endsWith('</code>')) {
+ text += '\n```\n';
+ text += node.childNodes[0].textContent.slice(6, -7);
+ text += '\n```\n';
} else {
- appendChildren(node.childNodes);
+ analyzeChildren(node.childNodes);
}
break;
}
// inline code (<code>)
- case 'code': {
+ case 'CODE': {
text += '`';
- appendChildren(node.childNodes);
+ analyzeChildren(node.childNodes);
text += '`';
break;
}
- case 'blockquote': {
+ case 'BLOCKQUOTE': {
const t = getText(node);
if (t) {
text += '\n> ';
@@ -233,33 +236,33 @@ export class MfmService {
break;
}
- case 'p':
- case 'h2':
- case 'h3':
- case 'h4':
- case 'h5':
- case 'h6': {
+ case 'P':
+ case 'H2':
+ case 'H3':
+ case 'H4':
+ case 'H5':
+ case 'H6': {
text += '\n\n';
- appendChildren(node.childNodes);
+ analyzeChildren(node.childNodes);
break;
}
// other block elements
- case 'div':
- case 'header':
- case 'footer':
- case 'article':
- case 'li':
- case 'dt':
- case 'dd': {
+ case 'DIV':
+ case 'HEADER':
+ case 'FOOTER':
+ case 'ARTICLE':
+ case 'LI':
+ case 'DT':
+ case 'DD': {
text += '\n';
- appendChildren(node.childNodes);
+ analyzeChildren(node.childNodes);
break;
}
default: // includes inline elements
{
- appendChildren(node.childNodes);
+ analyzeChildren(node.childNodes);
break;
}
}