diff options
| author | syuilo <4439005+syuilo@users.noreply.github.com> | 2025-11-29 21:19:55 +0900 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-11-29 21:19:55 +0900 |
| commit | 4bdbe794a69d196dfb94ee28180b2680531b400c (patch) | |
| tree | 9e7ecb33b870ecbb3c3a8d5822e781fc2c0ffa47 /packages/backend/src | |
| parent | Revert "chore(backend): remove jsdom" (diff) | |
| download | misskey-4bdbe794a69d196dfb94ee28180b2680531b400c.tar.gz misskey-4bdbe794a69d196dfb94ee28180b2680531b400c.tar.bz2 misskey-4bdbe794a69d196dfb94ee28180b2680531b400c.zip | |
perf(backend): parse5をやめて軽量な実装にし、メモリ削減・高速化 (#16892)
* wip
* test
* Revert "test"
This reverts commit b7c5ae72144874a378cef7a13320e8b89f40d6e9.
* Update MfmService.ts
Diffstat (limited to 'packages/backend/src')
| -rw-r--r-- | packages/backend/src/core/MfmService.ts | 155 | ||||
| -rw-r--r-- | packages/backend/src/server/oauth/OAuth2ProviderService.ts | 2 |
2 files changed, 80 insertions, 77 deletions
diff --git a/packages/backend/src/core/MfmService.ts b/packages/backend/src/core/MfmService.ts index a359d5c838..b9f1c62d9d 100644 --- a/packages/backend/src/core/MfmService.ts +++ b/packages/backend/src/core/MfmService.ts @@ -5,7 +5,7 @@ import { URL } from 'node:url'; import { Inject, Injectable } from '@nestjs/common'; -import * as parse5 from 'parse5'; +import * as htmlParser from 'node-html-parser'; import { DI } from '@/di-symbols.js'; import type { Config } from '@/config.js'; import { intersperse } from '@/misc/prelude/array.js'; @@ -13,13 +13,8 @@ import { normalizeForSearch } from '@/misc/normalize-for-search.js'; import type { IMentionedRemoteUsers } from '@/models/Note.js'; import { bindThis } from '@/decorators.js'; import { escapeHtml } from '@/misc/escape-html.js'; -import type { DefaultTreeAdapterMap } from 'parse5'; import type * as mfm from 'mfm-js'; -const treeAdapter = parse5.defaultTreeAdapter; -type Node = DefaultTreeAdapterMap['node']; -type ChildNode = DefaultTreeAdapterMap['childNode']; - const urlRegex = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+/; const urlRegexFull = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+$/; @@ -38,68 +33,68 @@ export class MfmService { const normalizedHashtagNames = hashtagNames == null ? undefined : new Set<string>(hashtagNames.map(x => normalizeForSearch(x))); - const dom = parse5.parseFragment(html); + const doc = htmlParser.parse(`<div>${html}</div>`); let text = ''; - for (const n of dom.childNodes) { + for (const n of doc.childNodes) { analyze(n); } return text.trim(); - function getText(node: Node): string { - if (treeAdapter.isTextNode(node)) return node.value; - if (!treeAdapter.isElementNode(node)) return ''; - if (node.nodeName === 'br') return '\n'; + function getText(node: htmlParser.Node): string { + if (node instanceof htmlParser.TextNode) return node.textContent; + if (!(node instanceof htmlParser.HTMLElement)) return ''; + if (node.tagName === 'BR') return '\n'; - if (node.childNodes) { + if (node.childNodes != null) { return node.childNodes.map(n => getText(n)).join(''); } return ''; } - function appendChildren(childNodes: ChildNode[]): void { - if (childNodes) { + function analyzeChildren(childNodes: htmlParser.Node[] | null): void { + if (childNodes != null) { for (const n of childNodes) { analyze(n); } } } - function analyze(node: Node) { - if (treeAdapter.isTextNode(node)) { - text += node.value; + function analyze(node: htmlParser.Node) { + if (node instanceof htmlParser.TextNode) { + text += node.textContent; return; } // Skip comment or document type node - if (!treeAdapter.isElementNode(node)) { + if (!(node instanceof htmlParser.HTMLElement)) { return; } - switch (node.nodeName) { - case 'br': { + switch (node.tagName) { + case 'BR': { text += '\n'; break; } - case 'a': { + case 'A': { const txt = getText(node); - const rel = node.attrs.find(x => x.name === 'rel'); - const href = node.attrs.find(x => x.name === 'href'); + const rel = node.attributes.rel; + const href = node.attributes.href; // ハッシュタグ - if (normalizedHashtagNames && href && normalizedHashtagNames.has(normalizeForSearch(txt))) { + if (normalizedHashtagNames && href != null && normalizedHashtagNames.has(normalizeForSearch(txt))) { text += txt; // メンション - } else if (txt.startsWith('@') && !(rel && rel.value.startsWith('me '))) { + } else if (txt.startsWith('@') && !(rel != null && rel.startsWith('me '))) { const part = txt.split('@'); if (part.length === 2 && href) { //#region ホスト名部分が省略されているので復元する - const acct = `${txt}@${(new URL(href.value)).hostname}`; + const acct = `${txt}@${(new URL(href)).hostname}`; text += acct; //#endregion } else if (part.length === 3) { @@ -114,17 +109,17 @@ export class MfmService { if (!href) { return txt; } - if (!txt || txt === href.value) { // #6383: Missing text node - if (href.value.match(urlRegexFull)) { - return href.value; + if (!txt || txt === href) { // #6383: Missing text node + if (href.match(urlRegexFull)) { + return href; } else { - return `<${href.value}>`; + return `<${href}>`; } } - if (href.value.match(urlRegex) && !href.value.match(urlRegexFull)) { - return `[${txt}](<${href.value}>)`; // #6846 + if (href.match(urlRegex) && !href.match(urlRegexFull)) { + return `[${txt}](<${href}>)`; // #6846 } else { - return `[${txt}](${href.value})`; + return `[${txt}](${href})`; } }; @@ -133,60 +128,64 @@ export class MfmService { break; } - case 'h1': { + case 'H1': { text += '【'; - appendChildren(node.childNodes); + analyzeChildren(node.childNodes); text += '】\n'; break; } - case 'b': - case 'strong': { + case 'B': + case 'STRONG': { text += '**'; - appendChildren(node.childNodes); + analyzeChildren(node.childNodes); text += '**'; break; } - case 'small': { + case 'SMALL': { text += '<small>'; - appendChildren(node.childNodes); + analyzeChildren(node.childNodes); text += '</small>'; break; } - case 's': - case 'del': { + case 'S': + case 'DEL': { text += '~~'; - appendChildren(node.childNodes); + analyzeChildren(node.childNodes); text += '~~'; break; } - case 'i': - case 'em': { + case 'I': + case 'EM': { text += '<i>'; - appendChildren(node.childNodes); + analyzeChildren(node.childNodes); text += '</i>'; break; } - case 'ruby': { + case 'RUBY': { let ruby: [string, string][] = []; for (const child of node.childNodes) { - if (child.nodeName === 'rp') { + if ((child instanceof htmlParser.TextNode) && !/\s|\[|\]/.test(child.textContent)) { + ruby.push([child.textContent, '']); continue; } - if (treeAdapter.isTextNode(child) && !/\s|\[|\]/.test(child.value)) { - ruby.push([child.value, '']); + + if (!(child instanceof htmlParser.HTMLElement)) continue; + + if (child.tagName === 'RP') { continue; } - if (child.nodeName === 'rt' && ruby.length > 0) { + + if (child.tagName === 'RT' && ruby.length > 0) { const rt = getText(child); if (/\s|\[|\]/.test(rt)) { // If any space is included in rt, it is treated as a normal text ruby = []; - appendChildren(node.childNodes); + analyzeChildren(node.childNodes); break; } else { ruby.at(-1)![1] = rt; @@ -195,7 +194,7 @@ export class MfmService { } // If any other element is included in ruby, it is treated as a normal text ruby = []; - appendChildren(node.childNodes); + analyzeChildren(node.childNodes); break; } for (const [base, rt] of ruby) { @@ -205,26 +204,30 @@ export class MfmService { } // block code (<pre><code>) - case 'pre': { - if (node.childNodes.length === 1 && node.childNodes[0].nodeName === 'code') { + case 'PRE': { + if (node.childNodes.length === 1 && (node.childNodes[0] instanceof htmlParser.HTMLElement) && node.childNodes[0].tagName === 'CODE') { text += '\n```\n'; text += getText(node.childNodes[0]); text += '\n```\n'; + } else if (node.childNodes.length === 1 && (node.childNodes[0] instanceof htmlParser.TextNode) && node.childNodes[0].textContent.startsWith('<code>') && node.childNodes[0].textContent.endsWith('</code>')) { + text += '\n```\n'; + text += node.childNodes[0].textContent.slice(6, -7); + text += '\n```\n'; } else { - appendChildren(node.childNodes); + analyzeChildren(node.childNodes); } break; } // inline code (<code>) - case 'code': { + case 'CODE': { text += '`'; - appendChildren(node.childNodes); + analyzeChildren(node.childNodes); text += '`'; break; } - case 'blockquote': { + case 'BLOCKQUOTE': { const t = getText(node); if (t) { text += '\n> '; @@ -233,33 +236,33 @@ export class MfmService { break; } - case 'p': - case 'h2': - case 'h3': - case 'h4': - case 'h5': - case 'h6': { + case 'P': + case 'H2': + case 'H3': + case 'H4': + case 'H5': + case 'H6': { text += '\n\n'; - appendChildren(node.childNodes); + analyzeChildren(node.childNodes); break; } // other block elements - case 'div': - case 'header': - case 'footer': - case 'article': - case 'li': - case 'dt': - case 'dd': { + case 'DIV': + case 'HEADER': + case 'FOOTER': + case 'ARTICLE': + case 'LI': + case 'DT': + case 'DD': { text += '\n'; - appendChildren(node.childNodes); + analyzeChildren(node.childNodes); break; } default: // includes inline elements { - appendChildren(node.childNodes); + analyzeChildren(node.childNodes); break; } } diff --git a/packages/backend/src/server/oauth/OAuth2ProviderService.ts b/packages/backend/src/server/oauth/OAuth2ProviderService.ts index 102998e8be..2b0b303b98 100644 --- a/packages/backend/src/server/oauth/OAuth2ProviderService.ts +++ b/packages/backend/src/server/oauth/OAuth2ProviderService.ts @@ -120,7 +120,7 @@ async function discoverClientInformation(logger: Logger, httpRequestService: Htt } const text = await res.text(); - const fragment = htmlParser.parse(text); + const fragment = htmlParser.parse(`<div>${text}</div>`); redirectUris.push(...[...fragment.querySelectorAll('link[rel=redirect_uri][href]')].map(el => el.attributes.href)); |