From 231ef297b5b650048b1daaaafa9fd78ca917eeec Mon Sep 17 00:00:00 2001 From: Hazelnoot Date: Wed, 30 Apr 2025 11:12:54 -0400 Subject: replace JSDOM with cheerio --- packages/backend/package.json | 7 ++-- .../src/core/FetchInstanceMetadataService.ts | 44 +++++++++++----------- packages/backend/src/misc/verify-field-link.ts | 23 +++++------ .../backend/src/server/api/endpoints/i/update.ts | 3 +- packages/backend/test/e2e/oauth.ts | 10 ++--- packages/backend/test/utils.ts | 6 +-- 6 files changed, 46 insertions(+), 47 deletions(-) (limited to 'packages/backend') diff --git a/packages/backend/package.json b/packages/backend/package.json index 9aa26033d0..4bd940c957 100644 --- a/packages/backend/package.json +++ b/packages/backend/package.json @@ -91,8 +91,6 @@ "@swc/core": "1.11.24", "@transfem-org/sfm-js": "0.24.6", "@twemoji/parser": "15.1.1", - "@types/redis-info": "3.0.3", - "@types/psl": "^1.1.3", "accepts": "1.3.8", "ajv": "8.17.1", "archiver": "7.0.1", @@ -107,6 +105,7 @@ "cbor": "9.0.2", "chalk": "5.4.1", "chalk-template": "1.1.0", + "cheerio": "1.0.0", "chokidar": "3.6.0", "cli-highlight": "2.1.11", "color-convert": "2.0.1", @@ -132,7 +131,6 @@ "ipaddr.js": "2.2.0", "is-svg": "5.1.0", "js-yaml": "4.1.0", - "jsdom": "26.1.0", "json5": "2.2.3", "jsonld": "8.3.3", "jsrsasign": "11.1.0", @@ -209,7 +207,6 @@ "@types/http-link-header": "1.0.7", "@types/jest": "29.5.14", "@types/js-yaml": "4.0.9", - "@types/jsdom": "21.1.7", "@types/jsonld": "1.5.15", "@types/jsrsasign": "10.5.15", "@types/mime-types": "2.1.4", @@ -221,10 +218,12 @@ "@types/oauth2orize-pkce": "0.1.2", "@types/pg": "8.11.14", "@types/proxy-addr": "^2.0.3", + "@types/psl": "^1.1.3", "@types/pug": "2.0.10", "@types/qrcode": "1.5.5", "@types/random-seed": "0.3.5", "@types/ratelimiter": "3.4.6", + "@types/redis-info": "3.0.3", "@types/rename": "1.0.7", "@types/sanitize-html": "2.15.0", "@types/semver": "7.7.0", diff --git a/packages/backend/src/core/FetchInstanceMetadataService.ts b/packages/backend/src/core/FetchInstanceMetadataService.ts index ce3af7c774..5bfcfc5c98 100644 --- a/packages/backend/src/core/FetchInstanceMetadataService.ts +++ b/packages/backend/src/core/FetchInstanceMetadataService.ts @@ -5,9 +5,9 @@ import { URL } from 'node:url'; import { Inject, Injectable } from '@nestjs/common'; -import { JSDOM } from 'jsdom'; import tinycolor from 'tinycolor2'; import * as Redis from 'ioredis'; +import { load as cheerio, CheerioAPI } from 'cheerio'; import type { MiInstance } from '@/models/Instance.js'; import type Logger from '@/logger.js'; import { DI } from '@/di-symbols.js'; @@ -15,7 +15,6 @@ import { LoggerService } from '@/core/LoggerService.js'; import { HttpRequestService } from '@/core/HttpRequestService.js'; import { bindThis } from '@/decorators.js'; import { FederatedInstanceService } from '@/core/FederatedInstanceService.js'; -import type { DOMWindow } from 'jsdom'; type NodeInfo = { openRegistrations?: unknown; @@ -181,17 +180,14 @@ export class FetchInstanceMetadataService { } @bindThis - private async fetchDom(instance: MiInstance): Promise { + private async fetchDom(instance: MiInstance): Promise { this.logger.info(`Fetching HTML of ${instance.host} ...`); const url = 'https://' + instance.host; const html = await this.httpRequestService.getHtml(url); - const { window } = new JSDOM(html); - const doc = window.document; - - return doc; + return cheerio(html); } @bindThis @@ -206,12 +202,15 @@ export class FetchInstanceMetadataService { } @bindThis - private async fetchFaviconUrl(instance: MiInstance, doc: Document | null): Promise { + private async fetchFaviconUrl(instance: MiInstance, doc: CheerioAPI | null): Promise { const url = 'https://' + instance.host; if (doc) { // https://github.com/misskey-dev/misskey/pull/8220#issuecomment-1025104043 - const href = Array.from(doc.getElementsByTagName('link')).reverse().find(link => link.relList.contains('icon'))?.href; + const href = doc('link[rel][href]') + .filter((_, link) => link.attribs.rel.split(' ').includes('icon')) + .last() + .attr('href'); if (href) { return (new URL(href, url)).href; @@ -232,7 +231,7 @@ export class FetchInstanceMetadataService { } @bindThis - private async fetchIconUrl(instance: MiInstance, doc: Document | null, manifest: Record | null): Promise { + private async fetchIconUrl(instance: MiInstance, doc: CheerioAPI | null, manifest: Record | null): Promise { if (manifest && manifest.icons && manifest.icons.length > 0 && manifest.icons[0].src) { const url = 'https://' + instance.host; return (new URL(manifest.icons[0].src, url)).href; @@ -242,13 +241,16 @@ export class FetchInstanceMetadataService { const url = 'https://' + instance.host; // https://github.com/misskey-dev/misskey/pull/8220#issuecomment-1025104043 - const links = Array.from(doc.getElementsByTagName('link')).reverse(); + const links = Array.from(doc('link[rel][href]')).reverse().map(link => ({ + rel: link.attribs.rel.split(' '), + href: link.attribs.href, + })); // https://github.com/misskey-dev/misskey/pull/8220/files/0ec4eba22a914e31b86874f12448f88b3e58dd5a#r796487559 const href = [ - links.find(link => link.relList.contains('apple-touch-icon-precomposed'))?.href, - links.find(link => link.relList.contains('apple-touch-icon'))?.href, - links.find(link => link.relList.contains('icon'))?.href, + links.find(link => link.rel.includes('apple-touch-icon-precomposed'))?.href, + links.find(link => link.rel.includes('apple-touch-icon'))?.href, + links.find(link => link.rel.includes('icon'))?.href, ] .find(href => href); @@ -261,8 +263,8 @@ export class FetchInstanceMetadataService { } @bindThis - private async getThemeColor(info: NodeInfo | null, doc: Document | null, manifest: Record | null): Promise { - const themeColor = info?.metadata?.themeColor ?? doc?.querySelector('meta[name="theme-color"]')?.getAttribute('content') ?? manifest?.theme_color; + private async getThemeColor(info: NodeInfo | null, doc: CheerioAPI | null, manifest: Record | null): Promise { + const themeColor = info?.metadata?.themeColor ?? doc?.('meta[name="theme-color"][content]').attr('content') ?? manifest?.theme_color; if (themeColor) { const color = new tinycolor(themeColor); @@ -273,7 +275,7 @@ export class FetchInstanceMetadataService { } @bindThis - private async getSiteName(info: NodeInfo | null, doc: Document | null, manifest: Record | null): Promise { + private async getSiteName(info: NodeInfo | null, doc: CheerioAPI | null, manifest: Record | null): Promise { if (info && info.metadata) { if (typeof info.metadata.nodeName === 'string') { return info.metadata.nodeName; @@ -283,7 +285,7 @@ export class FetchInstanceMetadataService { } if (doc) { - const og = doc.querySelector('meta[property="og:title"]')?.getAttribute('content'); + const og = doc('meta[property="og:title"][content]').attr('content'); if (og) { return og; @@ -298,7 +300,7 @@ export class FetchInstanceMetadataService { } @bindThis - private async getDescription(info: NodeInfo | null, doc: Document | null, manifest: Record | null): Promise { + private async getDescription(info: NodeInfo | null, doc: CheerioAPI | null, manifest: Record | null): Promise { if (info && info.metadata) { if (typeof info.metadata.nodeDescription === 'string') { return info.metadata.nodeDescription; @@ -308,12 +310,12 @@ export class FetchInstanceMetadataService { } if (doc) { - const meta = doc.querySelector('meta[name="description"]')?.getAttribute('content'); + const meta = doc('meta[name="description"][content]').attr('content'); if (meta) { return meta; } - const og = doc.querySelector('meta[property="og:description"]')?.getAttribute('content'); + const og = doc('meta[property="og:description"][content]').attr('content'); if (og) { return og; } diff --git a/packages/backend/src/misc/verify-field-link.ts b/packages/backend/src/misc/verify-field-link.ts index f519acfba0..62542eaaa0 100644 --- a/packages/backend/src/misc/verify-field-link.ts +++ b/packages/backend/src/misc/verify-field-link.ts @@ -3,32 +3,29 @@ * SPDX-License-Identifier: AGPL-3.0-only */ -import { JSDOM } from 'jsdom'; +import { load as cheerio } from 'cheerio'; import type { HttpRequestService } from '@/core/HttpRequestService.js'; type Field = { name: string, value: string }; export async function verifyFieldLinks(fields: Field[], profile_url: string, httpRequestService: HttpRequestService): Promise { const verified_links = []; - for (const field_url of fields - .filter(x => URL.canParse(x.value) && ['http:', 'https:'].includes((new URL(x.value).protocol)))) { + for (const field_url of fields.filter(x => URL.canParse(x.value) && ['http:', 'https:'].includes((new URL(x.value).protocol)))) { try { const html = await httpRequestService.getHtml(field_url.value); - const { window } = new JSDOM(html); - const doc: Document = window.document; + const doc = cheerio(html); - const aEls = Array.from(doc.getElementsByTagName('a')); - const linkEls = Array.from(doc.getElementsByTagName('link')); + const links = doc('a[rel~="me"][href], link[rel~="me"][href]').toArray(); - const includesProfileLinks = [...aEls, ...linkEls].some(link => link.rel === 'me' && link.href === profile_url); - if (includesProfileLinks) { verified_links.push(field_url.value); } - - window.close(); - } catch (err) { + const includesProfileLinks = links.some(link => link.attribs.href === profile_url); + if (includesProfileLinks) { + verified_links.push(field_url.value); + } + } catch { // don't do anything. - continue; } } + return verified_links; } diff --git a/packages/backend/src/server/api/endpoints/i/update.ts b/packages/backend/src/server/api/endpoints/i/update.ts index 5f93597fd7..ad8f38703b 100644 --- a/packages/backend/src/server/api/endpoints/i/update.ts +++ b/packages/backend/src/server/api/endpoints/i/update.ts @@ -6,7 +6,6 @@ import * as mfm from '@transfem-org/sfm-js'; import { Inject, Injectable } from '@nestjs/common'; import ms from 'ms'; -import { JSDOM } from 'jsdom'; import { extractCustomEmojisFromMfm } from '@/misc/extract-custom-emojis-from-mfm.js'; import { extractHashtags } from '@/misc/extract-hashtags.js'; import * as Acct from '@/misc/acct.js'; @@ -622,6 +621,7 @@ export default class extends Endpoint { // eslint- } // this function is superseded by '@/misc/verify-field-link.ts' + /* private async verifyLink(url: string, user: MiLocalUser) { if (!safeForSql(url)) return; @@ -653,6 +653,7 @@ export default class extends Endpoint { // eslint- // なにもしない } } + */ // these two methods need to be kept in sync with // `ApRendererService.renderPerson` diff --git a/packages/backend/test/e2e/oauth.ts b/packages/backend/test/e2e/oauth.ts index 7434701e67..47851e9474 100644 --- a/packages/backend/test/e2e/oauth.ts +++ b/packages/backend/test/e2e/oauth.ts @@ -19,7 +19,7 @@ import { ResourceOwnerPassword, } from 'simple-oauth2'; import pkceChallenge from 'pkce-challenge'; -import { JSDOM } from 'jsdom'; +import { load as cheerio } from 'cheerio'; import Fastify, { type FastifyInstance, type FastifyReply } from 'fastify'; import { api, port, sendEnvUpdateRequest, signup } from '../utils.js'; import type * as misskey from 'misskey-js'; @@ -73,11 +73,11 @@ const clientConfig: ModuleOptions<'client_id'> = { }; function getMeta(html: string): { transactionId: string | undefined, clientName: string | undefined, clientLogo: string | undefined } { - const fragment = JSDOM.fragment(html); + const fragment = cheerio(html); return { - transactionId: fragment.querySelector('meta[name="misskey:oauth:transaction-id"]')?.content, - clientName: fragment.querySelector('meta[name="misskey:oauth:client-name"]')?.content, - clientLogo: fragment.querySelector('meta[name="misskey:oauth:client-logo"]')?.content, + transactionId: fragment('meta[name="misskey:oauth:transaction-id"][content]').attr('content'), + clientName: fragment('meta[name="misskey:oauth:client-name"][content]').attr('content'), + clientLogo: fragment('meta[name="misskey:oauth:client-logo"][content]').attr('content'), }; } diff --git a/packages/backend/test/utils.ts b/packages/backend/test/utils.ts index 7b69cb04f4..70deff2e2d 100644 --- a/packages/backend/test/utils.ts +++ b/packages/backend/test/utils.ts @@ -11,7 +11,7 @@ import { inspect } from 'node:util'; import WebSocket, { ClientOptions } from 'ws'; import fetch, { File, RequestInit, type Headers } from 'node-fetch'; import { DataSource } from 'typeorm'; -import { JSDOM } from 'jsdom'; +import { load as cheerio, CheerioAPI } from 'cheerio'; import { type Response } from 'node-fetch'; import Fastify from 'fastify'; import { entities } from '../src/postgres.js'; @@ -464,7 +464,7 @@ export function makeStreamCatcher( export type SimpleGetResponse = { status: number, - body: any | JSDOM | null, + body: any | CheerioAPI | null, type: string | null, location: string | null }; @@ -495,7 +495,7 @@ export const simpleGet = async (path: string, accept = '*/*', cookie: any = unde const body = jsonTypes.includes(res.headers.get('content-type') ?? '') ? await res.json() : - htmlTypes.includes(res.headers.get('content-type') ?? '') ? new JSDOM(await res.text()) : + htmlTypes.includes(res.headers.get('content-type') ?? '') ? cheerio(await res.text()) : await bodyExtractor(res); return { -- cgit v1.2.3-freya