Make html-to-mfm and its callstack async

This commit is contained in:
Laura Hausmann 2023-09-11 22:34:00 +02:00
parent 86defdffd3
commit 552041726b
No known key found for this signature in database
GPG key ID: D044E84C5BE01605
6 changed files with 60 additions and 60 deletions

View file

@ -5,7 +5,7 @@ import { defaultTreeAdapter as treeAdapter } from "parse5";
const urlRegex = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+/; const urlRegex = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+/;
const urlRegexFull = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+$/; const urlRegexFull = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+$/;
export function fromHtml(html: string, hashtagNames?: string[]): string { export async function fromHtml(html: string, hashtagNames?: string[]): Promise<string> {
// some AP servers like Pixelfed use br tags as well as newlines // some AP servers like Pixelfed use br tags as well as newlines
html = html.replace(/<br\s?\/?>\r?\n/gi, "\n"); html = html.replace(/<br\s?\/?>\r?\n/gi, "\n");
@ -14,7 +14,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
let text = ""; let text = "";
for (const n of dom.childNodes) { for (const n of dom.childNodes) {
analyze(n); await analyze(n);
} }
return text.trim(); return text.trim();
@ -31,15 +31,15 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
return ""; return "";
} }
function appendChildren(childNodes: TreeAdapter.ChildNode[]): void { async function appendChildren(childNodes: TreeAdapter.ChildNode[]): Promise<void> {
if (childNodes) { if (childNodes) {
for (const n of childNodes) { for (const n of childNodes) {
analyze(n); await analyze(n);
} }
} }
} }
function analyze(node: TreeAdapter.Node) { async function analyze(node: TreeAdapter.Node) {
if (treeAdapter.isTextNode(node)) { if (treeAdapter.isTextNode(node)) {
text += node.value; text += node.value;
return; return;
@ -109,7 +109,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
case "h1": { case "h1": {
text += "【"; text += "【";
appendChildren(node.childNodes); await appendChildren(node.childNodes);
text += "】\n"; text += "】\n";
break; break;
} }
@ -117,14 +117,14 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
case "b": case "b":
case "strong": { case "strong": {
text += "**"; text += "**";
appendChildren(node.childNodes); await appendChildren(node.childNodes);
text += "**"; text += "**";
break; break;
} }
case "small": { case "small": {
text += "<small>"; text += "<small>";
appendChildren(node.childNodes); await appendChildren(node.childNodes);
text += "</small>"; text += "</small>";
break; break;
} }
@ -132,7 +132,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
case "s": case "s":
case "del": { case "del": {
text += "~~"; text += "~~";
appendChildren(node.childNodes); await appendChildren(node.childNodes);
text += "~~"; text += "~~";
break; break;
} }
@ -140,7 +140,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
case "i": case "i":
case "em": { case "em": {
text += "<i>"; text += "<i>";
appendChildren(node.childNodes); await appendChildren(node.childNodes);
text += "</i>"; text += "</i>";
break; break;
} }
@ -155,7 +155,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
text += getText(node.childNodes[0]); text += getText(node.childNodes[0]);
text += "\n```\n"; text += "\n```\n";
} else { } else {
appendChildren(node.childNodes); await appendChildren(node.childNodes);
} }
break; break;
} }
@ -163,7 +163,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
// inline code (<code>) // inline code (<code>)
case "code": { case "code": {
text += "`"; text += "`";
appendChildren(node.childNodes); await appendChildren(node.childNodes);
text += "`"; text += "`";
break; break;
} }
@ -184,7 +184,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
case "h5": case "h5":
case "h6": { case "h6": {
text += "\n\n"; text += "\n\n";
appendChildren(node.childNodes); await appendChildren(node.childNodes);
break; break;
} }
@ -197,13 +197,13 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
case "dt": case "dt":
case "dd": { case "dd": {
text += "\n"; text += "\n";
appendChildren(node.childNodes); await appendChildren(node.childNodes);
break; break;
} }
default: { default: {
// includes inline elements // includes inline elements
appendChildren(node.childNodes); await appendChildren(node.childNodes);
break; break;
} }
} }

View file

@ -40,7 +40,7 @@ export async function importMastoPost(
job.progress(60); job.progress(60);
let text; let text;
try { try {
text = htmlToMfm(post.object.content, post.object.tag); text = await htmlToMfm(post.object.content, post.object.tag);
} catch (e) { } catch (e) {
throw e; throw e;
} }

View file

@ -2,10 +2,10 @@ import type { IObject } from "../type.js";
import { extractApHashtagObjects } from "../models/tag.js"; import { extractApHashtagObjects } from "../models/tag.js";
import { fromHtml } from "../../../mfm/from-html.js"; import { fromHtml } from "../../../mfm/from-html.js";
export function htmlToMfm(html: string, tag?: IObject | IObject[]) { export async function htmlToMfm(html: string, tag?: IObject | IObject[]) {
const hashtagNames = extractApHashtagObjects(tag) const hashtagNames = extractApHashtagObjects(tag)
.map((x) => x.name) .map((x) => x.name)
.filter((x): x is string => x != null); .filter((x): x is string => x != null);
return fromHtml(html, hashtagNames); return await fromHtml(html, hashtagNames);
} }

View file

@ -313,7 +313,7 @@ export async function createNote(
} else if (typeof note._misskey_content !== "undefined") { } else if (typeof note._misskey_content !== "undefined") {
text = note._misskey_content; text = note._misskey_content;
} else if (typeof note.content === "string") { } else if (typeof note.content === "string") {
text = htmlToMfm(note.content, note.tag); text = await htmlToMfm(note.content, note.tag);
} }
// vote // vote
@ -575,7 +575,7 @@ export async function updateNote(value: string | IObject, resolver?: Resolver) {
} else if (typeof post._misskey_content !== "undefined") { } else if (typeof post._misskey_content !== "undefined") {
text = post._misskey_content; text = post._misskey_content;
} else if (typeof post.content === "string") { } else if (typeof post.content === "string") {
text = htmlToMfm(post.content, post.tag); text = await htmlToMfm(post.content, post.tag);
} }
const cw = post.sensitive && post.summary; const cw = post.sensitive && post.summary;

View file

@ -234,7 +234,7 @@ export async function createPerson(
} }
} }
const { fields } = analyzeAttachments(person.attachment || []); const { fields } = await analyzeAttachments(person.attachment || []);
const tags = extractApHashtags(person.tag) const tags = extractApHashtags(person.tag)
.map((tag) => normalizeForSearch(tag)) .map((tag) => normalizeForSearch(tag))
@ -335,7 +335,7 @@ export async function createPerson(
new UserProfile({ new UserProfile({
userId: user.id, userId: user.id,
description: person.summary description: person.summary
? htmlToMfm(truncate(person.summary, summaryLength), person.tag) ? await htmlToMfm(truncate(person.summary, summaryLength), person.tag)
: null, : null,
url: url, url: url,
fields, fields,
@ -481,7 +481,7 @@ export async function updatePerson(
const emojiNames = emojis.map((emoji) => emoji.name); const emojiNames = emojis.map((emoji) => emoji.name);
const { fields } = analyzeAttachments(person.attachment || []); const { fields } = await analyzeAttachments(person.attachment || []);
const tags = extractApHashtags(person.tag) const tags = extractApHashtags(person.tag)
.map((tag) => normalizeForSearch(tag)) .map((tag) => normalizeForSearch(tag))
@ -591,7 +591,7 @@ export async function updatePerson(
url: url, url: url,
fields, fields,
description: person.summary description: person.summary
? htmlToMfm(truncate(person.summary, summaryLength), person.tag) ? await htmlToMfm(truncate(person.summary, summaryLength), person.tag)
: null, : null,
birthday: bday ? bday[0] : null, birthday: bday ? bday[0] : null,
location: person["vcard:Address"] || null, location: person["vcard:Address"] || null,
@ -676,7 +676,7 @@ function addService(target: { [x: string]: any }, source: IApPropertyValue) {
} }
} }
export function analyzeAttachments( export async function analyzeAttachments(
attachments: IObject | IObject[] | undefined, attachments: IObject | IObject[] | undefined,
) { ) {
const fields: { const fields: {
@ -692,7 +692,7 @@ export function analyzeAttachments(
} else { } else {
fields.push({ fields.push({
name: attachment.name, name: attachment.name,
value: fromHtml(attachment.value), value: await fromHtml(attachment.value),
}); });
} }
} }

View file

@ -19,106 +19,106 @@ describe("toHtml", () => {
}); });
describe("fromHtml", () => { describe("fromHtml", () => {
it("p", () => { it("p", async () => {
assert.deepStrictEqual(fromHtml("<p>a</p><p>b</p>"), "a\n\nb"); assert.deepStrictEqual(await fromHtml("<p>a</p><p>b</p>"), "a\n\nb");
}); });
it("block element", () => { it("block element", async () => {
assert.deepStrictEqual(fromHtml("<div>a</div><div>b</div>"), "a\nb"); assert.deepStrictEqual(await fromHtml("<div>a</div><div>b</div>"), "a\nb");
}); });
it("inline element", () => { it("inline element", async () => {
assert.deepStrictEqual(fromHtml("<ul><li>a</li><li>b</li></ul>"), "a\nb"); assert.deepStrictEqual(await fromHtml("<ul><li>a</li><li>b</li></ul>"), "a\nb");
}); });
it("block code", () => { it("block code", async () => {
assert.deepStrictEqual( assert.deepStrictEqual(
fromHtml("<pre><code>a\nb</code></pre>"), await fromHtml("<pre><code>a\nb</code></pre>"),
"```\na\nb\n```", "```\na\nb\n```",
); );
}); });
it("inline code", () => { it("inline code", async () => {
assert.deepStrictEqual(fromHtml("<code>a</code>"), "`a`"); assert.deepStrictEqual(await fromHtml("<code>a</code>"), "`a`");
}); });
it("quote", () => { it("quote", async () => {
assert.deepStrictEqual( assert.deepStrictEqual(
fromHtml("<blockquote>a\nb</blockquote>"), await fromHtml("<blockquote>a\nb</blockquote>"),
"> a\n> b", "> a\n> b",
); );
}); });
it("br", () => { it("br", async () => {
assert.deepStrictEqual(fromHtml("<p>abc<br><br/>d</p>"), "abc\n\nd"); assert.deepStrictEqual(await fromHtml("<p>abc<br><br/>d</p>"), "abc\n\nd");
}); });
it("link with different text", () => { it("link with different text", async () => {
assert.deepStrictEqual( assert.deepStrictEqual(
fromHtml('<p>a <a href="https://iceshrimp.dev/b">c</a> d</p>'), await fromHtml('<p>a <a href="https://iceshrimp.dev/b">c</a> d</p>'),
"a [c](https://iceshrimp.dev/b) d", "a [c](https://iceshrimp.dev/b) d",
); );
}); });
it("link with different text, but not encoded", () => { it("link with different text, but not encoded", async () => {
assert.deepStrictEqual( assert.deepStrictEqual(
fromHtml('<p>a <a href="https://iceshrimp.dev/ä">c</a> d</p>'), await fromHtml('<p>a <a href="https://iceshrimp.dev/ä">c</a> d</p>'),
"a [c](<https://iceshrimp.dev/ä>) d", "a [c](<https://iceshrimp.dev/ä>) d",
); );
}); });
it("link with same text", () => { it("link with same text", async () => {
assert.deepStrictEqual( assert.deepStrictEqual(
fromHtml( await fromHtml(
'<p>a <a href="https://joiniceshrimp.org/b">https://joiniceshrimp.org/b</a> d</p>', '<p>a <a href="https://joiniceshrimp.org/b">https://joiniceshrimp.org/b</a> d</p>',
), ),
"a https://joiniceshrimp.org/b d", "a https://joiniceshrimp.org/b d",
); );
}); });
it("link with same text, but not encoded", () => { it("link with same text, but not encoded", async () => {
assert.deepStrictEqual( assert.deepStrictEqual(
fromHtml( await fromHtml(
'<p>a <a href="https://joiniceshrimp.org/ä">https://joiniceshrimp.org/ä</a> d</p>', '<p>a <a href="https://joiniceshrimp.org/ä">https://joiniceshrimp.org/ä</a> d</p>',
), ),
"a <https://joiniceshrimp.org/ä> d", "a <https://joiniceshrimp.org/ä> d",
); );
}); });
it("link with no url", () => { it("link with no url", async () => {
assert.deepStrictEqual( assert.deepStrictEqual(
fromHtml('<p>a <a href="b">c</a> d</p>'), await fromHtml('<p>a <a href="b">c</a> d</p>'),
"a [c](b) d", "a [c](b) d",
); );
}); });
it("link without href", () => { it("link without href", async () => {
assert.deepStrictEqual(fromHtml("<p>a <a>c</a> d</p>"), "a c d"); assert.deepStrictEqual(await fromHtml("<p>a <a>c</a> d</p>"), "a c d");
}); });
it("link without text", () => { it("link without text", async () => {
assert.deepStrictEqual( assert.deepStrictEqual(
fromHtml('<p>a <a href="https://joiniceshrimp.org/b"></a> d</p>'), await fromHtml('<p>a <a href="https://joiniceshrimp.org/b"></a> d</p>'),
"a https://joiniceshrimp.org/b d", "a https://joiniceshrimp.org/b d",
); );
}); });
it("link without both", () => { it("link without both", async () => {
assert.deepStrictEqual(fromHtml("<p>a <a></a> d</p>"), "a d"); assert.deepStrictEqual(await fromHtml("<p>a <a></a> d</p>"), "a d");
}); });
it("mention", () => { it("mention", async () => {
assert.deepStrictEqual( assert.deepStrictEqual(
fromHtml( await fromHtml(
'<p>a <a href="https://joiniceshrimp.org/@user" class="u-url mention">@user</a> d</p>', '<p>a <a href="https://joiniceshrimp.org/@user" class="u-url mention">@user</a> d</p>',
), ),
"a @user@joiniceshrimp.org d", "a @user@joiniceshrimp.org d",
); );
}); });
it("hashtag", () => { it("hashtag", async () => {
assert.deepStrictEqual( assert.deepStrictEqual(
fromHtml('<p>a <a href="https://joiniceshrimp.org/tags/a">#a</a> d</p>', [ await fromHtml('<p>a <a href="https://joiniceshrimp.org/tags/a">#a</a> d</p>', [
"#a", "#a",
]), ]),
"a #a d", "a #a d",