m->d: make image-replies work

This commit is contained in:
Cadence Ember
2025-11-26 17:21:15 +13:00
parent e6c30f80b5
commit 1758b7aa22
2 changed files with 241 additions and 146 deletions

View File

@@ -605,7 +605,7 @@ async function eventToMessage(event, guild, di) {
}
attachments.push({id: "0", filename})
pendingFiles.push({name: filename, mxc: event.content.url})
} else if (shouldProcessTextEvent) {
} else {
// Handling edits. If the edit was an edit of a reply, edits do not include the reply reference, so we need to fetch up to 2 more events.
// this event ---is an edit of--> original event ---is a reply to--> past event
await (async () => {
@@ -738,157 +738,159 @@ async function eventToMessage(event, guild, di) {
replyLine = `-# > ${replyLine}${contentPreview}\n`
})()
if (event.content.format === "org.matrix.custom.html" && event.content.formatted_body) {
let input = event.content.formatted_body
if (event.content.msgtype === "m.emote") {
input = `* ${displayName} ${input}`
}
// Handling mentions of Discord users
input = input.replace(/("https:\/\/matrix.to\/#\/((?:@|%40)[^"]+)")>/g, (whole, attributeValue, mxid) => {
mxid = decodeURIComponent(mxid)
if (mxUtils.eventSenderIsFromDiscord(mxid)) {
// Handle mention of an OOYE sim user by their mxid
const id = select("sim", "user_id", {mxid}).pluck().get()
if (!id) return whole
return `${attributeValue} data-user-id="${id}">`
} else {
// Handle mention of a Matrix user by their mxid
// Check if this Matrix user is actually the sim user from another old bridge in the room?
const match = mxid.match(/[^:]*discord[^:]*_([0-9]{6,}):/) // try to match @_discord_123456, @_discordpuppet_123456, etc.
if (match) return `${attributeValue} data-user-id="${match[1]}">`
// Nope, just a real Matrix user.
return whole
if (shouldProcessTextEvent) {
if (event.content.format === "org.matrix.custom.html" && event.content.formatted_body) {
let input = event.content.formatted_body
if (event.content.msgtype === "m.emote") {
input = `* ${displayName} ${input}`
}
})
// Handling mentions of rooms and room-messages
input = await handleRoomOrMessageLinks(input, di)
// Stripping colons after mentions
input = input.replace(/( data-user-id.*?<\/a>):?/g, "$1")
input = input.replace(/("https:\/\/matrix.to.*?<\/a>):?/g, "$1")
// Element adds a bunch of <br> before </blockquote> but doesn't render them. I can't figure out how this even works in the browser, so let's just delete those.
input = input.replace(/(?:\n|<br ?\/?>\s*)*<\/blockquote>/g, "</blockquote>")
// The matrix spec hasn't decided whether \n counts as a newline or not, but I'm going to count it, because if it's in the data it's there for a reason.
// But I should not count it if it's between block elements.
input = input.replace(/(<\/?([^ >]+)[^>]*>)?\n(<\/?([^ >]+)[^>]*>)?/g, (whole, beforeContext, beforeTag, afterContext, afterTag) => {
// console.error(beforeContext, beforeTag, afterContext, afterTag)
if (typeof beforeTag !== "string" && typeof afterTag !== "string") {
return "<br>"
}
beforeContext = beforeContext || ""
beforeTag = beforeTag || ""
afterContext = afterContext || ""
afterTag = afterTag || ""
if (!mxUtils.BLOCK_ELEMENTS.includes(beforeTag.toUpperCase()) && !mxUtils.BLOCK_ELEMENTS.includes(afterTag.toUpperCase())) {
return beforeContext + "<br>" + afterContext
} else {
return whole
}
})
// Note: Element's renderers on Web and Android currently collapse whitespace, like the browser does. Turndown also collapses whitespace which is good for me.
// If later I'm using a client that doesn't collapse whitespace and I want turndown to follow suit, uncomment the following line of code, and it Just Works:
// input = input.replace(/ /g, "&nbsp;")
// There is also a corresponding test to uncomment, named "event2message: whitespace is retained"
// Handling written @mentions: we need to look for candidate Discord members to join to the room
// This shouldn't apply to code blocks, links, or inside attributes. So editing the HTML tree instead of regular expressions is a sensible choice here.
// We're using the domino parser because Turndown uses the same and can reuse this tree.
const doc = domino.createDocument(
// DOM parsers arrange elements in the <head> and <body>. Wrapping in a custom element ensures elements are reliably arranged in a single element.
'<x-turndown id="turndown-root">' + input + '</x-turndown>'
);
const root = doc.getElementById("turndown-root");
async function forEachNode(node) {
for (; node; node = node.nextSibling) {
// Check written mentions
if (node.nodeType === 3 && node.nodeValue.includes("@") && !nodeIsChildOf(node, ["A", "CODE", "PRE"])) {
const result = await checkWrittenMentions(node.nodeValue, event.sender, event.room_id, guild, di)
if (result) {
node.nodeValue = result.content
ensureJoined.push(...result.ensureJoined)
allowedMentionsParse.push(...result.allowedMentionsParse)
}
// Handling mentions of Discord users
input = input.replace(/("https:\/\/matrix.to\/#\/((?:@|%40)[^"]+)")>/g, (whole, attributeValue, mxid) => {
mxid = decodeURIComponent(mxid)
if (mxUtils.eventSenderIsFromDiscord(mxid)) {
// Handle mention of an OOYE sim user by their mxid
const id = select("sim", "user_id", {mxid}).pluck().get()
if (!id) return whole
return `${attributeValue} data-user-id="${id}">`
} else {
// Handle mention of a Matrix user by their mxid
// Check if this Matrix user is actually the sim user from another old bridge in the room?
const match = mxid.match(/[^:]*discord[^:]*_([0-9]{6,}):/) // try to match @_discord_123456, @_discordpuppet_123456, etc.
if (match) return `${attributeValue} data-user-id="${match[1]}">`
// Nope, just a real Matrix user.
return whole
}
// Check for incompatible backticks in code blocks
let preNode
if (node.nodeType === 3 && node.nodeValue.includes("```") && (preNode = nodeIsChildOf(node, ["PRE"]))) {
if (preNode.firstChild?.nodeName === "CODE") {
const ext = preNode.firstChild.className.match(/language-(\S+)/)?.[1] || "txt"
const filename = `inline_code.${ext}`
// Build the replacement <code> node
const replacementCode = doc.createElement("code")
replacementCode.textContent = `[${filename}]`
// Build its containing <span> node
const replacement = doc.createElement("span")
replacement.appendChild(doc.createTextNode(" "))
replacement.appendChild(replacementCode)
replacement.appendChild(doc.createTextNode(" "))
// Replace the code block with the <span>
preNode.replaceWith(replacement)
// Upload the code as an attachment
const content = getCodeContent(preNode.firstChild)
attachments.push({id: String(attachments.length), filename})
pendingFiles.push({name: filename, buffer: Buffer.from(content, "utf8")})
}
})
// Handling mentions of rooms and room-messages
input = await handleRoomOrMessageLinks(input, di)
// Stripping colons after mentions
input = input.replace(/( data-user-id.*?<\/a>):?/g, "$1")
input = input.replace(/("https:\/\/matrix.to.*?<\/a>):?/g, "$1")
// Element adds a bunch of <br> before </blockquote> but doesn't render them. I can't figure out how this even works in the browser, so let's just delete those.
input = input.replace(/(?:\n|<br ?\/?>\s*)*<\/blockquote>/g, "</blockquote>")
// The matrix spec hasn't decided whether \n counts as a newline or not, but I'm going to count it, because if it's in the data it's there for a reason.
// But I should not count it if it's between block elements.
input = input.replace(/(<\/?([^ >]+)[^>]*>)?\n(<\/?([^ >]+)[^>]*>)?/g, (whole, beforeContext, beforeTag, afterContext, afterTag) => {
// console.error(beforeContext, beforeTag, afterContext, afterTag)
if (typeof beforeTag !== "string" && typeof afterTag !== "string") {
return "<br>"
}
beforeContext = beforeContext || ""
beforeTag = beforeTag || ""
afterContext = afterContext || ""
afterTag = afterTag || ""
if (!mxUtils.BLOCK_ELEMENTS.includes(beforeTag.toUpperCase()) && !mxUtils.BLOCK_ELEMENTS.includes(afterTag.toUpperCase())) {
return beforeContext + "<br>" + afterContext
} else {
return whole
}
})
// Note: Element's renderers on Web and Android currently collapse whitespace, like the browser does. Turndown also collapses whitespace which is good for me.
// If later I'm using a client that doesn't collapse whitespace and I want turndown to follow suit, uncomment the following line of code, and it Just Works:
// input = input.replace(/ /g, "&nbsp;")
// There is also a corresponding test to uncomment, named "event2message: whitespace is retained"
// Handling written @mentions: we need to look for candidate Discord members to join to the room
// This shouldn't apply to code blocks, links, or inside attributes. So editing the HTML tree instead of regular expressions is a sensible choice here.
// We're using the domino parser because Turndown uses the same and can reuse this tree.
const doc = domino.createDocument(
// DOM parsers arrange elements in the <head> and <body>. Wrapping in a custom element ensures elements are reliably arranged in a single element.
'<x-turndown id="turndown-root">' + input + '</x-turndown>'
);
const root = doc.getElementById("turndown-root");
async function forEachNode(node) {
for (; node; node = node.nextSibling) {
// Check written mentions
if (node.nodeType === 3 && node.nodeValue.includes("@") && !nodeIsChildOf(node, ["A", "CODE", "PRE"])) {
const result = await checkWrittenMentions(node.nodeValue, event.sender, event.room_id, guild, di)
if (result) {
node.nodeValue = result.content
ensureJoined.push(...result.ensureJoined)
allowedMentionsParse.push(...result.allowedMentionsParse)
}
}
// Check for incompatible backticks in code blocks
let preNode
if (node.nodeType === 3 && node.nodeValue.includes("```") && (preNode = nodeIsChildOf(node, ["PRE"]))) {
if (preNode.firstChild?.nodeName === "CODE") {
const ext = preNode.firstChild.className.match(/language-(\S+)/)?.[1] || "txt"
const filename = `inline_code.${ext}`
// Build the replacement <code> node
const replacementCode = doc.createElement("code")
replacementCode.textContent = `[${filename}]`
// Build its containing <span> node
const replacement = doc.createElement("span")
replacement.appendChild(doc.createTextNode(" "))
replacement.appendChild(replacementCode)
replacement.appendChild(doc.createTextNode(" "))
// Replace the code block with the <span>
preNode.replaceWith(replacement)
// Upload the code as an attachment
const content = getCodeContent(preNode.firstChild)
attachments.push({id: String(attachments.length), filename})
pendingFiles.push({name: filename, buffer: Buffer.from(content, "utf8")})
}
}
await forEachNode(node.firstChild)
}
await forEachNode(node.firstChild)
}
await forEachNode(root)
// SPRITE SHEET EMOJIS FEATURE: Emojis at the end of the message that we don't know about will be reuploaded as a sprite sheet.
// First we need to determine which emojis are at the end.
endOfMessageEmojis = []
let match
let last = input.length
while ((match = input.slice(0, last).match(/<img [^>]*>\s*$/))) {
if (!match[0].includes("data-mx-emoticon")) break
const mxcUrl = match[0].match(/\bsrc="(mxc:\/\/[^"]+)"/)
if (mxcUrl) endOfMessageEmojis.unshift(mxcUrl[1])
assert(typeof match.index === "number", "Your JavaScript implementation does not comply with TC39: https://tc39.es/ecma262/multipage/text-processing.html#sec-regexpbuiltinexec")
last = match.index
}
// @ts-ignore bad type from turndown
content = turndownService.turndown(root)
// Put < > around any surviving matrix.to links to hide the URL previews
content = content.replace(/\bhttps?:\/\/matrix\.to\/[^<>\n )]*/g, "<$&>")
// It's designed for commonmark, we need to replace the space-space-newline with just newline
content = content.replace(/ \n/g, "\n")
// If there's a blockquote at the start of the message body and this message is a reply, they should be visually separated
if (replyLine && content.startsWith("> ")) content = "\n" + content
// SPRITE SHEET EMOJIS FEATURE:
content = await uploadEndOfMessageSpriteSheet(content, attachments, pendingFiles, di?.mxcDownloader)
} else {
// Looks like we're using the plaintext body!
content = event.content.body
if (event.content.msgtype === "m.emote") {
content = `* ${displayName} ${content}`
}
content = await handleRoomOrMessageLinks(content, di) // Replace matrix.to links with discord.com equivalents where possible
content = content.replace(/\bhttps?:\/\/matrix\.to\/[^<>\n )]*/, "<$&>") // Put < > around any surviving matrix.to links to hide the URL previews
const result = await checkWrittenMentions(content, event.sender, event.room_id, guild, di)
if (result) {
content = result.content
ensureJoined.push(...result.ensureJoined)
allowedMentionsParse.push(...result.allowedMentionsParse)
}
// Markdown needs to be escaped, though take care not to escape the middle of links
// @ts-ignore bad type from turndown
content = turndownService.escape(content)
}
await forEachNode(root)
// SPRITE SHEET EMOJIS FEATURE: Emojis at the end of the message that we don't know about will be reuploaded as a sprite sheet.
// First we need to determine which emojis are at the end.
endOfMessageEmojis = []
let match
let last = input.length
while ((match = input.slice(0, last).match(/<img [^>]*>\s*$/))) {
if (!match[0].includes("data-mx-emoticon")) break
const mxcUrl = match[0].match(/\bsrc="(mxc:\/\/[^"]+)"/)
if (mxcUrl) endOfMessageEmojis.unshift(mxcUrl[1])
assert(typeof match.index === "number", "Your JavaScript implementation does not comply with TC39: https://tc39.es/ecma262/multipage/text-processing.html#sec-regexpbuiltinexec")
last = match.index
}
// @ts-ignore bad type from turndown
content = turndownService.turndown(root)
// Put < > around any surviving matrix.to links to hide the URL previews
content = content.replace(/\bhttps?:\/\/matrix\.to\/[^<>\n )]*/g, "<$&>")
// It's designed for commonmark, we need to replace the space-space-newline with just newline
content = content.replace(/ \n/g, "\n")
// If there's a blockquote at the start of the message body and this message is a reply, they should be visually separated
if (replyLine && content.startsWith("> ")) content = "\n" + content
// SPRITE SHEET EMOJIS FEATURE:
content = await uploadEndOfMessageSpriteSheet(content, attachments, pendingFiles, di?.mxcDownloader)
} else {
// Looks like we're using the plaintext body!
content = event.content.body
if (event.content.msgtype === "m.emote") {
content = `* ${displayName} ${content}`
}
content = await handleRoomOrMessageLinks(content, di) // Replace matrix.to links with discord.com equivalents where possible
content = content.replace(/\bhttps?:\/\/matrix\.to\/[^<>\n )]*/, "<$&>") // Put < > around any surviving matrix.to links to hide the URL previews
const result = await checkWrittenMentions(content, event.sender, event.room_id, guild, di)
if (result) {
content = result.content
ensureJoined.push(...result.ensureJoined)
allowedMentionsParse.push(...result.allowedMentionsParse)
}
// Markdown needs to be escaped, though take care not to escape the middle of links
// @ts-ignore bad type from turndown
content = turndownService.escape(content)
}
}

View File

@@ -2671,6 +2671,99 @@ test("event2message: rich reply to a state event with no body", async t => {
)
})
test("event2message: rich reply with an image", async t => {
let called = 0
t.deepEqual(
await eventToMessage({
type: "m.room.message",
sender: "@cadence:cadence.moe",
content: {
body: "image.png",
info: {
size: 470379,
mimetype: "image/png",
thumbnail_info: {
w: 800,
h: 450,
mimetype: "image/png",
size: 183014
},
w: 1920,
h: 1080,
"xyz.amorgan.blurhash": "L24_wtVt00xuxvR%NFX74Toz?waL",
thumbnail_url: "mxc://cadence.moe/lPtnjlleowWCXGOHKVDyoXGn"
},
msgtype: "m.image",
"m.relates_to": {
"m.in_reply_to": {
event_id: "$Ij3qo7NxMA4VPexlAiIx2CB9JbsiGhJeyt-2OvkAUe4"
}
},
url: "mxc://cadence.moe/yxMobQMbSqNHpajxgSHtaooG"
},
origin_server_ts: 1764127662631,
unsigned: {
membership: "join",
age: 97,
transaction_id: "m1764127662540.2"
},
event_id: "$QOxkw7u8vjTrrdKxEUO13JWSixV7UXAZU1freT1SkHc",
room_id: "!kLRqKKUQXcibIMtOpl:cadence.moe"
}, data.guild.general, {
api: {
getEvent(roomID, eventID) {
called++
t.equal(roomID, "!kLRqKKUQXcibIMtOpl:cadence.moe")
t.equal(eventID, "$Ij3qo7NxMA4VPexlAiIx2CB9JbsiGhJeyt-2OvkAUe4")
return {
type: "m.room.message",
sender: "@cadence:cadence.moe",
content: {
msgtype: "m.text",
body: "you have to check every diff above insane on this set https://osu.ppy.sh/beatmapsets/2263303#osu/4826296"
},
origin_server_ts: 1763639396419,
unsigned: {
membership: "join",
age: 486586696,
transaction_id: "m1763639396324.578"
},
event_id: "$Ij3qo7NxMA4VPexlAiIx2CB9JbsiGhJeyt-2OvkAUe4",
room_id: "!kLRqKKUQXcibIMtOpl:cadence.moe"
}
}
}
}),
{
ensureJoined: [],
messagesToDelete: [],
messagesToEdit: [],
messagesToSend: [
{
content: "-# > <:L1:1144820033948762203><:L2:1144820084079087647>https://discord.com/channels/112760669178241024/112760669178241024/1128118177155526666 **Ⓜcadence [they]**: you have to check every diff above insane on this...",
allowed_mentions: {
parse: ["users", "roles"]
},
attachments: [
{
filename: "image.png",
id: "0",
},
],
avatar_url: undefined,
pendingFiles: [
{
mxc: "mxc://cadence.moe/yxMobQMbSqNHpajxgSHtaooG",
name: "image.png",
},
],
username: "cadence [they]",
},
]
}
)
})
test("event2message: raw mentioning discord users in plaintext body works", async t => {
t.deepEqual(
await eventToMessage({