diff --git a/src/m2d/converters/event-to-message.js b/src/m2d/converters/event-to-message.js index fd9289d..eca3008 100644 --- a/src/m2d/converters/event-to-message.js +++ b/src/m2d/converters/event-to-message.js @@ -605,7 +605,7 @@ async function eventToMessage(event, guild, di) { } attachments.push({id: "0", filename}) pendingFiles.push({name: filename, mxc: event.content.url}) - } else if (shouldProcessTextEvent) { + } else { // Handling edits. If the edit was an edit of a reply, edits do not include the reply reference, so we need to fetch up to 2 more events. // this event ---is an edit of--> original event ---is a reply to--> past event await (async () => { @@ -738,157 +738,159 @@ async function eventToMessage(event, guild, di) { replyLine = `-# > ${replyLine}${contentPreview}\n` })() - if (event.content.format === "org.matrix.custom.html" && event.content.formatted_body) { - let input = event.content.formatted_body - if (event.content.msgtype === "m.emote") { - input = `* ${displayName} ${input}` - } - - // Handling mentions of Discord users - input = input.replace(/("https:\/\/matrix.to\/#\/((?:@|%40)[^"]+)")>/g, (whole, attributeValue, mxid) => { - mxid = decodeURIComponent(mxid) - if (mxUtils.eventSenderIsFromDiscord(mxid)) { - // Handle mention of an OOYE sim user by their mxid - const id = select("sim", "user_id", {mxid}).pluck().get() - if (!id) return whole - return `${attributeValue} data-user-id="${id}">` - } else { - // Handle mention of a Matrix user by their mxid - // Check if this Matrix user is actually the sim user from another old bridge in the room? - const match = mxid.match(/[^:]*discord[^:]*_([0-9]{6,}):/) // try to match @_discord_123456, @_discordpuppet_123456, etc. - if (match) return `${attributeValue} data-user-id="${match[1]}">` - // Nope, just a real Matrix user. 
- return whole + if (shouldProcessTextEvent) { + if (event.content.format === "org.matrix.custom.html" && event.content.formatted_body) { + let input = event.content.formatted_body + if (event.content.msgtype === "m.emote") { + input = `* ${displayName} ${input}` } - }) - // Handling mentions of rooms and room-messages - input = await handleRoomOrMessageLinks(input, di) - - // Stripping colons after mentions - input = input.replace(/( data-user-id.*?<\/a>):?/g, "$1") - input = input.replace(/("https:\/\/matrix.to.*?<\/a>):?/g, "$1") - - // Element adds a bunch of
before but doesn't render them. I can't figure out how this even works in the browser, so let's just delete those. - input = input.replace(/(?:\n|
\s*)*<\/blockquote>/g, "") - - // The matrix spec hasn't decided whether \n counts as a newline or not, but I'm going to count it, because if it's in the data it's there for a reason. - // But I should not count it if it's between block elements. - input = input.replace(/(<\/?([^ >]+)[^>]*>)?\n(<\/?([^ >]+)[^>]*>)?/g, (whole, beforeContext, beforeTag, afterContext, afterTag) => { - // console.error(beforeContext, beforeTag, afterContext, afterTag) - if (typeof beforeTag !== "string" && typeof afterTag !== "string") { - return "
" - } - beforeContext = beforeContext || "" - beforeTag = beforeTag || "" - afterContext = afterContext || "" - afterTag = afterTag || "" - if (!mxUtils.BLOCK_ELEMENTS.includes(beforeTag.toUpperCase()) && !mxUtils.BLOCK_ELEMENTS.includes(afterTag.toUpperCase())) { - return beforeContext + "
" + afterContext - } else { - return whole - } - }) - - // Note: Element's renderers on Web and Android currently collapse whitespace, like the browser does. Turndown also collapses whitespace which is good for me. - // If later I'm using a client that doesn't collapse whitespace and I want turndown to follow suit, uncomment the following line of code, and it Just Works: - // input = input.replace(/ /g, " ") - // There is also a corresponding test to uncomment, named "event2message: whitespace is retained" - - // Handling written @mentions: we need to look for candidate Discord members to join to the room - // This shouldn't apply to code blocks, links, or inside attributes. So editing the HTML tree instead of regular expressions is a sensible choice here. - // We're using the domino parser because Turndown uses the same and can reuse this tree. - const doc = domino.createDocument( - // DOM parsers arrange elements in the and . Wrapping in a custom element ensures elements are reliably arranged in a single element. 
- '' + input + '' - ); - const root = doc.getElementById("turndown-root"); - async function forEachNode(node) { - for (; node; node = node.nextSibling) { - // Check written mentions - if (node.nodeType === 3 && node.nodeValue.includes("@") && !nodeIsChildOf(node, ["A", "CODE", "PRE"])) { - const result = await checkWrittenMentions(node.nodeValue, event.sender, event.room_id, guild, di) - if (result) { - node.nodeValue = result.content - ensureJoined.push(...result.ensureJoined) - allowedMentionsParse.push(...result.allowedMentionsParse) - } + // Handling mentions of Discord users + input = input.replace(/("https:\/\/matrix.to\/#\/((?:@|%40)[^"]+)")>/g, (whole, attributeValue, mxid) => { + mxid = decodeURIComponent(mxid) + if (mxUtils.eventSenderIsFromDiscord(mxid)) { + // Handle mention of an OOYE sim user by their mxid + const id = select("sim", "user_id", {mxid}).pluck().get() + if (!id) return whole + return `${attributeValue} data-user-id="${id}">` + } else { + // Handle mention of a Matrix user by their mxid + // Check if this Matrix user is actually the sim user from another old bridge in the room? + const match = mxid.match(/[^:]*discord[^:]*_([0-9]{6,}):/) // try to match @_discord_123456, @_discordpuppet_123456, etc. + if (match) return `${attributeValue} data-user-id="${match[1]}">` + // Nope, just a real Matrix user. 
+ return whole } - // Check for incompatible backticks in code blocks - let preNode - if (node.nodeType === 3 && node.nodeValue.includes("```") && (preNode = nodeIsChildOf(node, ["PRE"]))) { - if (preNode.firstChild?.nodeName === "CODE") { - const ext = preNode.firstChild.className.match(/language-(\S+)/)?.[1] || "txt" - const filename = `inline_code.${ext}` - // Build the replacement node - const replacementCode = doc.createElement("code") - replacementCode.textContent = `[${filename}]` - // Build its containing node - const replacement = doc.createElement("span") - replacement.appendChild(doc.createTextNode(" ")) - replacement.appendChild(replacementCode) - replacement.appendChild(doc.createTextNode(" ")) - // Replace the code block with the - preNode.replaceWith(replacement) - // Upload the code as an attachment - const content = getCodeContent(preNode.firstChild) - attachments.push({id: String(attachments.length), filename}) - pendingFiles.push({name: filename, buffer: Buffer.from(content, "utf8")}) - } + }) + + // Handling mentions of rooms and room-messages + input = await handleRoomOrMessageLinks(input, di) + + // Stripping colons after mentions + input = input.replace(/( data-user-id.*?<\/a>):?/g, "$1") + input = input.replace(/("https:\/\/matrix.to.*?<\/a>):?/g, "$1") + + // Element adds a bunch of
before but doesn't render them. I can't figure out how this even works in the browser, so let's just delete those. + input = input.replace(/(?:\n|
\s*)*<\/blockquote>/g, "") + + // The matrix spec hasn't decided whether \n counts as a newline or not, but I'm going to count it, because if it's in the data it's there for a reason. + // But I should not count it if it's between block elements. + input = input.replace(/(<\/?([^ >]+)[^>]*>)?\n(<\/?([^ >]+)[^>]*>)?/g, (whole, beforeContext, beforeTag, afterContext, afterTag) => { + // console.error(beforeContext, beforeTag, afterContext, afterTag) + if (typeof beforeTag !== "string" && typeof afterTag !== "string") { + return "
" + } + beforeContext = beforeContext || "" + beforeTag = beforeTag || "" + afterContext = afterContext || "" + afterTag = afterTag || "" + if (!mxUtils.BLOCK_ELEMENTS.includes(beforeTag.toUpperCase()) && !mxUtils.BLOCK_ELEMENTS.includes(afterTag.toUpperCase())) { + return beforeContext + "
" + afterContext + } else { + return whole + } + }) + + // Note: Element's renderers on Web and Android currently collapse whitespace, like the browser does. Turndown also collapses whitespace which is good for me. + // If later I'm using a client that doesn't collapse whitespace and I want turndown to follow suit, uncomment the following line of code, and it Just Works: + // input = input.replace(/ /g, " ") + // There is also a corresponding test to uncomment, named "event2message: whitespace is retained" + + // Handling written @mentions: we need to look for candidate Discord members to join to the room + // This shouldn't apply to code blocks, links, or inside attributes. So editing the HTML tree instead of regular expressions is a sensible choice here. + // We're using the domino parser because Turndown uses the same and can reuse this tree. + const doc = domino.createDocument( + // DOM parsers arrange elements in the and . Wrapping in a custom element ensures elements are reliably arranged in a single element. 
+ '' + input + '' + ); + const root = doc.getElementById("turndown-root"); + async function forEachNode(node) { + for (; node; node = node.nextSibling) { + // Check written mentions + if (node.nodeType === 3 && node.nodeValue.includes("@") && !nodeIsChildOf(node, ["A", "CODE", "PRE"])) { + const result = await checkWrittenMentions(node.nodeValue, event.sender, event.room_id, guild, di) + if (result) { + node.nodeValue = result.content + ensureJoined.push(...result.ensureJoined) + allowedMentionsParse.push(...result.allowedMentionsParse) + } + } + // Check for incompatible backticks in code blocks + let preNode + if (node.nodeType === 3 && node.nodeValue.includes("```") && (preNode = nodeIsChildOf(node, ["PRE"]))) { + if (preNode.firstChild?.nodeName === "CODE") { + const ext = preNode.firstChild.className.match(/language-(\S+)/)?.[1] || "txt" + const filename = `inline_code.${ext}` + // Build the replacement node + const replacementCode = doc.createElement("code") + replacementCode.textContent = `[${filename}]` + // Build its containing node + const replacement = doc.createElement("span") + replacement.appendChild(doc.createTextNode(" ")) + replacement.appendChild(replacementCode) + replacement.appendChild(doc.createTextNode(" ")) + // Replace the code block with the + preNode.replaceWith(replacement) + // Upload the code as an attachment + const content = getCodeContent(preNode.firstChild) + attachments.push({id: String(attachments.length), filename}) + pendingFiles.push({name: filename, buffer: Buffer.from(content, "utf8")}) + } + } + await forEachNode(node.firstChild) } - await forEachNode(node.firstChild) } + await forEachNode(root) + + // SPRITE SHEET EMOJIS FEATURE: Emojis at the end of the message that we don't know about will be reuploaded as a sprite sheet. + // First we need to determine which emojis are at the end. 
+ endOfMessageEmojis = [] + let match + let last = input.length + while ((match = input.slice(0, last).match(/]*>\s*$/))) { + if (!match[0].includes("data-mx-emoticon")) break + const mxcUrl = match[0].match(/\bsrc="(mxc:\/\/[^"]+)"/) + if (mxcUrl) endOfMessageEmojis.unshift(mxcUrl[1]) + assert(typeof match.index === "number", "Your JavaScript implementation does not comply with TC39: https://tc39.es/ecma262/multipage/text-processing.html#sec-regexpbuiltinexec") + last = match.index + } + + // @ts-ignore bad type from turndown + content = turndownService.turndown(root) + + // Put < > around any surviving matrix.to links to hide the URL previews + content = content.replace(/\bhttps?:\/\/matrix\.to\/[^<>\n )]*/g, "<$&>") + + // It's designed for commonmark, we need to replace the space-space-newline with just newline + content = content.replace(/ \n/g, "\n") + + // If there's a blockquote at the start of the message body and this message is a reply, they should be visually separated + if (replyLine && content.startsWith("> ")) content = "\n" + content + + // SPRITE SHEET EMOJIS FEATURE: + content = await uploadEndOfMessageSpriteSheet(content, attachments, pendingFiles, di?.mxcDownloader) + } else { + // Looks like we're using the plaintext body! 
+ content = event.content.body + + if (event.content.msgtype === "m.emote") { + content = `* ${displayName} ${content}` + } + + content = await handleRoomOrMessageLinks(content, di) // Replace matrix.to links with discord.com equivalents where possible + content = content.replace(/\bhttps?:\/\/matrix\.to\/[^<>\n )]*/, "<$&>") // Put < > around any surviving matrix.to links to hide the URL previews + + const result = await checkWrittenMentions(content, event.sender, event.room_id, guild, di) + if (result) { + content = result.content + ensureJoined.push(...result.ensureJoined) + allowedMentionsParse.push(...result.allowedMentionsParse) + } + + // Markdown needs to be escaped, though take care not to escape the middle of links + // @ts-ignore bad type from turndown + content = turndownService.escape(content) } - await forEachNode(root) - - // SPRITE SHEET EMOJIS FEATURE: Emojis at the end of the message that we don't know about will be reuploaded as a sprite sheet. - // First we need to determine which emojis are at the end. 
- endOfMessageEmojis = [] - let match - let last = input.length - while ((match = input.slice(0, last).match(/]*>\s*$/))) { - if (!match[0].includes("data-mx-emoticon")) break - const mxcUrl = match[0].match(/\bsrc="(mxc:\/\/[^"]+)"/) - if (mxcUrl) endOfMessageEmojis.unshift(mxcUrl[1]) - assert(typeof match.index === "number", "Your JavaScript implementation does not comply with TC39: https://tc39.es/ecma262/multipage/text-processing.html#sec-regexpbuiltinexec") - last = match.index - } - - // @ts-ignore bad type from turndown - content = turndownService.turndown(root) - - // Put < > around any surviving matrix.to links to hide the URL previews - content = content.replace(/\bhttps?:\/\/matrix\.to\/[^<>\n )]*/g, "<$&>") - - // It's designed for commonmark, we need to replace the space-space-newline with just newline - content = content.replace(/ \n/g, "\n") - - // If there's a blockquote at the start of the message body and this message is a reply, they should be visually separated - if (replyLine && content.startsWith("> ")) content = "\n" + content - - // SPRITE SHEET EMOJIS FEATURE: - content = await uploadEndOfMessageSpriteSheet(content, attachments, pendingFiles, di?.mxcDownloader) - } else { - // Looks like we're using the plaintext body! 
- content = event.content.body - - if (event.content.msgtype === "m.emote") { - content = `* ${displayName} ${content}` - } - - content = await handleRoomOrMessageLinks(content, di) // Replace matrix.to links with discord.com equivalents where possible - content = content.replace(/\bhttps?:\/\/matrix\.to\/[^<>\n )]*/, "<$&>") // Put < > around any surviving matrix.to links to hide the URL previews - - const result = await checkWrittenMentions(content, event.sender, event.room_id, guild, di) - if (result) { - content = result.content - ensureJoined.push(...result.ensureJoined) - allowedMentionsParse.push(...result.allowedMentionsParse) - } - - // Markdown needs to be escaped, though take care not to escape the middle of links - // @ts-ignore bad type from turndown - content = turndownService.escape(content) } } diff --git a/src/m2d/converters/event-to-message.test.js b/src/m2d/converters/event-to-message.test.js index 73ca4e9..2e347f5 100644 --- a/src/m2d/converters/event-to-message.test.js +++ b/src/m2d/converters/event-to-message.test.js @@ -2671,6 +2671,99 @@ test("event2message: rich reply to a state event with no body", async t => { ) }) +test("event2message: rich reply with an image", async t => { + let called = 0 + t.deepEqual( + await eventToMessage({ + type: "m.room.message", + sender: "@cadence:cadence.moe", + content: { + body: "image.png", + info: { + size: 470379, + mimetype: "image/png", + thumbnail_info: { + w: 800, + h: 450, + mimetype: "image/png", + size: 183014 + }, + w: 1920, + h: 1080, + "xyz.amorgan.blurhash": "L24_wtVt00xuxvR%NFX74Toz?waL", + thumbnail_url: "mxc://cadence.moe/lPtnjlleowWCXGOHKVDyoXGn" + }, + msgtype: "m.image", + "m.relates_to": { + "m.in_reply_to": { + event_id: "$Ij3qo7NxMA4VPexlAiIx2CB9JbsiGhJeyt-2OvkAUe4" + } + }, + url: "mxc://cadence.moe/yxMobQMbSqNHpajxgSHtaooG" + }, + origin_server_ts: 1764127662631, + unsigned: { + membership: "join", + age: 97, + transaction_id: "m1764127662540.2" + }, + event_id:
"$QOxkw7u8vjTrrdKxEUO13JWSixV7UXAZU1freT1SkHc", + room_id: "!kLRqKKUQXcibIMtOpl:cadence.moe" + }, data.guild.general, { + api: { + getEvent(roomID, eventID) { + called++ + t.equal(roomID, "!kLRqKKUQXcibIMtOpl:cadence.moe") + t.equal(eventID, "$Ij3qo7NxMA4VPexlAiIx2CB9JbsiGhJeyt-2OvkAUe4") + return { + type: "m.room.message", + sender: "@cadence:cadence.moe", + content: { + msgtype: "m.text", + body: "you have to check every diff above insane on this set https://osu.ppy.sh/beatmapsets/2263303#osu/4826296" + }, + origin_server_ts: 1763639396419, + unsigned: { + membership: "join", + age: 486586696, + transaction_id: "m1763639396324.578" + }, + event_id: "$Ij3qo7NxMA4VPexlAiIx2CB9JbsiGhJeyt-2OvkAUe4", + room_id: "!kLRqKKUQXcibIMtOpl:cadence.moe" + } + } + } + }), + { + ensureJoined: [], + messagesToDelete: [], + messagesToEdit: [], + messagesToSend: [ + { + content: "-# > <:L1:1144820033948762203><:L2:1144820084079087647>https://discord.com/channels/112760669178241024/112760669178241024/1128118177155526666 **Ⓜcadence [they]**: you have to check every diff above insane on this...", + allowed_mentions: { + parse: ["users", "roles"] + }, + attachments: [ + { + filename: "image.png", + id: "0", + }, + ], + avatar_url: undefined, + pendingFiles: [ + { + mxc: "mxc://cadence.moe/yxMobQMbSqNHpajxgSHtaooG", + name: "image.png", + }, + ], + username: "cadence [they]", + }, + ] + } + ) +}) + test("event2message: raw mentioning discord users in plaintext body works", async t => { t.deepEqual( await eventToMessage({