m->d: fix image captions spec, fix upload issues

This commit is contained in:
Cadence Ember
2025-02-28 00:39:03 +13:00
parent 050cc9cee9
commit a8670323a0
10 changed files with 203 additions and 97 deletions

View File

@@ -89,14 +89,14 @@ Whether you read those or not, I'm more than happy to help you 1-on-1 with codin
# Dependency justification
Total transitive production dependencies: 139
Total transitive production dependencies: 137
### <font size="+2">🦕</font>
* (31) better-sqlite3: SQLite3 is the best database, and this is the best library for it.
* (27) @cloudrac3r/pug: Language for dynamic web pages. This is my fork. (I released code that hadn't made it to npm, and removed the heavy pug-filters feature.)
* (16) stream-mime-type@1: This seems like the best option. Version 1 is used because version 2 is ESM-only.
* (10) h3: Web server. OOYE needs this for the appservice listener, authmedia proxy, and more. 14 transitive dependencies is on the low end for a web server.
* (9) h3: Web server. OOYE needs this for the appservice listener, authmedia proxy, and more. 9 transitive dependencies is on the low end for a web server.
* (11) sharp: Image resizing and compositing. OOYE needs this for the emoji sprite sheets.
### <font size="-1">🪱</font>
@@ -118,7 +118,6 @@ Total transitive production dependencies: 139
* (1) enquirer: Interactive prompting for the initial setup rather than forcing users to edit YAML non-interactively.
* (0) entities: Looks fine. No dependencies.
* (0) get-relative-path: Looks fine. No dependencies.
* (0) get-stream: Only needed if content_length_workaround is true.
* (1) heatsync: Module hot-reloader that I trust.
* (1) js-yaml: Will be removed in the future after registration.yaml is converted to JSON.
* (0) lru-cache: For holding unused nonces in memory and letting them be overwritten later if never used.

46
package-lock.json generated
View File

@@ -29,8 +29,7 @@
"enquirer": "^2.4.1",
"entities": "^5.0.0",
"get-relative-path": "^1.0.2",
"get-stream": "^6.0.1",
"h3": "^1.12.0",
"h3": "^1.15.1",
"heatsync": "^2.7.2",
"htmx.org": "^2.0.4",
"lru-cache": "^10.4.3",
@@ -46,7 +45,7 @@
},
"devDependencies": {
"@cloudrac3r/tap-dot": "^2.0.3",
"@types/node": "^18.16.0",
"@types/node": "^20.17.19",
"c8": "^10.1.2",
"cross-env": "^7.0.3",
"supertape": "^10.4.0"
@@ -1076,13 +1075,13 @@
"dev": true
},
"node_modules/@types/node": {
"version": "18.19.76",
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.76.tgz",
"integrity": "sha512-yvR7Q9LdPz2vGpmpJX5LolrgRdWvB67MJKDPSgIIzpFbaf9a1j/f5DnLp5VDyHGMR0QZHlTr1afsD87QCXFHKw==",
"version": "20.17.19",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.17.19.tgz",
"integrity": "sha512-LEwC7o1ifqg/6r2gn9Dns0f1rhK+fPFDoMiceTJ6kWmVk6bgXBI/9IOWfVan4WiAavK9pIVWdX0/e3J+eEUh5A==",
"dev": true,
"license": "MIT",
"dependencies": {
"undici-types": "~5.26.4"
"undici-types": "~6.19.2"
}
},
"node_modules/@types/prop-types": {
@@ -1795,17 +1794,6 @@
"source-map": "^0.6.1"
}
},
"node_modules/get-stream": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz",
"integrity": "sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==",
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/github-from-package": {
"version": "0.0.0",
"resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz",
@@ -1833,9 +1821,10 @@
}
},
"node_modules/h3": {
"version": "1.15.0",
"resolved": "https://registry.npmjs.org/h3/-/h3-1.15.0.tgz",
"integrity": "sha512-OsjX4JW8J4XGgCgEcad20pepFQWnuKH+OwkCJjogF3C+9AZ1iYdtB4hX6vAb5DskBiu5ljEXqApINjR8CqoCMQ==",
"version": "1.15.1",
"resolved": "https://registry.npmjs.org/h3/-/h3-1.15.1.tgz",
"integrity": "sha512-+ORaOBttdUm1E2Uu/obAyCguiI7MbBvsLTndc3gyK3zU+SYLoZXlyCP9Xgy0gikkGufFLTZXCXD6+4BsufnmHA==",
"license": "MIT",
"dependencies": {
"cookie-es": "^1.2.2",
"crossws": "^0.3.3",
@@ -1843,7 +1832,6 @@
"destr": "^2.0.3",
"iron-webcrypto": "^1.2.1",
"node-mock-http": "^1.0.0",
"ohash": "^1.1.4",
"radix3": "^1.1.2",
"ufo": "^1.5.4",
"uncrypto": "^0.1.3"
@@ -2197,11 +2185,6 @@
"node": ">=0.10.0"
}
},
"node_modules/ohash": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/ohash/-/ohash-1.1.4.tgz",
"integrity": "sha512-FlDryZAahJmEF3VR3w1KogSEdWX3WhA5GPakFx4J81kEAiHyLMpdLLElS8n8dfNadMgAne/MywcvmogzscVt4g=="
},
"node_modules/once": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
@@ -3115,10 +3098,11 @@
"license": "MIT"
},
"node_modules/undici-types": {
"version": "5.26.5",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
"dev": true
"version": "6.19.8",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz",
"integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==",
"dev": true,
"license": "MIT"
},
"node_modules/uqr": {
"version": "0.1.2",

View File

@@ -38,8 +38,7 @@
"enquirer": "^2.4.1",
"entities": "^5.0.0",
"get-relative-path": "^1.0.2",
"get-stream": "^6.0.1",
"h3": "^1.12.0",
"h3": "^1.15.1",
"heatsync": "^2.7.2",
"htmx.org": "^2.0.4",
"lru-cache": "^10.4.3",
@@ -55,7 +54,7 @@
},
"devDependencies": {
"@cloudrac3r/tap-dot": "^2.0.3",
"@types/node": "^18.16.0",
"@types/node": "^20.17.19",
"c8": "^10.1.2",
"cross-env": "^7.0.3",
"supertape": "^10.4.0"
@@ -66,6 +65,6 @@
"addbot": "node addbot.js",
"test": "cross-env FORCE_COLOR=true supertape --no-check-assertions-count --format tap --no-worker test/test.js | tap-dot",
"test-slow": "cross-env FORCE_COLOR=true supertape --no-check-assertions-count --format tap --no-worker test/test.js -- --slow | tap-dot",
"cover": "c8 -o test/coverage --skip-full -x db/migrations -x src/matrix/file.js -x src/matrix/api.js -x src/matrix/mreq.js -x src/d2m/converters/rlottie-wasm.js -r html -r text supertape --no-check-assertions-count --format fail --no-worker test/test.js -- --slow"
"cover": "c8 -o test/coverage --skip-full -x db/migrations -x src/matrix/file.js -x src/matrix/api.js -x src/d2m/converters/rlottie-wasm.js -r html -r text supertape --no-check-assertions-count --format fail --no-worker test/test.js -- --slow"
}
}

View File

@@ -315,7 +315,7 @@ function getUserOrProxyOwnerID(mxid) {
* At the time of this executing, we know what the end of message emojis are, and we know that at least one of them is unknown.
* This function will strip them from the content and generate the correct pending file of the sprite sheet.
* @param {string} content
* @param {{id: string, name: string}[]} attachments
* @param {{id: string, filename: string}[]} attachments
* @param {({name: string, mxc: string} | {name: string, mxc: string, key: string, iv: string} | {name: string, buffer: Buffer})[]} pendingFiles
* @param {(mxc: string) => Promise<Buffer | undefined>} mxcDownloader function that will download the mxc URLs and convert to uncompressed PNG data. use `getAndConvertEmoji` or a mock.
*/
@@ -329,9 +329,9 @@ async function uploadEndOfMessageSpriteSheet(content, attachments, pendingFiles,
// Create a sprite sheet of known and unknown emojis from the end of the message
const buffer = await emojiSheet.compositeMatrixEmojis(endOfMessageEmojis, mxcDownloader)
// Attach it
const name = "emojis.png"
attachments.push({id: String(attachments.length), name})
pendingFiles.push({name, buffer})
const filename = "emojis.png"
attachments.push({id: String(attachments.length), filename})
pendingFiles.push({name: filename, buffer})
return content
}
@@ -486,6 +486,7 @@ async function eventToMessage(event, guild, di) {
}
let content = event.content.body // ultimate fallback
/** @type {{id: string, filename: string}[]} */
const attachments = []
/** @type {({name: string, mxc: string} | {name: string, mxc: string, key: string, iv: string} | {name: string, buffer: Buffer})[]} */
const pendingFiles = []
@@ -493,7 +494,45 @@ async function eventToMessage(event, guild, di) {
const ensureJoined = []
// Convert content depending on what the message is
if (event.type === "m.room.message" && (event.content.msgtype === "m.text" || event.content.msgtype === "m.emote")) {
// Handle images first - might need to handle their `body`/`formatted_body` as well, which will fall through to the text processor
let shouldProcessTextEvent = event.type === "m.room.message" && (event.content.msgtype === "m.text" || event.content.msgtype === "m.emote")
if (event.type === "m.room.message" && (event.content.msgtype === "m.file" || event.content.msgtype === "m.video" || event.content.msgtype === "m.audio" || event.content.msgtype === "m.image")) {
content = ""
const filename = event.content.filename || event.content.body
if ("url" in event.content) {
// Unencrypted
attachments.push({id: "0", filename})
pendingFiles.push({name: filename, mxc: event.content.url})
} else {
// Encrypted
assert.equal(event.content.file.key.alg, "A256CTR")
attachments.push({id: "0", filename})
pendingFiles.push({name: filename, mxc: event.content.file.url, key: event.content.file.key.k, iv: event.content.file.iv})
}
// Check if we also need to process a text event for this image - if it has a caption that's different from its filename
if ((event.content.body && event.content.filename && event.content.body !== event.content.filename) || event.content.formatted_body) {
shouldProcessTextEvent = true
}
}
if (event.type === "m.sticker") {
content = ""
let filename = event.content.body
if (event.type === "m.sticker") {
let mimetype
if (event.content.info?.mimetype?.includes("/")) {
mimetype = event.content.info.mimetype
} else {
const res = await di.api.getMedia(event.content.url, {method: "HEAD"})
if (res.status === 200) {
mimetype = res.headers.get("content-type")
}
if (!mimetype) throw new Error(`Server error ${res.status} or missing content-type while detecting sticker mimetype`)
}
filename += "." + mimetype.split("/")[1]
}
attachments.push({id: "0", filename})
pendingFiles.push({name: filename, mxc: event.content.url})
} else if (shouldProcessTextEvent) {
// Handling edits. If the edit was an edit of a reply, edits do not include the reply reference, so we need to fetch up to 2 more events.
// this event ---is an edit of--> original event ---is a reply to--> past event
await (async () => {
@@ -780,40 +819,6 @@ async function eventToMessage(event, guild, di) {
// @ts-ignore bad type from turndown
content = turndownService.escape(content)
}
} else if (event.type === "m.room.message" && (event.content.msgtype === "m.file" || event.content.msgtype === "m.video" || event.content.msgtype === "m.audio" || event.content.msgtype === "m.image")) {
content = ""
const filename = event.content.filename || event.content.body
// A written `event.content.body` will be bridged to Discord's image `description` which is like alt text.
// Bridging as description rather than message content in order to match Matrix clients (Element, Neochat) which treat this as alt text or title text.
const description = (event.content.body !== event.content.filename && event.content.filename && event.content.body) || undefined
if ("url" in event.content) {
// Unencrypted
attachments.push({id: "0", description, filename})
pendingFiles.push({name: filename, mxc: event.content.url})
} else {
// Encrypted
assert.equal(event.content.file.key.alg, "A256CTR")
attachments.push({id: "0", description, filename})
pendingFiles.push({name: filename, mxc: event.content.file.url, key: event.content.file.key.k, iv: event.content.file.iv})
}
} else if (event.type === "m.sticker") {
content = ""
let filename = event.content.body
if (event.type === "m.sticker") {
let mimetype
if (event.content.info?.mimetype?.includes("/")) {
mimetype = event.content.info.mimetype
} else {
const res = await di.api.getMedia(event.content.url, {method: "HEAD"})
if (res.status === 200) {
mimetype = res.headers.get("content-type")
}
if (!mimetype) throw new Error(`Server error ${res.status} or missing content-type while detecting sticker mimetype`)
}
filename += "." + mimetype.split("/")[1]
}
attachments.push({id: "0", filename})
pendingFiles.push({name: filename, mxc: event.content.url})
}
content = displayNameRunoff + replyLine + content

View File

@@ -3770,7 +3770,7 @@ test("event2message: text attachments work", async t => {
username: "cadence [they]",
content: "",
avatar_url: "https://bridge.example.org/download/matrix/cadence.moe/azCAhThKTojXSZJRoWwZmhvU",
attachments: [{id: "0", description: undefined, filename: "chiki-powerups.txt"}],
attachments: [{id: "0", filename: "chiki-powerups.txt"}],
pendingFiles: [{name: "chiki-powerups.txt", mxc: "mxc://cadence.moe/zyThGlYQxvlvBVbVgKDDbiHH"}]
}]
}
@@ -3806,14 +3806,14 @@ test("event2message: image attachments work", async t => {
username: "cadence [they]",
content: "",
avatar_url: "https://bridge.example.org/download/matrix/cadence.moe/azCAhThKTojXSZJRoWwZmhvU",
attachments: [{id: "0", description: undefined, filename: "cool cat.png"}],
attachments: [{id: "0", filename: "cool cat.png"}],
pendingFiles: [{name: "cool cat.png", mxc: "mxc://cadence.moe/IvxVJFLEuksCNnbojdSIeEvn"}]
}]
}
)
})
test("event2message: image attachments can have a custom description", async t => {
test("event2message: image attachments can have a plaintext caption", async t => {
t.deepEqual(
await eventToMessage({
type: "m.room.message",
@@ -3840,10 +3840,62 @@ test("event2message: image attachments can have a custom description", async t =
messagesToEdit: [],
messagesToSend: [{
username: "cadence [they]",
content: "",
content: "Cat emoji surrounded by pink hearts",
avatar_url: "https://bridge.example.org/download/matrix/cadence.moe/azCAhThKTojXSZJRoWwZmhvU",
attachments: [{id: "0", description: "Cat emoji surrounded by pink hearts", filename: "cool cat.png"}],
pendingFiles: [{name: "cool cat.png", mxc: "mxc://cadence.moe/IvxVJFLEuksCNnbojdSIeEvn"}]
attachments: [{id: "0", filename: "cool cat.png"}],
pendingFiles: [{name: "cool cat.png", mxc: "mxc://cadence.moe/IvxVJFLEuksCNnbojdSIeEvn"}],
allowed_mentions: {
parse: ["users", "roles"]
}
}]
}
)
})
test("event2message: image attachments can have a formatted caption", async t => {
t.deepEqual(
await eventToMessage({
content: {
body: "this event has `formatting`",
filename: "5740.jpg",
format: "org.matrix.custom.html",
formatted_body: "this event has <code>formatting</code>",
info: {
h: 1340,
mimetype: "image/jpeg",
size: 226689,
thumbnail_info: {
h: 670,
mimetype: "image/jpeg",
size: 80157,
w: 540
},
thumbnail_url: "mxc://thomcat.rocks/XhLsOCDBYyearsLQgUUrbAvw",
w: 1080,
"xyz.amorgan.blurhash": "KHJQG*55ic-.}?0M58J.9v"
},
msgtype: "m.image",
url: "mxc://thomcat.rocks/RTHsXmcMPXmuHqVNsnbKtRbh"
},
origin_server_ts: 1740607766895,
sender: "@cadence:cadence.moe",
type: "m.room.message",
event_id: "$NqNqVgukiQm1nynm9vIr9FIq31hZpQ3udOd7cBIW46U",
room_id: "!BnKuBPCvyfOkhcUjEu:cadence.moe"
}),
{
ensureJoined: [],
messagesToDelete: [],
messagesToEdit: [],
messagesToSend: [{
username: "cadence [they]",
content: "this event has `formatting`",
avatar_url: "https://bridge.example.org/download/matrix/cadence.moe/azCAhThKTojXSZJRoWwZmhvU",
attachments: [{id: "0", filename: "5740.jpg"}],
pendingFiles: [{name: "5740.jpg", mxc: "mxc://thomcat.rocks/RTHsXmcMPXmuHqVNsnbKtRbh"}],
allowed_mentions: {
parse: ["users", "roles"]
}
}]
}
)
@@ -3892,7 +3944,7 @@ test("event2message: encrypted image attachments work", async t => {
username: "cadence [they]",
content: "",
avatar_url: "https://bridge.example.org/download/matrix/cadence.moe/azCAhThKTojXSZJRoWwZmhvU",
attachments: [{id: "0", description: undefined, filename: "image.png"}],
attachments: [{id: "0", filename: "image.png"}],
pendingFiles: [{
name: "image.png",
mxc: "mxc://heyquark.com/LOGkUTlVFrqfiExlGZNgCJJX",

View File

@@ -1,9 +1,9 @@
// @ts-check
const mixin = require("@cloudrac3r/mixin-deep")
const stream = require("stream")
const streamWeb = require("stream/web")
const getStream = require("get-stream")
const {buffer} = require("stream/consumers")
const mixin = require("@cloudrac3r/mixin-deep")
const {reg, writeRegistration} = require("./read-registration.js")
@@ -19,20 +19,33 @@ class MatrixServerError extends Error {
}
}
/**
 * Turn a caller-supplied request body into a value that can be handed to native fetch.
 *
 * - `undefined`/`null` and plain objects go through `JSON.stringify` (so `undefined` stays `undefined`).
 * - Node and web streams are read fully into a Buffer when `reg.ooye.content_length_workaround` is set.
 * - Without the workaround, node streams are wrapped as web streams and web streams are returned as they are.
 * - Anything else (for example a string) is returned unchanged.
 *
 * @param {undefined | string | object | streamWeb.ReadableStream | stream.Readable} body
 * @returns {Promise<string | streamWeb.ReadableStream | stream.Readable | Buffer>}
 */
async function _convertBody(body) {
	// Almost every POST request is going to take this path.
	if (body == undefined || Object.is(body.constructor, Object)) {
		return JSON.stringify(body)
	}

	const isNodeStream = body instanceof stream.Readable
	const isWebStream = !isNodeStream && body instanceof streamWeb.ReadableStream
	if (!isNodeStream && !isWebStream) {
		return body
	}

	// Content length workaround is set, so convert to buffer.
	// The buffer consumer accepts node streams, and also async iterables, which web streams are.
	if (reg.ooye.content_length_workaround) {
		return await buffer(body)
	}

	// Native fetch can only consume web streams, so node streams have to be wrapped.
	return isNodeStream ? stream.Readable.toWeb(body) : body
}
/* c8 ignore start */
/**
* @param {string} method
* @param {string} url
* @param {string | object | streamWeb.ReadableStream | stream.Readable} [body]
* @param {string | object | streamWeb.ReadableStream | stream.Readable} [bodyIn]
* @param {any} [extra]
*/
async function mreq(method, url, body, extra = {}) {
if (body == undefined || Object.is(body.constructor, Object)) {
body = JSON.stringify(body)
} else if (body instanceof stream.Readable && reg.ooye.content_length_workaround) {
body = await getStream.buffer(body)
} else if (body instanceof streamWeb.ReadableStream && reg.ooye.content_length_workaround) {
body = await stream.consumers.buffer(stream.Readable.fromWeb(body))
}
async function mreq(method, url, bodyIn, extra = {}) {
const body = await _convertBody(bodyIn)
/** @type {RequestInit} */
const opts = mixin({
@@ -86,3 +99,4 @@ module.exports.MatrixServerError = MatrixServerError
module.exports.baseUrl = baseUrl
module.exports.mreq = mreq
module.exports.withAccessToken = withAccessToken
module.exports._convertBody = _convertBody

47
src/matrix/mreq.test.js Normal file
View File

@@ -0,0 +1,47 @@
// @ts-check
const assert = require("assert")
const stream = require("stream")
const streamWeb = require("stream/web")
const {buffer} = require("stream/consumers")
const {test} = require("supertape")
const {_convertBody} = require("./mreq")
const {reg} = require("./read-registration")
/** Async generator that yields the chunks "a" and then "b"; the tests wrap it in streams. */
async function* generator() {
	for (const chunk of ["a", "b"]) {
		yield chunk
	}
}
// Start with the content length workaround switched off.
reg.ooye.content_length_workaround = false

test("convert body: converts object to string", async t => {
	const converted = await _convertBody({a: "1"})
	t.equal(converted, `{"a":"1"}`)
})

test("convert body: leaves undefined as undefined", async t => {
	const converted = await _convertBody(undefined)
	t.equal(converted, undefined)
})

test("convert body: leaves web readable as web readable", async t => {
	const nodeSource = stream.Readable.from(generator())
	const webSource = stream.Readable.toWeb(nodeSource)
	// The very same stream object is expected back, not a copy.
	const converted = await _convertBody(webSource)
	t.equal(converted, webSource)
})

test("convert body: converts node readable to web readable (for native fetch upload)", async t => {
	const nodeSource = stream.Readable.from(generator())
	const converted = await _convertBody(nodeSource)
	assert(converted instanceof streamWeb.ReadableStream)
	// Drain the converted stream to check that no data was lost in the wrapping.
	const drained = await buffer(converted)
	t.deepEqual(drained, Buffer.from("ab"))
})
test("convert body: converts node readable to buffer", async t => {
	// Switch the workaround on; with it set, streams are expected to come back as a Buffer.
	reg.ooye.content_length_workaround = true
	const readable = stream.Readable.from(generator())
	t.deepEqual(await _convertBody(readable), Buffer.from("ab"))
})

test("convert body: converts web readable to buffer", async t => {
	// Set the flag here as well instead of relying on the previous test having run first,
	// so this test still passes when run on its own or in a different order.
	reg.ooye.content_length_workaround = true
	const webReadable = stream.Readable.toWeb(stream.Readable.from(generator()))
	t.deepEqual(await _convertBody(webReadable), Buffer.from("ab"))
})

4
src/types.d.ts vendored
View File

@@ -167,6 +167,8 @@ export namespace Event {
export type M_Room_Message_File = {
msgtype: "m.file" | "m.image" | "m.video" | "m.audio"
body: string
format?: "org.matrix.custom.html"
formatted_body?: string
filename?: string
url: string
info?: any
@@ -184,6 +186,8 @@ export namespace Event {
export type M_Room_Message_Encrypted_File = {
msgtype: "m.file" | "m.image" | "m.video" | "m.audio"
body: string
format?: "org.matrix.custom.html"
formatted_body?: string
filename?: string
file: {
url: string

View File

@@ -1,14 +1,14 @@
// @ts-check
const streamWeb = require("stream/web")
const {test} = require("supertape")
const {test} = require("../../test/web")
const {router} = require("../../test/web")
const assert = require("assert").strict
require("./server")
test("web server: can get home", async t => {
t.match(await router.test("get", "/", {}), /Add the bot to your Discord server./)
t.has(await router.test("get", "/", {}), /a bridge between the Discord and Matrix chat apps/)
})
test("web server: can get htmx", async t => {

View File

@@ -29,6 +29,7 @@ reg.ooye.bridge_origin = "https://bridge.example.org"
const sync = new HeatSync({watchFS: false})
const discord = {
// @ts-ignore - only ignore guilds, because my data dump is missing random properties
guilds: new Map([
[data.guild.general.id, data.guild.general],
[data.guild.fna.id, data.guild.fna],
@@ -130,6 +131,7 @@ file._actuallyUploadDiscordFileToMxc = function(url, res) { throw new Error(`Not
require("../src/matrix/kstate.test")
require("../src/matrix/api.test")
require("../src/matrix/file.test")
require("../src/matrix/mreq.test")
require("../src/matrix/read-registration.test")
require("../src/matrix/txnid.test")
require("../src/d2m/actions/create-room.test")