diff --git a/docs/superpowers/specs/2026-04-30-issue-7599-open-graph-metadata-design.md b/docs/superpowers/specs/2026-04-30-issue-7599-open-graph-metadata-design.md new file mode 100644 index 00000000000..ba025584397 --- /dev/null +++ b/docs/superpowers/specs/2026-04-30-issue-7599-open-graph-metadata-design.md @@ -0,0 +1,146 @@ +# Open Graph metadata for pad pages — Design + +GitHub issue: https://github.com/ether/etherpad/issues/7599 + +## Problem + +When an Etherpad pad URL is shared in chat apps (WhatsApp, Signal, Slack, +Discord, iMessage, etc.) the link unfurls with no preview because the rendered +HTML carries no Open Graph or Twitter Card metadata. The reporter asks for +basic OG tags so shared links show a meaningful preview. + +## Goals + +- Pad URLs (`/p/:pad`), timeslider URLs (`/p/:pad/timeslider`), and the + homepage (`/`) emit Open Graph + Twitter Card meta tags. +- A site operator can override the default description via `settings.json`. +- No new runtime dependencies. Implementation lives in the existing EJS + templates and the existing settings module. + +## Non-goals + +- Per-pad descriptions, custom OG images per pad, or pulling content from the + pad body. The pad text is mutable and frequently empty at first load; using + it would be both expensive (extra DB read on a hot path) and misleading. +- A plugin hook for OG override. Defer until a plugin actually needs it + (YAGNI). +- Removing or changing the existing `<meta name="robots">` tag. OG unfurling is performed by chat clients that ignore + `robots`, so the privacy posture is unchanged. 
+ +## Tags emitted + +For the **pad page** (`/p/:pad`): + +| Tag | Value | +| ------------------- | ----------------------------------------------------------- | +| `og:title` | `{decoded pad name} | {settings.title}` | +| `og:description` | `settings.socialDescription` | +| `og:image` | absolute URL to `{req.protocol}://{host}/favicon.ico`* | +| `og:url` | absolute URL of the request | +| `og:type` | `website` | +| `og:site_name` | `settings.title` | +| `og:locale` | negotiated `renderLang` (already computed in `pad.html`), normalized to BCP-47 with underscore (e.g. `en_US`, `de_DE`); falls back to `en_US` | +| `og:image:alt` | `"{settings.title} logo"` (a11y — screen readers in chat clients announce this) | +| `twitter:card` | `summary` | +| `twitter:title` | same as `og:title` | +| `twitter:description` | same as `og:description` | +| `twitter:image` | same as `og:image` | +| `twitter:image:alt` | same as `og:image:alt` | + +\* `settings.favicon` is normally null (defaults route to the bundled +`favicon.ico` via the favicon middleware). The template builds the absolute +URL by joining `req.protocol`, `req.get('host')`, and the favicon path. If +`settings.favicon` is an absolute URL it is used verbatim. + +For the **timeslider** (`/p/:pad/timeslider`): same tags, with `og:title` set +to `{decoded pad name} (history) | {settings.title}`. + +For the **homepage** (`/`): same tags, with `og:title` set to +`settings.title` and `og:url` set to the request URL. + +## i18n source + +The description text lives in Etherpad's standard locale catalog under the +key `pad.social.description`. The shipped English default in +`src/locales/en.json` is the softer rewording of the wording in the issue: + +> A collaborative document that everyone can edit in real time. + +Other locale files may translate the key as the translation community picks +it up; missing translations fall back to English. 
**No new `settings.json` +key is added** — operators who want to override the text per-language do so +via the existing `customLocaleStrings` mechanism that Etherpad already +supports. + +**Locale negotiation.** Resolution order at request time: +1. `locales[renderLang]['pad.social.description']` (exact match, where + `renderLang` was negotiated via `req.acceptsLanguages()`). +2. `locales[primarySubtag]['pad.social.description']` (e.g. `de-AT` → `de`). +3. `locales.en['pad.social.description']` (English fallback). +4. Empty string (only if `en.json` is missing the key — should not happen + in core). + +The `i18n` hook now exports the loaded `locales` map so other server-side +modules can look up translated strings without re-reading the JSON files. + +## Implementation outline + +1. **Settings** — declare `socialDescription: string` on the Settings module + with the default above; document it in both example settings files. +2. **Helper** — extract the meta-tag block into a single source of truth. + Preferred form is an EJS partial included from each template; if + Etherpad's `eejs` wrapper does not support `include()` cleanly, fall back + to a small JS helper (e.g. `src/node/utils/socialMeta.ts`) exported into + the template via the existing `eejs.require` context, returning the + rendered `<meta>` block as a string. Implementation step 1 of the plan + must verify which mechanism `eejs` supports before committing to one. +3. **pad.html / timeslider.html / index.html** — compute the four template + inputs at the top of each file and `<%- include('_socialMeta', {...}) %>` + in `<head>`, after the existing `<title>` line. The pad name is decoded + with `decodeURIComponent(req.params.pad)` and HTML-escaped via the + existing `<%= %>` mechanism (EJS escapes by default). +4. **Route handlers** — `specialpages.ts` already passes `req` and + `settings` to the templates; no route changes needed. 
+ +## Tests + +Add to the existing backend test suite (likely +`src/tests/backend/specs/specialpages.ts` or a new +`src/tests/backend/specs/socialmeta.ts`): + +- GET `/p/TestPad-7599` → response HTML contains + `<meta property="og:title" content="TestPad-7599 | Etherpad">` and an + `og:description` matching the default. +- GET `/p/TestPad-7599` with `settings.socialDescription` overridden to + `"Custom desc"` → that custom value appears in `og:description`. +- GET `/p/Has%20Space` → `og:title` contains `Has Space` (decoded) and is + HTML-safe (no raw `%`). +- GET `/p/<script>` (encoded) → `og:title` contains escaped `<script>`, + not raw HTML. +- GET `/p/TestPad/timeslider` → `og:title` contains `(history)`. +- GET `/` → `og:title` equals `settings.title`. +- GET `/p/TestPad` with `Accept-Language: de` and + `socialDescription: {default: "X", de: "Y"}` → `og:description` is `Y` + and `og:locale` is `de_DE` (or `de`). +- Response includes `og:image:alt` and `twitter:image:alt`. + +The XSS escape test is the security-relevant one: pad IDs are user-controlled +(anyone can navigate to `/p/<anything>`). + +## Risks and trade-offs + +- **Pad-name leakage.** Anyone the link is shared with can already see the pad + name in the URL, so emitting it in `og:title` does not expose anything new. +- **Caching.** OG tags are read once per unfurl. Chat clients cache aggressively; + changing `socialDescription` will not propagate to previously-cached previews. + This is acceptable and standard. +- **Template-set drift.** Etherpad has three top-level HTML templates that + need OG tags; the `_socialMeta` partial avoids three copies of the same + block. + +## Out of scope (future work) + +- A `padSocialMetadata` hook that lets plugins override the values. +- Per-pad description (e.g. ep_pad_title integration). +- Generated preview images (would require a rendering service). 
diff --git a/settings.json.docker b/settings.json.docker index 8fdd51de01e..76cc408dc07 100644 --- a/settings.json.docker +++ b/settings.json.docker @@ -117,6 +117,14 @@ */ "favicon": "${FAVICON:null}", + /* + * Canonical public origin of this Etherpad instance, e.g. + * "https://pad.example.com" (no trailing slash, must include scheme). + * Used to build absolute URLs in OG/Twitter link-preview meta tags. + * When null, falls back to the incoming request's protocol+Host. + */ + "publicURL": "${PUBLIC_URL:null}", + /* * Skin name. * diff --git a/settings.json.template b/settings.json.template index b62a51d2a02..7f0e586c2dc 100644 --- a/settings.json.template +++ b/settings.json.template @@ -108,6 +108,21 @@ */ "favicon": null, + /* + * Canonical public origin of this Etherpad instance, e.g. + * "https://pad.example.com" (no trailing slash, must include scheme). + * + * When set, this is used to build absolute URLs in server-rendered output + * such as the Open Graph / Twitter Card link-preview meta tags (og:url, + * og:image, ...). When null, those URLs fall back to the request's + * protocol+Host, which can reflect client-controlled headers if your + * reverse proxy passes them through unsanitized. + * + * Set this in production deployments to lock down the canonical origin + * advertised in shared link previews. + */ + "publicURL": null, + /* * Skin name. * diff --git a/src/locales/en.json b/src/locales/en.json index 729d312d23c..73618401e7c 100644 --- a/src/locales/en.json +++ b/src/locales/en.json @@ -220,5 +220,6 @@ "pad.impexp.importfailed": "Import failed", "pad.impexp.copypaste": "Please copy paste", "pad.impexp.exportdisabled": "Exporting as {{type}} format is disabled. Please contact your system administrator for details.", - "pad.impexp.maxFileSize": "File too big. Contact your site administrator to increase the allowed file size for import" + "pad.impexp.maxFileSize": "File too big. 
Contact your site administrator to increase the allowed file size for import", + "pad.social.description": "A collaborative document that everyone can edit in real time." } diff --git a/src/node/hooks/express/specialpages.ts b/src/node/hooks/express/specialpages.ts index 2863074e2fd..34cf7e190d1 100644 --- a/src/node/hooks/express/specialpages.ts +++ b/src/node/hooks/express/specialpages.ts @@ -10,6 +10,8 @@ import settings, {getEpVersion} from '../../utils/Settings'; import util from 'node:util'; const webaccess = require('./webaccess'); const plugins = require('../../../static/js/pluginfw/plugin_defs'); +const i18n = require('../i18n'); +import {renderSocialMeta} from '../../utils/socialMeta'; import {build, buildSync} from 'esbuild' import {ArgsExpressType} from "../../types/ArgsExpressType"; @@ -172,7 +174,10 @@ const handleLiveReload = async (args: ArgsExpressType, padString: string, timeSl }) setRouteHandler('/', (req: any, res: any) => { const proxyPath = sanitizeProxyPath(req); - res.send(eejs.require('ep_etherpad-lite/templates/index.html', {req, entrypoint: proxyPath + '/watch/index?hash=' + hash, settings})); + const socialMetaHtml = renderSocialMeta({ + req, settings, availableLangs: i18n.availableLangs, locales: i18n.locales, kind: 'home', + }); + res.send(eejs.require('ep_etherpad-lite/templates/index.html', {req, entrypoint: proxyPath + '/watch/index?hash=' + hash, settings, socialMetaHtml})); }) }) @@ -196,12 +201,16 @@ const handleLiveReload = async (args: ArgsExpressType, padString: string, timeSl }); const proxyPath = sanitizeProxyPath(req); + const socialMetaHtml = renderSocialMeta({ + req, settings, availableLangs: i18n.availableLangs, locales: i18n.locales, kind: 'pad', padName: req.params.pad, + }); const content = eejs.require('ep_etherpad-lite/templates/pad.html', { req, toolbar, isReadOnly, entrypoint: proxyPath + '/watch/pad?hash=' + hash, - settings: settings.getPublicSettings() + settings: settings.getPublicSettings(), + socialMetaHtml, 
}) res.send(content); }) @@ -227,12 +236,16 @@ const handleLiveReload = async (args: ArgsExpressType, padString: string, timeSl }); const proxyPath = sanitizeProxyPath(req); + const socialMetaHtml = renderSocialMeta({ + req, settings, availableLangs: i18n.availableLangs, locales: i18n.locales, kind: 'timeslider', padName: req.params.pad, + }); const content = eejs.require('ep_etherpad-lite/templates/timeslider.html', { req, toolbar, isReadOnly, entrypoint: proxyPath + '/watch/timeslider?hash=' + hash, - settings: settings.getPublicSettings() + settings: settings.getPublicSettings(), + socialMetaHtml, }) res.send(content); }) @@ -342,7 +355,10 @@ exports.expressCreateServer = async (_hookName: string, args: ArgsExpressType, c // serve index.html under / args.app.get('/', (req: any, res: any) => { - res.send(eejs.require('ep_etherpad-lite/templates/index.html', {req, settings, entrypoint: "./"+fileNameIndex})); + const socialMetaHtml = renderSocialMeta({ + req, settings, availableLangs: i18n.availableLangs, locales: i18n.locales, kind: 'home', + }); + res.send(eejs.require('ep_etherpad-lite/templates/index.html', {req, settings, entrypoint: "./"+fileNameIndex, socialMetaHtml})); }); @@ -356,12 +372,16 @@ exports.expressCreateServer = async (_hookName: string, args: ArgsExpressType, c isReadOnly }); + const socialMetaHtml = renderSocialMeta({ + req, settings, availableLangs: i18n.availableLangs, locales: i18n.locales, kind: 'pad', padName: req.params.pad, + }); const content = eejs.require('ep_etherpad-lite/templates/pad.html', { req, toolbar, isReadOnly, entrypoint: "../"+fileNamePad, - settings: settings.getPublicSettings() + settings: settings.getPublicSettings(), + socialMetaHtml, }) res.send(content); }); @@ -372,11 +392,15 @@ exports.expressCreateServer = async (_hookName: string, args: ArgsExpressType, c toolbar, }); + const socialMetaHtml = renderSocialMeta({ + req, settings, availableLangs: i18n.availableLangs, locales: i18n.locales, kind: 'timeslider', 
padName: req.params.pad, + }); res.send(eejs.require('ep_etherpad-lite/templates/timeslider.html', { req, toolbar, entrypoint: "../../"+fileNameTimeSlider, - settings: settings.getPublicSettings() + settings: settings.getPublicSettings(), + socialMetaHtml, })); }); } else { diff --git a/src/node/hooks/i18n.ts b/src/node/hooks/i18n.ts index a9adc190b07..47dbdc3ec49 100644 --- a/src/node/hooks/i18n.ts +++ b/src/node/hooks/i18n.ts @@ -136,6 +136,9 @@ exports.expressPreSession = async (hookName:string, {app}:any) => { const locales = getAllLocales(); const localeIndex = generateLocaleIndex(locales); exports.availableLangs = getAvailableLangs(locales); + // Exported so server-rendered HTML (e.g. Open Graph meta tags) can look + // up translated strings without re-reading the locale files. + exports.locales = locales; app.get('/locales/:locale', (req:any, res:any) => { // works with /locale/en and /locale/en.json requests diff --git a/src/node/utils/Settings.ts b/src/node/utils/Settings.ts index 0428187195e..053cca39cba 100644 --- a/src/node/utils/Settings.ts +++ b/src/node/utils/Settings.ts @@ -164,6 +164,7 @@ export type SettingsType = { title: string, showRecentPads: boolean, favicon: string | null, + publicURL: string | null, ttl: { AccessToken: number, AuthorizationCode: number, @@ -323,6 +324,18 @@ const settings: SettingsType = { * Etherpad root directory. */ favicon: null, + + /** + * Canonical public origin of this Etherpad instance, e.g. "https://pad.example.com". + * When set, it is used to build absolute URLs in server-rendered output (currently + * the Open Graph / Twitter Card meta tags). When null, those URLs fall back to the + * incoming request's protocol+host, which is safe when Host/X-Forwarded-Host + * headers are trusted but should be configured explicitly in production to avoid + * client-controlled origin values appearing in og:url / og:image. + * + * No trailing slash. Must include scheme. 
'use strict';

/**
 * Builds the Open Graph + Twitter Card <meta> tag block for the pad page,
 * timeslider and homepage. Output values are HTML-escaped — pad names are
 * user-controlled, so this is the security boundary that prevents reflected
 * XSS via crafted pad IDs.
 *
 * The description text is sourced from Etherpad's i18n catalog under the key
 * `pad.social.description`. Operators can override it per-language via the
 * standard `customLocaleStrings` mechanism in settings.json.
 */

const SOCIAL_DESCRIPTION_KEY = 'pad.social.description';

// Every character that can terminate or break out of a double-quoted HTML
// attribute value (or open a tag) is mapped to its entity. An identity mapping
// here would silently disable the XSS protection this module exists to provide.
const ESCAPE_MAP: {[ch: string]: string} = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
};

/**
 * Escapes a value for safe interpolation into HTML text or a quoted attribute.
 *
 * @param s - Raw (possibly attacker-controlled) text.
 * @returns The text with `& < > " '` replaced by HTML entities.
 */
const escapeHtml = (s: string): string => String(s).replace(/[&<>"']/g, (c) => ESCAPE_MAP[c]);

/**
 * Looks up the localized link-preview description.
 *
 * Resolution order: exact `renderLang` match, then its primary subtag
 * (e.g. `de-AT` → `de`), then English, then the empty string.
 *
 * @param locales - Loaded locale catalog keyed by language code; may be undefined.
 * @param renderLang - Negotiated language tag.
 * @returns The first non-empty translation found, or `''`.
 */
const resolveDescription = (
  locales: {[lang: string]: {[key: string]: string}} | undefined,
  renderLang: string,
): string => {
  if (!locales) return '';
  const candidates = [renderLang, renderLang.split('-')[0], 'en'];
  for (const lang of candidates) {
    const text = locales[lang]?.[SOCIAL_DESCRIPTION_KEY];
    if (text) return text;
  }
  return '';
};

/**
 * Converts a negotiated language tag into the `language_TERRITORY` form that
 * Open Graph expects for `og:locale`.
 *
 * Only a second subtag that looks like a territory (two letters or three
 * digits) is kept. Script or variant subtags such as `hans` or `tarask` are
 * not territories, so for those only the lowercased primary subtag is emitted.
 *
 * @param renderLang - Language tag, e.g. `en`, `pt-br`, `zh-hans`.
 * @returns e.g. `en`, `pt_BR`, `zh`. Falls back to `en` for an empty tag.
 */
const toOgLocale = (renderLang: string): string => {
  const [primary = '', second] = String(renderLang).split(/[-_]/);
  const lang = primary.toLowerCase() || 'en';
  if (second && /^(?:[a-z]{2}|\d{3})$/i.test(second)) return `${lang}_${second.toUpperCase()}`;
  return lang;
};

export type SocialMetaOpts = {
  url: string,
  siteName: string,
  title: string,
  description: string,
  imageUrl: string,
  imageAlt: string,
  renderLang: string,
};

/**
 * Renders the Open Graph and Twitter Card `<meta>` tags as an HTML string.
 *
 * Every value in `opts` is HTML-escaped before interpolation; the tag names
 * themselves are fixed literals.
 *
 * @param opts - Already-resolved values for the tags.
 * @returns Newline-joined `<meta>` elements, one per tag.
 */
export const buildSocialMetaHtml = (opts: SocialMetaOpts): string => {
  // Open Graph uses the `property` attribute, Twitter Cards use `name`.
  const tag = (prop: string, value: string, attr: 'property' | 'name' = 'property') =>
    `  <meta ${attr}="${prop}" content="${escapeHtml(value)}">`;

  return [
    tag('og:type', 'website'),
    tag('og:site_name', opts.siteName),
    tag('og:title', opts.title),
    tag('og:description', opts.description),
    tag('og:url', opts.url),
    tag('og:image', opts.imageUrl),
    tag('og:image:alt', opts.imageAlt),
    tag('og:locale', toOgLocale(opts.renderLang)),
    tag('twitter:card', 'summary', 'name'),
    tag('twitter:title', opts.title, 'name'),
    tag('twitter:description', opts.description, 'name'),
    tag('twitter:image', opts.imageUrl, 'name'),
    tag('twitter:image:alt', opts.imageAlt, 'name'),
  ].join('\n');
};

/**
 * Picks the language to render in from the request's Accept-Language header.
 *
 * @param req - Express-like request; only `acceptsLanguages` is used.
 * @param availableLangs - Map whose keys are the language codes on offer.
 * @returns The negotiated language code, or `'en'` when nothing can be negotiated.
 */
const negotiateRenderLang = (req: any, availableLangs: {[k: string]: any}): string => {
  const offered = Object.keys(availableLangs ?? {});
  // With an empty offer list Express returns the client's whole preference
  // array instead of a single match, so skip negotiation entirely.
  if (offered.length === 0 || typeof req?.acceptsLanguages !== 'function') return 'en';
  const negotiated = req.acceptsLanguages(offered);
  return typeof negotiated === 'string' && negotiated ? negotiated : 'en';
};

// Strict hostname[:port] pattern. Rejects header injection (\r\n), userinfo
// (user@host), wildcards, and any non-DNS-character garbage. Length-capped so
// a giant Host header can't blow up the response.
const HOST_RE = /^[a-z0-9]([a-z0-9.-]{0,253}[a-z0-9])?(:\d{1,5})?$/i;

/**
 * Validates a Host header value.
 *
 * @param host - Raw header value, possibly undefined.
 * @returns The host unchanged when it matches `HOST_RE`, otherwise `null`.
 */
const sanitizeHost = (host: string | undefined): string | null => {
  if (typeof host !== 'string' || host.length === 0 || host.length > 255) return null;
  return HOST_RE.test(host) ? host : null;
};

/**
 * Validates the operator-configured `publicURL` setting.
 *
 * @param raw - Configured value; must be `http(s)://host[:port]` with no path.
 * @returns The origin with a lowercased scheme and no trailing slash, or `null`
 *   when the value is missing or malformed.
 */
const sanitizePublicURL = (raw: string | null | undefined): string | null => {
  if (!raw || typeof raw !== 'string') return null;
  // Must be http(s)://host[:port], no path. Strip trailing slash if present.
  const m = raw.replace(/\/+$/, '').match(/^(https?):\/\/([^\/?#]+)$/i);
  if (!m) return null;
  const [, scheme, host] = m;
  return sanitizeHost(host) ? `${scheme.toLowerCase()}://${host}` : null;
};

/**
 * Builds an absolute URL for `pathname`.
 *
 * Prefers `settings.publicURL` when configured (operator-trusted); otherwise
 * falls back to the request's protocol+Host with strict host validation so a
 * crafted Host header can't appear in og:url / og:image.
 *
 * @param req - Express-like request; `protocol` and `get('host')` are read.
 * @param pathname - Path to append, expected to start with `/`.
 * @param publicURL - Value of `settings.publicURL`, if any.
 * @returns An absolute `http(s)://…` URL; the host is `localhost` when the
 *   request's Host header is missing or fails validation.
 */
const buildAbsoluteUrl = (
  req: any, pathname: string, publicURL: string | null | undefined,
): string => {
  const trusted = sanitizePublicURL(publicURL);
  if (trusted) return `${trusted}${pathname}`;
  const proto = req?.protocol === 'https' ? 'https' : 'http';
  const rawHost = typeof req?.get === 'function' ? req.get('host') : undefined;
  const host = sanitizeHost(rawHost) || 'localhost';
  return `${proto}://${host}${pathname}`;
};

/**
 * Resolves the preview image URL.
 *
 * @param req - Express-like request, used for the fallback origin.
 * @param faviconSetting - Value of `settings.favicon`; used verbatim when it is
 *   already an absolute http(s) URL.
 * @param publicURL - Value of `settings.publicURL`, if any.
 * @returns The absolute favicon setting, or an absolute URL to `/favicon.ico`.
 */
const resolveImageUrl = (
  req: any, faviconSetting: string | null | undefined, publicURL: string | null | undefined,
): string => {
  if (faviconSetting && /^https?:\/\//i.test(faviconSetting)) return faviconSetting;
  return buildAbsoluteUrl(req, '/favicon.ico', publicURL);
};

export type RenderOpts = {
  req: any,
  settings: any,
  availableLangs: {[k: string]: any},
  locales: {[lang: string]: {[key: string]: string}},
  kind: 'pad' | 'timeslider' | 'home',
  padName?: string,
};

/**
 * Entry point used by the route handlers: resolves language, description,
 * title, image and canonical URL for the request and renders the tag block.
 *
 * @param o - Request, settings, locale data and the kind of page being rendered.
 * @returns HTML-escaped `<meta>` block ready for unescaped template output.
 */
export const renderSocialMeta = (o: RenderOpts): string => {
  const renderLang = negotiateRenderLang(o.req, o.availableLangs);
  const siteName = o.settings.title || 'Etherpad';
  const description = resolveDescription(o.locales, renderLang);
  const imageUrl = resolveImageUrl(o.req, o.settings.favicon, o.settings.publicURL);
  const imageAlt = `${siteName} logo`;

  let title = siteName;
  if (o.padName) {
    // Express has already URL-decoded :pad route params; do not decode again.
    if (o.kind === 'pad') title = `${o.padName} | ${siteName}`;
    else if (o.kind === 'timeslider') title = `${o.padName} (history) | ${siteName}`;
  }

  // og:url is the canonical address, so the query string is dropped.
  let pathname: string = (o.req && o.req.originalUrl) || '/';
  const qIdx = pathname.indexOf('?');
  if (qIdx >= 0) pathname = pathname.slice(0, qIdx);

  return buildSocialMetaHtml({
    url: buildAbsoluteUrl(o.req, pathname, o.settings.publicURL),
    siteName,
    title,
    description,
    imageUrl,
    imageAlt,
    renderLang,
  });
};
socialMetaHtml : '' %> diff --git a/src/templates/pad.html b/src/templates/pad.html index 46d0c942e78..8b067629210 100644 --- a/src/templates/pad.html +++ b/src/templates/pad.html @@ -12,6 +12,7 @@ <% e.begin_block("htmlHead"); %> <% e.end_block(); %> <%=settings.title%> +<%- typeof socialMetaHtml !== 'undefined' ? socialMetaHtml : '' %> '; + const html = buildSocialMetaHtml({ + url: evil, siteName: evil, title: evil, description: evil, + imageUrl: evil, imageAlt: evil, renderLang: 'en', + }); + assert.ok(!/', + }); + assert.ok(!/">')) + .expect((r: any) => { + // Etherpad may 404 or render — either is fine, but no raw