Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions app/components/Package/Playgrounds.vue
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
<script setup lang="ts">
import type { PlaygroundLink } from '#shared/types'
import { decodeHtmlEntities } from '~/utils/formatters'

const props = defineProps<{
links: PlaygroundLink[]
Expand Down Expand Up @@ -130,7 +129,7 @@ function focusMenuItem(index: number) {
:class="[getIcon(firstLink.provider), getColor(firstLink.provider), 'w-4 h-4 shrink-0']"
aria-hidden="true"
/>
<span class="truncate text-fg-muted">{{ decodeHtmlEntities(firstLink.label) }}</span>
<span class="truncate text-fg-muted">{{ firstLink.label }}</span>
</a>
</TooltipApp>

Expand Down Expand Up @@ -186,7 +185,7 @@ function focusMenuItem(index: number) {
:class="[getIcon(link.provider), getColor(link.provider), 'w-4 h-4 shrink-0']"
aria-hidden="true"
/>
<span class="truncate">{{ decodeHtmlEntities(link.label) }}</span>
<span class="truncate">{{ link.label }}</span>
</a>
</TooltipApp>
</div>
Expand Down
7 changes: 3 additions & 4 deletions app/components/ReadmeTocDropdown.vue
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
<script setup lang="ts">
import type { TocItem } from '#shared/types/readme'
import { onClickOutside, useEventListener } from '@vueuse/core'
import { decodeHtmlEntities } from '~/utils/formatters'

const props = defineProps<{
toc: TocItem[]
Expand Down Expand Up @@ -202,7 +201,7 @@ function handleKeydown(event: KeyboardEvent) {
@click="select()"
@mouseenter="highlightedIndex = getIndex(node.id)"
>
<span class="truncate">{{ decodeHtmlEntities(node.text) }}</span>
<span class="truncate">{{ node.text }}</span>
</NuxtLink>

<template v-for="child in node.children" :key="child.id">
Expand All @@ -220,7 +219,7 @@ function handleKeydown(event: KeyboardEvent) {
@click="select()"
@mouseenter="highlightedIndex = getIndex(child.id)"
>
<span class="truncate">{{ decodeHtmlEntities(child.text) }}</span>
<span class="truncate">{{ child.text }}</span>
</NuxtLink>

<NuxtLink
Expand All @@ -241,7 +240,7 @@ function handleKeydown(event: KeyboardEvent) {
@click="select()"
@mouseenter="highlightedIndex = getIndex(grandchild.id)"
>
<span class="truncate">{{ decodeHtmlEntities(grandchild.text) }}</span>
<span class="truncate">{{ grandchild.text }}</span>
</NuxtLink>
</template>
</template>
Expand Down
2 changes: 1 addition & 1 deletion app/composables/useMarkdown.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { decodeHtmlEntities } from '~/utils/formatters'
import { decodeHtmlEntities } from '#shared/utils/html'

interface UseMarkdownOptions {
text: string
Expand Down
14 changes: 0 additions & 14 deletions app/utils/formatters.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,3 @@ export function toIsoDateString(date: Date): string {
const day = String(date.getUTCDate()).padStart(2, '0')
return `${year}-${month}-${day}`
}

/**
 * The handful of HTML entities this module decodes, keyed by their
 * escaped form and mapped to the character each one represents.
 */
const htmlEntities: Record<string, string> = {
  '&amp;': '&',
  '&lt;': '<',
  '&gt;': '>',
  '&quot;': '"',
  '&#39;': "'",
  '&apos;': "'",
  '&nbsp;': ' ',
}

/**
 * Decode the entities listed in `htmlEntities` wherever they occur in `text`.
 * Any other `&...;` sequence is left exactly as written.
 *
 * @param text - String that may contain escaped HTML entities.
 * @returns The string with every supported entity replaced by its character.
 */
export function decodeHtmlEntities(text: string): string {
  return text.replace(/&(?:amp|lt|gt|quot|apos|nbsp|#39);/g, function (entity) {
    const decoded = htmlEntities[entity]
    // Fall back to the raw match if the table has no entry for it.
    return decoded || entity
  })
}
2 changes: 1 addition & 1 deletion server/api/registry/readme/[...pkg].get.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ export default defineCachedEventHandler(
swr: true,
getKey: event => {
const pkg = getRouterParam(event, 'pkg') ?? ''
return `readme:v8:${pkg.replace(/\/+$/, '').trim()}`
return `readme:v9:${pkg.replace(/\/+$/, '').trim()}`
},
},
)
34 changes: 24 additions & 10 deletions server/utils/readme.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@ import sanitizeHtml from 'sanitize-html'
import { hasProtocol } from 'ufo'
import type { ReadmeResponse, TocItem } from '#shared/types/readme'
import { convertBlobOrFileToRawUrl, type RepositoryInfo } from '#shared/utils/git-providers'
import { highlightCodeSync } from './shiki'
import { decodeHtmlEntities } from '#shared/utils/html'
import { convertToEmoji } from '#shared/utils/emoji'

import { highlightCodeSync } from './shiki'

/**
* Playground provider configuration
*/
Expand Down Expand Up @@ -172,8 +174,21 @@ const ALLOWED_ATTR: Record<string, string[]> = {
'p': ['align'],
}

// GitHub-style callout types
// Format: > [!NOTE], > [!TIP], > [!IMPORTANT], > [!WARNING], > [!CAUTION]
/**
 * Strip all HTML tags from a string, repeating the replacement until the
 * result stops changing so that no tag can survive a single pass
 * (e.g. `<scr<script>ipt>` → `ipt>` after one pass, which the next pass
 * leaves unchanged).
 */
Comment on lines +178 to +181
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Comment example does not match the actual regex behaviour.

The doc comment illustrates `<scr<script>ipt>` → `<script>` after one pass, which is the failure mode of the narrower regex `/<[^<>]*>/g` (which excludes both `<` and `>`). The actual regex used here is `/<[^>]*>/g`, whose greedy `[^>]*` also matches `<`, so the first pass strips `<scr<script>` (from the opening `<` to the first `>`), leaving `ipt>` — not `<script>`.

📝 Proposed fix for the comment
 /**
  * Strip all HTML tags from a string, looping until stable to prevent
  * incomplete sanitization from nested/interleaved tags
- * (e.g. `<scr<script>ipt>` → `<script>` after one pass).
+ * (e.g. `<scr<script>ipt>` → `ipt>` after one pass, which a second pass leaves unchanged).
  */

function stripHtmlTags(text: string): string {
  // Anything from a `<` up to the next `>` is treated as a tag.
  const anyTag = /<[^>]*>/g
  let current = text
  // Keep removing tags until a pass leaves the string untouched.
  for (;;) {
    const stripped = current.replace(anyTag, '')
    if (stripped === current) {
      return stripped
    }
    current = stripped
  }
}

/**
* Generate a GitHub-style slug from heading text.
Expand All @@ -184,8 +199,7 @@ const ALLOWED_ATTR: Record<string, string[]> = {
* - Collapse multiple hyphens
*/
function slugify(text: string): string {
return text
.replace(/<[^>]*>/g, '') // Strip HTML tags
return stripHtmlTags(text)
.toLowerCase()
.trim()
.replace(/\s+/g, '-') // Spaces to hyphens
Expand Down Expand Up @@ -371,8 +385,8 @@ export async function renderReadmeHtml(
// (e.g., #install, #dependencies, #versions are used by the package page)
const id = `user-content-${uniqueSlug}`

// Collect TOC item with plain text (HTML stripped)
const plainText = text.replace(/<[^>]*>/g, '').trim()
// Collect TOC item with plain text (HTML stripped, entities decoded)
const plainText = decodeHtmlEntities(stripHtmlTags(text).trim())
if (plainText) {
toc.push({ text: plainText, id, depth })
}
Expand Down Expand Up @@ -402,11 +416,11 @@ ${html}
return `<img src="${resolvedHref}"${altAttr}${titleAttr}>`
}

// // Resolve link URLs, add security attributes, and collect playground links
// Resolve link URLs, add security attributes, and collect playground links
renderer.link = function ({ href, title, tokens }: Tokens.Link) {
const text = this.parser.parseInline(tokens)
const titleAttr = title ? ` title="${title}"` : ''
let plainText = text.replace(/<[^>]*>/g, '').trim()
let plainText = stripHtmlTags(text).trim()

// If plain text is empty, check if we have an image with alt text
if (!plainText && tokens.length === 1 && tokens[0]?.type === 'image') {
Expand Down Expand Up @@ -511,7 +525,7 @@ ${html}
* provide the text of the element. This will automatically be removed, because there
* is an allow list for link attributes.
* */
label: attribs['data-title-intermediate'] || provider.name,
label: decodeHtmlEntities(attribs['data-title-intermediate'] || provider.name),
})
}

Expand Down
13 changes: 13 additions & 0 deletions shared/utils/html.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/**
 * Lookup table from each supported HTML entity to the character it stands for.
 */
const htmlEntities: Record<string, string> = {
  '&amp;': '&',
  '&lt;': '<',
  '&gt;': '>',
  '&quot;': '"',
  '&#39;': "'",
  '&apos;': "'",
  '&nbsp;': '\u00A0',
}

/**
 * Replace the supported HTML entities in `text` with their literal characters.
 * Sequences that are not in `htmlEntities` (e.g. `&unknown;`) pass through untouched.
 *
 * @param text - String that may contain escaped HTML entities.
 * @returns The string with every supported entity decoded.
 */
export function decodeHtmlEntities(text: string): string {
  // Resolve one matched entity, keeping the raw match when the table has no entry.
  const resolveEntity = (entity: string): string => {
    const decoded = htmlEntities[entity]
    return decoded ? decoded : entity
  }
  return text.replace(/&(?:amp|lt|gt|quot|apos|nbsp|#39);/g, resolveEntity)
}
28 changes: 1 addition & 27 deletions test/unit/app/utils/formatters.spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { describe, expect, it } from 'vitest'
import { decodeHtmlEntities, toIsoDateString } from '../../../../app/utils/formatters'
import { toIsoDateString } from '../../../../app/utils/formatters'

describe('toIsoDateString', () => {
it('formats a date as YYYY-MM-DD', () => {
Expand All @@ -10,29 +10,3 @@ describe('toIsoDateString', () => {
expect(toIsoDateString(new Date('2024-01-05T00:00:00Z'))).toBe('2024-01-05')
})
})

describe('decodeHtmlEntities', () => {
  // Each pair is [encoded entity, expected decoded character].
  const singleEntityCases = [
    ['&amp;', '&'],
    ['&lt;', '<'],
    ['&gt;', '>'],
    ['&quot;', '"'],
    ['&#39;', "'"],
    ['&apos;', "'"],
    ['&nbsp;', ' '],
  ] as const

  it.each(singleEntityCases)('%s → %s', (input, expected) => {
    const decoded = decodeHtmlEntities(input)
    expect(decoded).toBe(expected)
  })

  it('decodes multiple entities in one string', () => {
    const decoded = decodeHtmlEntities('a &amp; b &lt; c')
    expect(decoded).toBe('a & b < c')
  })

  it('leaves plain text unchanged', () => {
    const plain = 'say no to bloat'
    expect(decodeHtmlEntities(plain)).toBe(plain)
  })

  it('leaves unknown entities unchanged', () => {
    const unknown = '&unknown;'
    expect(decodeHtmlEntities(unknown)).toBe(unknown)
  })
})
28 changes: 28 additions & 0 deletions test/unit/shared/utils/html.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import { describe, expect, it } from 'vitest'
import { decodeHtmlEntities } from '../../../../shared/utils/html'

describe('decodeHtmlEntities', () => {
  // Each pair is [encoded entity, expected decoded character].
  const singleEntityCases = [
    ['&amp;', '&'],
    ['&lt;', '<'],
    ['&gt;', '>'],
    ['&quot;', '"'],
    ['&#39;', "'"],
    ['&apos;', "'"],
    ['&nbsp;', '\u00A0'],
  ] as const

  it.each(singleEntityCases)('%s → %s', (input, expected) => {
    const decoded = decodeHtmlEntities(input)
    expect(decoded).toBe(expected)
  })

  it('decodes multiple entities in one string', () => {
    const decoded = decodeHtmlEntities('a &amp; b &lt; c')
    expect(decoded).toBe('a & b < c')
  })

  it('leaves plain text unchanged', () => {
    const plain = 'say no to bloat'
    expect(decodeHtmlEntities(plain)).toBe(plain)
  })

  it('leaves unknown entities unchanged', () => {
    const unknown = '&unknown;'
    expect(decodeHtmlEntities(unknown)).toBe(unknown)
  })
})
Loading