Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 0 additions & 14 deletions .changeset/code-cleanup-cache-optimization.md

This file was deleted.

5 changes: 0 additions & 5 deletions .changeset/eight-clubs-read.md

This file was deleted.

6 changes: 0 additions & 6 deletions .changeset/eight-vans-sleep.md

This file was deleted.

6 changes: 0 additions & 6 deletions .changeset/fast-lands-sit.md

This file was deleted.

6 changes: 0 additions & 6 deletions .changeset/legal-toes-joke.md

This file was deleted.

37 changes: 0 additions & 37 deletions .changeset/major-refactor-breadcrumbs-wrap.md

This file was deleted.

16 changes: 16 additions & 0 deletions packages/docusaurus-plugin-llms-txt/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,21 @@
# @signalwire/docusaurus-plugin-llms-txt

## 2.0.0-alpha.7

### Patch Changes

- Fix code block language identifiers lost in MDX components (fixes #20)

Code blocks were losing their language identifiers during HTML → Markdown conversion because
Docusaurus places language classes on `<pre>` and wrapper `<div>` elements, not on `<code>`
elements. Implemented a custom pre handler that:
- Extracts language from `<pre>` element's className
- Falls back to checking parent element if needed
- Preserves code formatting by converting `<br/>` to newlines
- Emits the code block without a language identifier when no `language-*` class is found

Tested with 21 programming languages in MDX Tabs and regular code blocks.

## 2.0.0-alpha.5

### Patch Changes
Expand Down
2 changes: 1 addition & 1 deletion packages/docusaurus-plugin-llms-txt/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@signalwire/docusaurus-plugin-llms-txt",
"version": "2.0.0-alpha.6",
"version": "2.0.0-alpha.7",
"type": "module",
"description": "Generate Markdown versions of Docusaurus HTML pages and an llms.txt index file",
"main": "./lib/src/index.js",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/**
* Copyright (c) SignalWire, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

import type { Element, Parents } from 'hast';
import type { State } from 'hast-util-to-mdast';
import type { Code } from 'mdast';

/**
 * Extracts a language identifier from an element's class list.
 *
 * Scans the classes in order and returns the part after the `language-`
 * prefix of the first class that carries a non-empty identifier. A bare
 * `language-` class (no identifier) is skipped so that it neither yields an
 * empty language nor hides a later, valid `language-*` class.
 *
 * @param className - Class names from the element: normally an array of
 *   strings, but a whitespace-separated string is accepted as well. Any
 *   other value is treated as "no classes".
 * @returns Language identifier, or null if none is found
 */
function extractLanguage(className: unknown): string | null {
  const prefix = 'language-';

  let classes: unknown[];
  if (Array.isArray(className)) {
    classes = className;
  } else if (typeof className === 'string') {
    classes = className.split(/\s+/);
  } else {
    return null;
  }

  for (const cls of classes) {
    if (typeof cls !== 'string' || !cls.startsWith(prefix)) {
      continue;
    }

    // Strip only the leading prefix; the rest is the identifier verbatim.
    const lang = cls.slice(prefix.length);
    if (lang !== '') {
      return lang;
    }
  }

  return null;
}

/**
 * Extracts text content from a hast node and its children,
 * converting <br/> elements to newlines.
 *
 * Comment nodes are skipped: they carry a string `value` just like text
 * nodes, but their content is not part of the code and must not be emitted.
 *
 * @param node - The hast node to extract text from
 * @returns The text content with preserved line breaks
 */
function extractText(node: Element | { type: string; value?: string }): string {
  // HTML comments have a `value` too; keep them out of the code text
  if (node.type === 'comment') {
    return '';
  }

  // Text nodes (and other literals) have a value property
  if ('value' in node && typeof node.value === 'string') {
    return node.value;
  }

  // Element nodes - a <br/> tag stands for a line break
  if ('tagName' in node && node.tagName === 'br') {
    return '\n';
  }

  // Recursively extract text from children, in document order
  if ('children' in node && Array.isArray(node.children)) {
    return node.children
      .map((child: Element | { type: string; value?: string }) =>
        extractText(child)
      )
      .join('');
  }

  return '';
}

/**
 * Custom handler for <pre> elements to preserve code block
 * language identifiers.
 *
 * Docusaurus places language classes on <pre> and parent <div> elements,
 * but not on <code> elements. The default rehype-remark handler only
 * checks <code> elements, causing language identifiers to be lost.
 *
 * This handler:
 * 1. Looks for a <code> child of the <pre>
 * 2. Extracts language from <pre> element's className
 * 3. Falls back to the parent element, then to the <code> element itself
 * 4. Extracts the text from <code> (or from <pre> when there is no <code>)
 * 5. Drops trailing line breaks so the fence has no empty last line
 * 6. Returns an mdast code node with the language preserved
 *
 * @param state - Handler state from hast-util-to-mdast (unused; part of
 *   the handler signature)
 * @param node - The <pre> element from the hast tree
 * @param parent - The parent element (optional)
 * @returns An mdast code node, or undefined when `node` is not a <pre>
 */
export function handlePreElement(
  state: State,
  node: Element,
  parent?: Parents
): Code | void {
  // Verify this is a pre element
  if (node.tagName !== 'pre') {
    return undefined;
  }

  // Find the code element child
  const codeElement = node.children?.find(
    (child): child is Element =>
      typeof child === 'object' &&
      child !== null &&
      child.type === 'element' &&
      child.tagName === 'code'
  );

  // Try to extract language from pre element first
  let lang = extractLanguage(node.properties?.className);

  // If not found on pre, check parent element (Docusaurus wrapper div)
  if (!lang && parent && parent.type === 'element') {
    lang = extractLanguage(parent.properties?.className);
  }

  // Last resort: plain `<pre><code class="language-x">` markup, which the
  // default handler used to cover before this handler took over `pre`
  if (!lang && codeElement) {
    lang = extractLanguage(codeElement.properties?.className);
  }

  // A <pre> without <code> is still converted from its own text. Returning
  // undefined here would not hand over to the default handler (this one
  // replaces it for `pre`); the node and its content would be dropped.
  const text = extractText(codeElement ?? node);

  // Every rendered line ends in <br/>, so the raw text ends with a line
  // break; trim trailing line breaks like the default handler does.
  let end = text.length;
  while (end > 0 && (text[end - 1] === '\n' || text[end - 1] === '\r')) {
    end -= 1;
  }

  // Return mdast code node with language (or null if not found)
  return {
    type: 'code',
    lang: lang || null,
    meta: null,
    value: text.slice(0, end),
  };
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import remarkGfm from 'remark-gfm';
import remarkStringify from 'remark-stringify';
import { unified } from 'unified';

import { handlePreElement } from './code-block-handler';
import rehypeLinks from './rehype-links';
import rehypeTables from './rehype-tables';

Expand Down Expand Up @@ -97,7 +98,10 @@ export class PluginRegistry {

// Always last - converts HTML AST to Markdown AST
processor.use(rehypeRemark, {
handlers: { br: () => ({ type: 'html', value: '<br />' }) },
handlers: {
br: () => ({ type: 'html', value: '<br />' }),
pre: handlePreElement,
},
});
}

Expand Down
2 changes: 1 addition & 1 deletion packages/docusaurus-theme-llms-txt/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
"@docusaurus/module-type-aliases": "^3.0.0",
"@docusaurus/plugin-content-docs": "^3.0.0",
"@docusaurus/types": "^3.0.0",
"@signalwire/docusaurus-plugin-llms-txt": "2.0.0-alpha.6",
"@signalwire/docusaurus-plugin-llms-txt": "2.0.0-alpha.7",
"@types/node": "^22.15.19",
"@types/react": "^19.1.13",
"@types/react-dom": "^19.1.9",
Expand Down
7 changes: 7 additions & 0 deletions website/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# website

## 0.0.1

### Patch Changes

- Updated dependencies [dfedf00]
- @signalwire/docusaurus-plugin-llms-txt@2.0.0

## 0.0.1-alpha.1

### Patch Changes
Expand Down
Loading