diff --git a/__tests__/lib/mdxish/mdxish-snake-case.test.ts b/__tests__/lib/mdxish/mdxish-snake-case.test.ts new file mode 100644 index 000000000..3e2112966 --- /dev/null +++ b/__tests__/lib/mdxish/mdxish-snake-case.test.ts @@ -0,0 +1,281 @@ +import type { Element } from 'hast'; + +import { mdxish } from '../../../lib/mdxish'; +import { type RMDXModule } from '../../../types'; + +const stubModule: RMDXModule = { + default: () => null, + Toc: null, + toc: [], +}; + +const makeComponents = (...names: string[]) => + names.reduce>((acc, name) => { + acc[name] = stubModule; + return acc; + }, {}); + +describe('mdxish snake_case component integration', () => { + describe('basic rendering', () => { + it('should render snake_case component as HAST element', () => { + const doc = ''; + const components = makeComponents('Snake_case'); + + const hast = mdxish(doc, { components }); + + const component = hast.children.find(child => child.type === 'element' && child.tagName === 'Snake_case'); + expect(component).toBeDefined(); + expect(component?.type).toBe('element'); + expect(component?.tagName).toBe('Snake_case'); + }); + + it('should render component with multiple underscores', () => { + const doc = ''; + const components = makeComponents('Multiple_Underscore_Component'); + + const hast = mdxish(doc, { components }); + + const component = hast.children.find( + child => child.type === 'element' && child.tagName === 'Multiple_Underscore_Component', + ); + expect(component).toBeDefined(); + }); + + it('should remove undefined snake_case component', () => { + const doc = ''; + const hast = mdxish(doc); + + const component = hast.children.find( + child => child.type === 'element' && child.tagName === 'Undefined_Component', + ); + expect(component).toBeUndefined(); + }); + }); + + describe('components with content', () => { + it('should render snake_case component with text content', () => { + const doc = ` +Simple text content +`; + + const components = 
makeComponents('Snake_case'); + + const hast = mdxish(doc, { components }); + + const component = hast.children.find(child => child.type === 'element' && child.tagName === 'Snake_case'); + expect(component).toBeDefined(); + expect(component?.type).toBe('element'); + + const elementNode = component as Element; + expect(elementNode.children.length).toBeGreaterThan(0); + }); + + it('should render snake_case component with markdown content', () => { + const doc = ` + +# Heading + +Some **bold** and *italic* text. + +`; + + const components = makeComponents('Snake_case'); + + const hast = mdxish(doc, { components }); + + const component = hast.children.find(child => child.type === 'element' && child.tagName === 'Snake_case'); + expect(component).toBeDefined(); + + const elementNode = component as Element; + expect(elementNode.children.length).toBeGreaterThan(0); + }); + }); + + describe('components with attributes', () => { + it('should preserve string attributes', () => { + const doc = ''; + const components = makeComponents('Snake_case'); + + const hast = mdxish(doc, { components }); + + const component = hast.children.find(child => child.type === 'element' && child.tagName === 'Snake_case'); + expect(component).toBeDefined(); + expect(component?.type).toBe('element'); + + const elementNode = component as Element; + expect(elementNode.properties?.theme).toBe('info'); + expect(elementNode.properties?.id).toBe('test-id'); + }); + + it('should preserve boolean attributes', () => { + const doc = ''; + const components = makeComponents('Snake_case'); + + const hast = mdxish(doc, { components }); + + const component = hast.children.find(child => child.type === 'element' && child.tagName === 'Snake_case'); + expect(component).toBeDefined(); + expect(component?.type).toBe('element'); + + const elementNode = component as Element; + expect(elementNode.properties?.empty).toBeDefined(); + }); + }); + + describe('multiple components', () => { + it('should render multiple instances 
of same snake_case component', () => { + const doc = ` + + + +`; + + const components = makeComponents('Snake_case'); + + const hast = mdxish(doc, { components }); + + const componentsFound = hast.children.filter(child => child.type === 'element' && child.tagName === 'Snake_case'); + expect(componentsFound).toHaveLength(3); + }); + + it('should render multiple different snake_case components', () => { + const doc = ` + + + +`; + + const components = makeComponents('First_Component', 'Second_Component'); + + const hast = mdxish(doc, { components }); + + const firstComponents = hast.children.filter( + child => child.type === 'element' && child.tagName === 'First_Component', + ); + const secondComponents = hast.children.filter( + child => child.type === 'element' && child.tagName === 'Second_Component', + ); + + expect(firstComponents).toHaveLength(2); + expect(secondComponents).toHaveLength(1); + }); + }); + + describe('nested components', () => { + it('should handle nested snake_case components', () => { + const doc = ` + + + +`; + + const components = makeComponents('Outer_Component', 'Inner_Component'); + + const hast = mdxish(doc, { components }); + + const outerComponent = hast.children.find( + child => child.type === 'element' && child.tagName === 'Outer_Component', + ); + expect(outerComponent).toBeDefined(); + expect(outerComponent?.type).toBe('element'); + + const outerElement = outerComponent as Element; + const innerComponent = outerElement.children.find( + child => child.type === 'element' && (child as Element).tagName === 'Inner_Component', + ); + expect(innerComponent).toBeDefined(); + }); + }); + + describe('mixed component types', () => { + it('should handle snake_case alongside PascalCase components', () => { + const doc = ` + +`; + + const components = makeComponents('Snake_case', 'PascalCase'); + + const hast = mdxish(doc, { components }); + + const snakeComponent = hast.children.find(child => child.type === 'element' && child.tagName === 
'Snake_case'); + const pascalComponent = hast.children.find(child => child.type === 'element' && child.tagName === 'PascalCase'); + + expect(snakeComponent).toBeDefined(); + expect(pascalComponent).toBeDefined(); + }); + + it('should handle snake_case alongside markdown', () => { + const doc = `# Main Heading + +Some regular markdown text. + + + +More markdown after the component.`; + + const components = makeComponents('Snake_case'); + + const hast = mdxish(doc, { components }); + + const heading = hast.children.find(child => child.type === 'element' && child.tagName === 'h1'); + const component = hast.children.find(child => child.type === 'element' && child.tagName === 'Snake_case'); + const paragraphs = hast.children.filter(child => child.type === 'element' && child.tagName === 'p'); + + expect(heading).toBeDefined(); + expect(component).toBeDefined(); + expect(paragraphs.length).toBeGreaterThan(0); + }); + }); + + describe('edge cases', () => { + it('should handle consecutive underscores', () => { + const doc = ''; + const components = makeComponents('Component__Double'); + + const hast = mdxish(doc, { components }); + + const component = hast.children.find(child => child.type === 'element' && child.tagName === 'Component__Double'); + expect(component).toBeDefined(); + }); + + it('should NOT transform lowercase snake_case tags', () => { + const doc = '\n\n'; + const components = makeComponents('Snake_case'); + + const hast = mdxish(doc, { components }); + + const upperComponent = hast.children.find(child => child.type === 'element' && child.tagName === 'Snake_case'); + expect(upperComponent).toBeDefined(); + }); + }); + + describe('regression tests', () => { + it('should still render PascalCase components correctly', () => { + const doc = ''; + const components = makeComponents('MyComponent'); + + const hast = mdxish(doc, { components }); + + const component = hast.children.find(child => child.type === 'element' && child.tagName === 'MyComponent'); + 
expect(component).toBeDefined(); + }); + + it('should still render kebab-case components correctly', () => { + const doc = ''; + const components = makeComponents('my-component'); + + const hast = mdxish(doc, { components }); + + const component = hast.children.find(child => child.type === 'element' && child.tagName === 'my-component'); + expect(component).toBeDefined(); + }); + + it('should still render GFM blockquotes', () => { + const doc = '> This is a blockquote'; + const hast = mdxish(doc); + + const blockquote = hast.children.find(child => child.type === 'element' && child.tagName === 'blockquote'); + expect(blockquote).toBeDefined(); + }); + }); +}); diff --git a/lib/constants.ts b/lib/constants.ts new file mode 100644 index 000000000..1d721f5c2 --- /dev/null +++ b/lib/constants.ts @@ -0,0 +1,5 @@ +/** + * Pattern to match component tags (PascalCase or snake_case) + */ +export const componentTagPattern = /<(\/?[A-Z][A-Za-z0-9_]*)([^>]*?)(\/?)>/g; + diff --git a/lib/mdxish.ts b/lib/mdxish.ts index c20b5caf1..c1b2eec7e 100644 --- a/lib/mdxish.ts +++ b/lib/mdxish.ts @@ -23,8 +23,10 @@ import evaluateExpressions from '../processor/transform/mdxish/evaluate-expressi import mdxishComponentBlocks from '../processor/transform/mdxish/mdxish-component-blocks'; import mdxishHtmlBlocks from '../processor/transform/mdxish/mdxish-html-blocks'; import magicBlockRestorer from '../processor/transform/mdxish/mdxish-magic-blocks'; +import { processSnakeCaseComponent } from '../processor/transform/mdxish/mdxish-snake-case-components'; import mdxishTables from '../processor/transform/mdxish/mdxish-tables'; import { preprocessJSXExpressions, type JSXContext } from '../processor/transform/mdxish/preprocess-jsx-expressions'; +import restoreSnakeCaseComponentNames from '../processor/transform/mdxish/restore-snake-case-component-name'; import variablesTextTransformer from '../processor/transform/mdxish/variables-text'; import tailwindTransformer from 
'../processor/transform/tailwind'; @@ -53,11 +55,17 @@ export function mdxish(mdContent: string, opts: MdxishOpts = {}): Root { ...userComponents, }; - // Preprocess content: extract legacy magic blocks and evaluate JSX attribute expressions - const { replaced, blocks } = extractMagicBlocks(mdContent); - const processedContent = preprocessJSXExpressions(replaced, jsxContext); + // Preprocessing pipeline: Transform content to be parser-ready + // Step 1: Extract legacy magic blocks + const { replaced: contentAfterMagicBlocks, blocks } = extractMagicBlocks(mdContent); + // Step 2: Evaluate JSX expressions in attributes + const contentAfterJSXEvaluation = preprocessJSXExpressions(contentAfterMagicBlocks, jsxContext); + // Step 3: Replace snake_case component names with parser-safe placeholders + // (e.g., which will be restored after parsing) + const { content: parserReadyContent, mapping: snakeCaseMapping } = + processSnakeCaseComponent(contentAfterJSXEvaluation); - // Create string map of components for tailwind transformer + // Create string map for tailwind transformer const tempComponentsMap = Object.entries(components).reduce((acc, [key, value]) => { acc[key] = String(value); return acc; @@ -72,6 +80,7 @@ export function mdxish(mdContent: string, opts: MdxishOpts = {}): Root { .use(imageTransformer, { isMdxish: true }) .use(defaultTransformers) .use(mdxishComponentBlocks) + .use(restoreSnakeCaseComponentNames, { mapping: snakeCaseMapping }) .use(mdxishTables) .use(mdxishHtmlBlocks) .use(evaluateExpressions, { context: jsxContext }) // Evaluate MDX expressions using jsxContext @@ -86,8 +95,8 @@ export function mdxish(mdContent: string, opts: MdxishOpts = {}): Root { processMarkdown: (markdown: string) => mdxish(markdown, opts), }); - const vfile = new VFile({ value: processedContent }); - const hast = processor.runSync(processor.parse(processedContent), vfile) as Root; + const vfile = new VFile({ value: parserReadyContent }); + const hast = 
processor.runSync(processor.parse(parserReadyContent), vfile) as Root; if (!hast) { throw new Error('Markdown pipeline did not produce a HAST tree.'); diff --git a/lib/mdxishTags.ts b/lib/mdxishTags.ts index dc3214240..313db5920 100644 --- a/lib/mdxishTags.ts +++ b/lib/mdxishTags.ts @@ -6,13 +6,14 @@ import { visit } from 'unist-util-visit'; import mdxishComponentBlocks from '../processor/transform/mdxish/mdxish-component-blocks'; import { isMDXElement } from '../processor/utils'; + import { extractMagicBlocks } from './utils/extractMagicBlocks'; const tags = (doc: string) => { const { replaced: sanitizedDoc } = extractMagicBlocks(doc); + const set = new Set(); - const processor = remark() - .use(mdxishComponentBlocks); + const processor = remark().use(mdxishComponentBlocks); const tree = processor.parse(sanitizedDoc); visit(processor.runSync(tree), isMDXElement, (node: MdxJsxFlowElement | MdxJsxTextElement) => { diff --git a/processor/transform/mdxish/mdxish-component-blocks.ts b/processor/transform/mdxish/mdxish-component-blocks.ts index 53e6e2299..14784fa68 100644 --- a/processor/transform/mdxish/mdxish-component-blocks.ts +++ b/processor/transform/mdxish/mdxish-component-blocks.ts @@ -5,7 +5,7 @@ import type { Plugin } from 'unified'; import remarkParse from 'remark-parse'; import { unified } from 'unified'; -const tagPattern = /^<([A-Z][A-Za-z0-9]*)([^>]*?)(\/?)>([\s\S]*)?$/; +const tagPattern = /^<([A-Z][A-Za-z0-9_]*)([^>]*?)(\/?)>([\s\S]*)?$/; const attributePattern = /([a-zA-Z_:][-a-zA-Z0-9_:.]*)(?:\s*=\s*("[^"]*"|'[^']*'|[^\s"'>]+))?/g; const inlineMdProcessor = unified().use(remarkParse); diff --git a/processor/transform/mdxish/mdxish-snake-case-components.ts b/processor/transform/mdxish/mdxish-snake-case-components.ts new file mode 100644 index 000000000..88676962e --- /dev/null +++ b/processor/transform/mdxish/mdxish-snake-case-components.ts @@ -0,0 +1,65 @@ +import { componentTagPattern } from '../../../lib/constants'; + +export type SnakeCaseMapping = 
Record<string, string>; // placeholder name -> original snake_case component name

/** Result of {@link processSnakeCaseComponent}. */
export interface SnakeCasePreprocessResult {
  /** The input text with every snake_case component tag renamed to its placeholder. */
  content: string;
  /** Lookup table from generated placeholder name to the original component name. */
  mapping: SnakeCaseMapping;
}

/** Prefix of the generated, underscore-free stand-in tag names. */
const PLACEHOLDER_PREFIX = 'MDXishSnakeCase';

/**
 * Replaces snake_case component names with valid HTML placeholders.
 * Required because remark-parse rejects tags with underscores.
 * Example: `<Snake_case />` → `<MDXishSnakeCase0 />`
 *
 * Opening, closing and self-closing tags of the same component share one
 * placeholder. Tags without an underscore are left untouched.
 *
 * @param content - Raw document text.
 * @returns The rewritten text plus the placeholder → original-name mapping
 *   (empty when nothing was replaced).
 */
export function processSnakeCaseComponent(content: string): SnakeCasePreprocessResult {
  // Cheap pre-check: bail out unless something shaped like `Upper…_…` occurs at all.
  if (!/[A-Z][A-Za-z0-9]*_[A-Za-z0-9_]*/.test(content)) {
    return { content, mapping: {} };
  }

  const mapping: SnakeCaseMapping = {};
  const placeholderByName = new Map<string, string>();
  // Placeholders are restored case-insensitively, so collisions are checked
  // against a lower-cased copy of the document.
  const lowerContent = content.toLowerCase();
  let counter = 0;

  // Hands out the next placeholder that does not already appear in the document,
  // so a pre-existing `<MDXishSnakeCase0>` tag is never mistaken for one of ours.
  const nextPlaceholder = (): string => {
    let candidate: string;
    do {
      candidate = `${PLACEHOLDER_PREFIX}${counter}`;
      counter += 1;
    } while (lowerContent.includes(candidate.toLowerCase()));
    return candidate;
  };

  const processedContent = content.replace(
    componentTagPattern,
    (match: string, tagName: string, attrs: string, selfClosing: string) => {
      if (!tagName.includes('_')) {
        return match;
      }

      // The first capture group keeps the leading `/` of a closing tag.
      const isClosing = tagName.startsWith('/');
      const cleanTagName = isClosing ? tagName.slice(1) : tagName;

      let placeholder = placeholderByName.get(cleanTagName);
      if (placeholder === undefined) {
        placeholder = nextPlaceholder();
        mapping[placeholder] = cleanTagName;
        placeholderByName.set(cleanTagName, placeholder);
      }

      const processedTagName = isClosing ? `/${placeholder}` : placeholder;
      return `<${processedTagName}${attrs}${selfClosing}>`;
    },
  );

  return {
    content: processedContent,
    mapping,
  };
}

/**
 * Restores placeholder name to original snake_case name.
 * Uses case-insensitive matching since HTML parsers normalize to lowercase.
 *
 * Only the mapping's own keys are consulted, so names that happen to exist on
 * `Object.prototype` (`constructor`, `toString`, …) are returned unchanged
 * instead of resolving to an inherited member.
 *
 * @param placeholderName - Tag name as found in the parsed tree.
 * @param mapping - Placeholder → original-name table from {@link processSnakeCaseComponent}.
 * @returns The original component name, or `placeholderName` when it is not a known placeholder.
 */
export function restoreSnakeCase(placeholderName: string, mapping: SnakeCaseMapping): string {
  if (Object.prototype.hasOwnProperty.call(mapping, placeholderName)) {
    const exact = mapping[placeholderName];
    if (exact !== undefined) {
      return exact;
    }
  }

  const lowerName = placeholderName.toLowerCase();
  const matchingKey = Object.keys(mapping).find(key => key.toLowerCase() === lowerName);

  return matchingKey !== undefined ? mapping[matchingKey] ?? placeholderName : placeholderName;
}

// ---------------------------------------------------------------------------
// diff --git a/processor/transform/mdxish/restore-snake-case-component-name.ts
//            b/processor/transform/mdxish/restore-snake-case-component-name.ts
// new file mode 100644, index 000000000..8b9c42029
// ---------------------------------------------------------------------------
import type { SnakeCaseMapping } from './mdxish-snake-case-components';
import type { Code, Html, InlineCode, Parent } from 'mdast';
import type { MdxJsxFlowElement } from 'mdast-util-mdx-jsx';
import type { Plugin } from 'unified';

import { visit } from 'unist-util-visit';

import { restoreSnakeCase } from './mdxish-snake-case-components';

interface Options {
  /** Placeholder → original-name table produced before parsing. */
  mapping: SnakeCaseMapping;
}

/**
 * Restores snake_case component names from placeholders after parsing.
 * Runs after mdxishComponentBlocks converts HTML nodes to mdxJsxFlowElement.
 *
 * Three kinds of nodes are repaired:
 *  - `mdxJsxFlowElement`: the element name itself;
 *  - `html`: placeholder tags that stayed raw HTML;
 *  - `code` / `inlineCode`: the pre-parse rename works on raw text and therefore
 *    also rewrites tags inside code samples, which must show the author's
 *    original name.
 */
const restoreSnakeCaseComponentNames: Plugin<[Options], Parent> = (options: Options) => {
  const { mapping } = options;

  return tree => {
    if (!mapping || Object.keys(mapping).length === 0) {
      return tree;
    }

    visit(tree, 'mdxJsxFlowElement', (node: MdxJsxFlowElement) => {
      if (node.name) {
        node.name = restoreSnakeCase(node.name, mapping);
      }
    });

    // Compile one pattern per placeholder up front. The trailing group requires
    // whitespace or the end of the tag, so `…Case1` never matches inside `…Case10`.
    const tagPatterns = Object.entries(mapping).map(([placeholder, original]) => ({
      regex: new RegExp(`(<\\/?)(${placeholder})(\\s|\\/?>)`, 'gi'),
      original,
    }));

    visit(tree, ['html', 'code', 'inlineCode'], node => {
      // The test above only lets these three literal node types through.
      const literal = node as Code | Html | InlineCode;
      if (!literal.value) {
        return;
      }

      let restored = literal.value;
      tagPatterns.forEach(({ regex, original }) => {
        // A replacer function keeps the original name literal (no `$` expansion).
        restored = restored.replace(
          regex,
          (_match: string, open: string, _name: string, tail: string) => `${open}${original}${tail}`,
        );
      });

      if (restored !== literal.value) {
        literal.value = restored;
      }
    });

    return tree;
  };
};

export default restoreSnakeCaseComponentNames;