diff --git a/__tests__/lib/mdxish/magic-blocks.test.ts b/__tests__/lib/mdxish/magic-blocks.test.ts index 30596908e..a3d8129c6 100644 --- a/__tests__/lib/mdxish/magic-blocks.test.ts +++ b/__tests__/lib/mdxish/magic-blocks.test.ts @@ -56,92 +56,187 @@ ${JSON.stringify( const ast = mdxish(md); - // Some extra children are added to the AST by the mdxish wrapper - expect(ast.children).toHaveLength(4); - expect(ast.children[2].type).toBe('element'); + expect(ast.children).toHaveLength(2); + expect(ast.children[1].type).toBe('element'); - const element = ast.children[2] as Element; + const element = ast.children[1] as Element; expect(element.tagName).toBe('table'); expect(element.children).toHaveLength(2); expect((element.children[0] as Element).tagName).toBe('thead'); expect((element.children[1] as Element).tagName).toBe('tbody'); }); + }); - it('should convert html content inside table cells as nodes in the ast', () => { - const md = ` -[block:parameters] -${JSON.stringify( - { - data: { - 'h-0': 'Header 0', - 'h-1': 'Header 1', - '0-0': '

this should be a h1 element node

', - '0-1': 'this should be a strong element node', - }, - cols: 2, - rows: 1, - }, - null, - 2, -)} -[/block]`; + describe('general tests', () => { + it('should restore image block inside a list item', () => { + const md = `- First item +- [block:image]{"images":[{"image":["https://example.com/img.png",null,null]}]}[/block]`; const ast = mdxish(md); - // Some extra children are added to the AST by the mdxish wrapper - expect(ast.children).toHaveLength(4); - // Table is the 3rd child - const element = ast.children[2] as Element; - expect(element.tagName).toBe('table'); - expect(element.children).toHaveLength(2); - expect((element.children[1] as Element).tagName).toBe('tbody'); + const listElement = ast.children.find(c => c.type === 'element' && (c as Element).tagName === 'ul') as Element; + expect(listElement).toBeDefined(); - // Check that HTML in cells is parsed as element nodes - const tbody = element.children[1] as Element; - const row = tbody.children[0] as Element; - const cell0 = row.children[0] as Element; - const cell1 = row.children[1] as Element; + const imageElement = listElement.children + .filter((li): li is Element => li.type === 'element') + .flatMap((li: Element) => li.children || []) + .find((c): c is Element => c.type === 'element' && c.tagName === 'img'); - expect((cell0.children[0] as Element).tagName).toBe('h1'); - expect((cell1.children[0] as Element).tagName).toBe('strong'); + expect(imageElement).toBeDefined(); + expect(imageElement!.tagName).toBe('img'); + expect(imageElement!.properties.src).toBe('https://example.com/img.png'); }); - it('should restore markdown content inside table cells', () => { - const md = ` -[block:parameters] -${JSON.stringify( - { - data: { - 'h-0': 'Header 0', - 'h-1': 'Header 1', - '0-0': '**Bold**', - '0-1': '*Italic*', - }, - cols: 2, - rows: 1, - }, - null, - 2, -)} -[/block]`; + it('should restore code block inside a list item', () => { + const md = `- First item +- [block:code]{"codes":[{"code":"const x = 1;","language":"javascript"}]}[/block]`; const ast = mdxish(md); - // Some extra children are added to the AST by the mdxish wrapper - expect(ast.children).toHaveLength(4); - // Table is the 3rd child - const element = ast.children[2] as Element; - expect(element.tagName).toBe('table'); - expect(element.children).toHaveLength(2); - expect((element.children[1] as Element).tagName).toBe('tbody'); + const listElement = ast.children.find(c => c.type === 'element' && (c as Element).tagName === 'ul') as Element; + expect(listElement).toBeDefined(); + + const codeElement = listElement.children + .filter((li): li is Element => li.type === 'element') + .flatMap((li: Element) => li.children || []) + .find((c): c is Element => c.type === 'element' && c.tagName === 'CodeTabs'); + + expect(codeElement).toBeDefined(); + expect(codeElement!.tagName).toBe('CodeTabs'); + }); + + it('should restore api-header block inside a list item', () => { + const md = `- First item +- [block:api-header]{"title":"API Endpoint","level":2}[/block]`; + + const ast = mdxish(md); + + const listElement = ast.children.find(c => c.type === 'element' && (c as Element).tagName === 'ul') as Element; + expect(listElement).toBeDefined(); + + const headingElement = listElement.children + .filter((li): li is Element => li.type === 'element') + .flatMap((li: Element) => li.children || []) + .find((c): c is Element => c.type === 'element' && ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(c.tagName)); + + expect(headingElement).toBeDefined(); + expect(headingElement!.tagName).toBe('h2'); + }); + + // TODO: unskip this test once callout magic blocks are correctly supported + it.skip('should restore callout block inside a list item', () => { + const md = `- First item +- [block:callout]{"type":"info","title":"Note","body":"This is important"}[/block]`; + + const ast = mdxish(md); + + const listElement = ast.children.find(c => c.type === 'element' && (c as Element).tagName === 'ul') as Element; + expect(listElement).toBeDefined(); + + const calloutElement = listElement.children + .filter((li): li is Element => li.type === 'element') + .flatMap((li: Element) => li.children || []) + .find((c): c is Element => c.type === 'element' && c.tagName === 'Callout'); + + expect(calloutElement).toBeDefined(); + // rehypeMdxishComponents maps rdme-callout -> Callout + expect(calloutElement!.tagName).toBe('Callout'); + }); + + it('should restore parameters block inside a list item', () => { + const md = `- First item +- [block:parameters]{"data":{"h-0":"Name","h-1":"Type","0-0":"id","0-1":"string"},"cols":2,"rows":1}[/block]`; + + const ast = mdxish(md); + + const listElement = ast.children.find(c => c.type === 'element' && (c as Element).tagName === 'ul') as Element; + expect(listElement).toBeDefined(); + + const tableElement = listElement.children + .filter((li): li is Element => li.type === 'element') + .flatMap((li: Element) => li.children || []) + .find((c): c is Element => c.type === 'element' && c.tagName === 'table'); + + expect(tableElement).toBeDefined(); + expect(tableElement!.tagName).toBe('table'); + }); + + // TODO: unskip this test once embed magic blocks are supported + it.skip('should restore embed block inside a list item', () => { + const md = `- First item +- [block:embed]{"url":"https://www.youtube.com/watch?v=dQw4w9WgXcQ","title":"Video"}[/block]`; + + const ast = mdxish(md); + + const listElement = ast.children.find(c => c.type === 'element' && (c as Element).tagName === 'ul') as Element; + expect(listElement).toBeDefined(); + + const embedElement = listElement.children + .filter((li): li is Element => li.type === 'element') + .flatMap((li: Element) => li.children || []) + .find((c): c is Element => c.type === 'element' && c.tagName === 'rdme-embed'); + + expect(embedElement).toBeDefined(); + expect(embedElement!.tagName).toBe('rdme-embed'); + }); + + it('should restore html block inside a list item', () => { + const md = `- First item +- [block:html]{"html":"
Hello World
"}[/block]`; + + const ast = mdxish(md); + const listElement = ast.children.find(c => c.type === 'element' && (c as Element).tagName === 'ul') as Element; + expect(listElement).toBeDefined(); + + const htmlElement = listElement.children + .filter((li): li is Element => li.type === 'element') + .flatMap((li: Element) => li.children || []) + .find((c): c is Element => c.type === 'element' && c.tagName === 'HTMLBlock'); + + expect(htmlElement).toBeDefined(); + expect(htmlElement!.tagName).toBe('HTMLBlock'); + }); + + // TODO: unskip this test once recipe magic blocks are correctly supported + it.skip('should restore recipe block inside a list item', () => { + const md = `- open +- [block:tutorial-tile]{"emoji":"🦉","slug":"whoaaa","title":"WHOAAA"}[/block]`; + + const ast = mdxish(md); + + const listElement = ast.children.find(c => c.type === 'element' && (c as Element).tagName === 'ul') as Element; + expect(listElement).toBeDefined(); + + const recipeElement = listElement.children + .filter((li): li is Element => li.type === 'element') + .flatMap((li: Element) => li.children || []) + .find((c): c is Element => c.type === 'element' && c.tagName === 'Recipe'); + + expect(recipeElement).toBeDefined(); + expect(recipeElement!.tagName).toBe('Recipe'); + expect(recipeElement!.properties.slug).toBe('whoaaa'); + expect(recipeElement!.properties.title).toBe('WHOAAA'); + }); + + // TODO: unskip this test once recipe magic blocks are correctly supported + it.skip('should restore recipe block (recipe type) inside a list item', () => { + const md = `- open +- [block:recipe]{"emoji":"👉","slug":"test-recipe","title":"Test Recipe"}[/block]`; + + const ast = mdxish(md); + + const listElement = ast.children.find(c => c.type === 'element' && (c as Element).tagName === 'ul') as Element; + expect(listElement).toBeDefined(); - const tbody = element.children[1] as Element; - const row = tbody.children[0] as Element; - const cell0 = row.children[0] as Element; - const cell1 = row.children[1] as Element; + const recipeElement = listElement.children + .filter((li): li is Element => li.type === 'element') + .flatMap((li: Element) => li.children || []) + .find((c): c is Element => c.type === 'element' && c.tagName === 'Recipe'); - expect((cell0.children[0] as Element).tagName).toBe('strong'); - expect((cell1.children[0] as Element).tagName).toBe('em'); + expect(recipeElement).toBeDefined(); + expect(recipeElement!.tagName).toBe('Recipe'); + expect(recipeElement!.properties.slug).toBe('test-recipe'); + expect(recipeElement!.properties.title).toBe('Test Recipe'); }); }); -}); \ No newline at end of file +}); diff --git a/lib/mdxish.ts b/lib/mdxish.ts index c20b5caf1..10a7ae759 100644 --- a/lib/mdxish.ts +++ b/lib/mdxish.ts @@ -54,7 +54,7 @@ export function mdxish(mdContent: string, opts: MdxishOpts = {}): Root { }; // Preprocess content: extract legacy magic blocks and evaluate JSX attribute expressions - const { replaced, blocks } = extractMagicBlocks(mdContent); + const { replaced, blocks } = extractMagicBlocks(mdContent, false); const processedContent = preprocessJSXExpressions(replaced, jsxContext); // Create string map of components for tailwind transformer diff --git a/lib/utils/extractMagicBlocks.ts b/lib/utils/extractMagicBlocks.ts index b11240f26..fda34c7d0 100644 --- a/lib/utils/extractMagicBlocks.ts +++ b/lib/utils/extractMagicBlocks.ts @@ -17,7 +17,7 @@ const MAGIC_BLOCK_REGEX = /\[block:[^\]]{1,100}\](?:(?!\[block:)(?!\[\/block\])[ * Extract legacy magic block syntax from a markdown string. * Returns the modified markdown and an array of extracted blocks. */ -export function extractMagicBlocks(markdown: string) { +export function extractMagicBlocks(markdown: string, prependNewline: boolean = true) { const blocks: BlockHit[] = []; let index = 0; @@ -31,7 +31,7 @@ export function extractMagicBlocks(markdown: string) { * - Prepend a newline to the token to ensure it is parsed as a block level node */ const key = `__MAGIC_BLOCK_${index}__`; - const token = `\n\`${key}\``; + const token = prependNewline ? `\n\`${key}\`` : `\`${key}\``; blocks.push({ key, raw: match, token }); index += 1; @@ -44,10 +44,10 @@ export function extractMagicBlocks(markdown: string) { /** * Restore extracted magic blocks back into a markdown string. */ -export function restoreMagicBlocks(replaced: string, blocks: BlockHit[]) { +export function restoreMagicBlocks(replaced: string, blocks: BlockHit[]) { let content = replaced; - // If a magic block is at the start of the document, the extraction token's prepended + // If a magic block is at the start of the document, the extraction token's prepended // newline will have been trimmed during processing. We need to account for that here // to ensure the token is found and replaced correctly. const isTokenAtStart = content.startsWith(blocks[0]?.token.trimStart()); diff --git a/processor/transform/mdxish/mdxish-magic-blocks.ts b/processor/transform/mdxish/mdxish-magic-blocks.ts index 4a28a4d6c..88ce17041 100644 --- a/processor/transform/mdxish/mdxish-magic-blocks.ts +++ b/processor/transform/mdxish/mdxish-magic-blocks.ts @@ -388,6 +388,48 @@ const magicBlockRestorer: Plugin<[{ blocks: BlockHit[] }], MdastRoot> = const magicBlockKeys = new Map(blocks.map(({ key, raw }) => [key, raw] as const)); // Find inlineCode nodes that match our placeholder tokens + // We need to collect modifications first to avoid index issues during iteration + const modifications: { + children: RootContent[]; + paragraphIndex: number; + parent: Parent; + }[] = []; + + /** + * Check if a node is a flow (block-level) element that cannot be a child of a paragraph. + * Flow elements include: code, heading, image, figure, table, blockquote, list, html, + * mdxJsxFlowElement, and custom block types like rdme-callout, embed, html-block, etc. + * Note: In magic blocks, images are always block-level, even though MDAST allows inline images. + */ + const isFlowElement = (node: RootContent): boolean => { + // Phrasing/inline element types that CAN be children of paragraphs + const phrasingTypes = new Set([ + 'text', + 'emphasis', + 'strong', + 'delete', + 'inlineCode', + 'break', + 'link', + 'footnoteReference', + 'mdxJsxTextElement', + ]); + + // If it's not a phrasing element, it's a flow element + // Note: 'image' is not in phrasingTypes because magic block images are always block-level + return !phrasingTypes.has(node.type); + }; + + // First pass: collect all inlineCode and code nodes that need to be replaced + const inlineCodeReplacements: { + children: RootContent[]; + index: number; + isFlowElement: boolean; + nodeType: 'code' | 'inlineCode'; + parent: Parent; + }[] = []; + + // Visit inlineCode nodes (for non-indented magic blocks) visit(tree, 'inlineCode', (node: Code, index: number, parent: Parent) => { if (!parent || index == null) return; const raw = magicBlockKeys.get(node.value); @@ -397,7 +439,205 @@ const magicBlockRestorer: Plugin<[{ blocks: BlockHit[] }], MdastRoot> = const children = parseMagicBlock(raw) as unknown as RootContent[]; if (!children.length) return; - parent.children.splice(index, 1, ...children); + // Check if the first child is a flow element (block-level element) + // Flow elements cannot be children of paragraphs, so we need to unwrap the paragraph + const isFlow = isFlowElement(children[0]); + + inlineCodeReplacements.push({ children, index, isFlowElement: isFlow, nodeType: 'inlineCode', parent }); + }); + + // Visit code nodes (for indented magic blocks that were parsed as code blocks) + visit(tree, 'code', (node: Code, index: number, parent: Parent) => { + if (!parent || index == null) return; + // Code blocks have a 'value' property + const codeValue = (node as { value?: string }).value; + if (!codeValue) return; + + // Try to find matching magic block key (handle whitespace/newlines) + // The code block value might be exactly the key, or might have extra whitespace + const trimmedValue = codeValue.trim(); + let raw = magicBlockKeys.get(trimmedValue); + + // If not found, try matching any line that contains a magic block key + // This handles cases where the code block has multiple lines or extra content + if (!raw && trimmedValue.includes('__MAGIC_BLOCK_')) { + // Try to extract the magic block key from the code value using regex + const keyMatch = trimmedValue.match(/__MAGIC_BLOCK_\d+__/); + if (keyMatch) { + raw = magicBlockKeys.get(keyMatch[0]); + } + + // Fallback: try matching line by line using array methods + if (!raw) { + const lines = trimmedValue.split('\n'); + const matchingLine = lines.find(line => { + const key = line.trim(); + return magicBlockKeys.has(key); + }); + if (matchingLine) { + raw = magicBlockKeys.get(matchingLine.trim()); + } + } + } + + if (!raw) return; + + // Parse the original magic block and replace the placeholder with the result + const children = parseMagicBlock(raw) as unknown as RootContent[]; + if (!children.length) return; + + // Code blocks are always flow elements + inlineCodeReplacements.push({ children, index, isFlowElement: true, nodeType: 'code', parent }); + }); + + // Second pass: replace paragraphs containing flow elements with the flow elements directly + inlineCodeReplacements.forEach(({ children, index, isFlowElement: isFlow, nodeType, parent }) => { + // Handle code blocks that were parsed as code blocks (not inline code) + if (nodeType === 'code') { + // Code blocks are flow elements, so we need to replace them directly in their parent + // If code block is in a listItem, we need to replace it while preserving other children + if (parent.type === 'listItem') { + // Code blocks in listItems should be replaced directly + // Other children of the listItem (like nested lists) will be preserved automatically + // because we're only replacing the code block node itself + modifications.push({ children, paragraphIndex: index, parent }); + return; + } + + // Otherwise, replace the code block directly + parent.children.splice(index, 1, ...children); + return; + } + + // Handle inlineCode nodes + if (!isFlow || parent.type !== 'paragraph') { + parent.children.splice(index, 1, ...children); + return; + } + + let paragraphParent: Parent | undefined; + visit(tree, 'paragraph', (p, pIndex, pParent) => { + if (p === parent && pParent && 'children' in pParent) { + paragraphParent = pParent as Parent; + return false; + } + return undefined; + }); + + if (!paragraphParent) return; + + const paragraphIndex = paragraphParent.children.indexOf(parent as RootContent); + if (paragraphIndex === -1) return; + + if (paragraphParent.type === 'listItem') { + // When replacing a paragraph in a listItem, we need to preserve: + // 1. Any content before the magic block token within the paragraph + // 2. Any content after the magic block token within the paragraph + // 3. Other children of the listItem (like nested lists) that come after the paragraph + // These are automatically preserved because we're only replacing the paragraph node itself + const paragraph = parent as { children: RootContent[] }; + const beforeContent = paragraph.children.slice(0, index); + const afterContent = paragraph.children.slice(index + 1); + + // Build the nodes to insert: content before → flow element(s) → content after + const nodesToInsert: RootContent[] = []; + + // If there's content before the magic block, keep it in a paragraph + if (beforeContent.length > 0) { + nodesToInsert.push({ + type: 'paragraph', + children: beforeContent, + } as RootContent); + } + + // Add the flow element(s) + nodesToInsert.push(...children); + + // If there's content after the magic block token in the paragraph + if (afterContent.length > 0) { + // Check if afterContent contains only phrasing/inline content + const phrasingTypes = new Set([ + 'text', + 'emphasis', + 'strong', + 'delete', + 'inlineCode', + 'break', + 'link', + 'footnoteReference', + 'mdxJsxTextElement', + ]); + const hasOnlyPhrasing = afterContent.every(node => phrasingTypes.has(node.type)); + + if (hasOnlyPhrasing) { + // Inline content can't be after a flow element, so wrap it in a paragraph + nodesToInsert.push({ + type: 'paragraph', + children: afterContent, + } as RootContent); + } else { + // Flow elements should already be separate nodes (shouldn't happen in practice) + nodesToInsert.push(...afterContent); + } + } + + // Replace the paragraph with the new nodes + // The splice operation will preserve other children of the listItem (like nested lists) + // that come after this paragraph + if (nodesToInsert.length > 0) { + modifications.push({ children: nodesToInsert, paragraphIndex, parent: paragraphParent }); + } + return; + } + + if (paragraphParent.type === 'root' && paragraphIndex > 0) { + const prevSibling = paragraphParent.children[paragraphIndex - 1]; + if (prevSibling.type === 'list' && 'children' in prevSibling && Array.isArray(prevSibling.children)) { + const list = prevSibling as { children: RootContent[] }; + if (list.children.length > 0) { + const lastListItem = list.children[list.children.length - 1]; + if (lastListItem && 'children' in lastListItem && Array.isArray(lastListItem.children)) { + modifications.push({ children: [], paragraphIndex, parent: paragraphParent }); + modifications.push({ + children, + paragraphIndex: (lastListItem.children as RootContent[]).length, + parent: lastListItem as unknown as Parent, + }); + return; + } + } + } + } + + modifications.push({ children, paragraphIndex, parent: paragraphParent }); + }); + + // Apply modifications (replacing paragraphs/code blocks with flow elements) + // Separate modifications into appends (to list items) and replacements + const appends: typeof modifications = []; + const replacements: typeof modifications = []; + + modifications.forEach(mod => { + // If we're appending to a list item (index equals length), use append logic + if (mod.parent.type === 'listItem' && mod.paragraphIndex === mod.parent.children.length) { + appends.push(mod); + } else { + replacements.push(mod); + } + }); + + // Apply appends first (they don't affect indices) + appends.forEach(({ children: modChildren, paragraphIndex, parent: modParent }) => { + modParent.children.splice(paragraphIndex, 0, ...modChildren); + }); + + // Then apply replacements in reverse order (bottom-up to avoid index shifting) + // This ensures that when we replace nodes, we don't affect indices of nodes we haven't processed yet + replacements.reverse().forEach(({ children: modChildren, paragraphIndex, parent: modParent }) => { + // Ensure we're not going out of bounds + if (paragraphIndex >= 0 && paragraphIndex < modParent.children.length) { + modParent.children.splice(paragraphIndex, 1, ...modChildren); + } }); };