Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/astronot.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { config } from 'dotenv';
import { parseArgs } from 'node:util';
import { sanitizeUrl, sanitizeImageString } from './helpers/sanitize.mjs';
import { hashString, downloadImage } from './helpers/images.mjs';
import { convertInternalLinks } from './helpers/links.mjs';
import { delay } from './helpers/delay.mjs';

// Input Arguments
Expand Down Expand Up @@ -123,10 +124,12 @@ const pages = results.map((page) => {
for (let page of pages) {
console.info("Fetching from Notion & Converting to Markdown: ", `${page.title} [${page.id}]`);
const mdblocks = await n2m.pageToMarkdown(page.id);
const { parent: mdString } = n2m.toMarkdownString(mdblocks);
let { parent: mdString } = n2m.toMarkdownString(mdblocks);

const estimatedReadingTime = readingTime(mdString || '').text;

mdString = convertInternalLinks(mdString, pages);
Comment on lines +127 to +131
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Add error handling for link conversion and update reading time calculation

The reading time should be calculated after converting internal links, and error handling should be added for the conversion process.

   const mdblocks = await n2m.pageToMarkdown(page.id);
   let { parent: mdString } = n2m.toMarkdownString(mdblocks);
 
-  const estimatedReadingTime = readingTime(mdString || '').text;
+  try {
+    mdString = convertInternalLinks(mdString, pages);
+  } catch (error) {
+    console.error(`Error converting internal links for page ${page.id}:`, error);
+    // Continue with original mdString
+  }
 
-  mdString = convertInternalLinks(mdString, pages);
+  const estimatedReadingTime = readingTime(mdString || '').text;
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
let { parent: mdString } = n2m.toMarkdownString(mdblocks);
const estimatedReadingTime = readingTime(mdString || '').text;
mdString = convertInternalLinks(mdString, pages);
let { parent: mdString } = n2m.toMarkdownString(mdblocks);
try {
mdString = convertInternalLinks(mdString, pages);
} catch (error) {
console.error(`Error converting internal links for page ${page.id}:`, error);
// Continue with original mdString
}
const estimatedReadingTime = readingTime(mdString || '').text;


// Download Cover Image
const coverFileName = page.cover ? await downloadImage(page.cover, { isCover: true }) : '';
if (coverFileName) console.info("Cover image downloaded:", coverFileName)
Expand Down
51 changes: 51 additions & 0 deletions src/helpers/links.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
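/**
* Convert internal links in a markdown string to the matching page URLs.
*
* Detects links like [link text](/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX) and [link text](XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX).
* If the link target matches the id of a page in the pages array, the link is rewritten to /posts/<page slug>;
* external links (starting with "http") and links that match no page are left unchanged.
* @param {string} mdString - markdown text to scan for links
* @param {Array<{id: string, title: string, slug: string}>} pages - pages whose ids are compared against the link targets
* @returns {string} the markdown string with matching internal links replaced
*/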
export function convertInternalLinks(mdString, pages) {
// find all markdown links of the form [text](target) in the string
const links = mdString.match(/\[.*?\]\(.*?\)/g);
if (!links) {
return mdString;
}
Comment on lines +12 to +15
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Add error handling for malformed markdown links

The current regex pattern might fail with malformed markdown links. Consider adding error handling and using a more robust pattern.

 export function convertInternalLinks(mdString, pages) {
+    if (!mdString || typeof mdString !== 'string') {
+        throw new Error('mdString must be a non-empty string');
+    }
+    if (!Array.isArray(pages)) {
+        throw new Error('pages must be an array');
+    }
+
     // find all link url in markdown
-    const links = mdString.match(/\[.*?\]\(.*?\)/g);
+    const links = mdString.match(/\[([^\]]*)\]\(([^)]+)\)/g);
     if (!links) {
         return mdString;
     }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
const links = mdString.match(/\[.*?\]\(.*?\)/g);
if (!links) {
return mdString;
}
if (!mdString || typeof mdString !== 'string') {
throw new Error('mdString must be a non-empty string');
}
if (!Array.isArray(pages)) {
throw new Error('pages must be an array');
}
// find all link url in markdown
const links = mdString.match(/\[([^\]]*)\]\(([^)]+)\)/g);
if (!links) {
return mdString;
}

links.forEach(link => {
// extract the link text (the part inside the square brackets)
let title = link.match(/\[(.*?)\]/)[1];

// extract the link target (the part inside the parentheses)
const slug = link.match(/\((.*?)\)/)[1];
let targetPageId = null;

// skip external links
if (slug.startsWith("http")) {
return;
}

// inline links start with "/" and do not have "-" in the slug
if (slug.startsWith("/") && slug.length === 33) {
// remove the leading "/" from the slug and add "-" like XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
targetPageId = slug.substring(1, 9) + "-" + slug.substring(9, 13) + "-" + slug.substring(13, 17) + "-" + slug.substring(17, 21) + "-" + slug.substring(21);
}
// block links are like XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX without the leading "/"
else if (slug.length === 36) {
targetPageId = slug;
}
Comment on lines +30 to +37
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Simplify UUID formatting logic using regex

The current implementation manually formats the UUID string. Consider using regex for a more maintainable solution.

-        if (slug.startsWith("/") && slug.length === 33) {
-            // remove the leading "/" from the slug and add "-" like XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
-            targetPageId = slug.substring(1, 9) + "-" + slug.substring(9, 13) + "-" + slug.substring(13, 17) + "-" + slug.substring(17, 21) + "-" + slug.substring(21);
-        }
-        // block links are like XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX without the leading "/"
-        else if (slug.length === 36) {
-            targetPageId = slug;
-        }
+        const uuidPattern = /^(?:\/)?([0-9a-f]{8})([0-9a-f]{4})([0-9a-f]{4})([0-9a-f]{4})([0-9a-f]{12})$/i;
+        const match = slug.match(uuidPattern);
+        if (match) {
+            targetPageId = match.slice(1).join('-');
+        }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if (slug.startsWith("/") && slug.length === 33) {
// remove the leading "/" from the slug and add "-" like XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
targetPageId = slug.substring(1, 9) + "-" + slug.substring(9, 13) + "-" + slug.substring(13, 17) + "-" + slug.substring(17, 21) + "-" + slug.substring(21);
}
// block links are like XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX without the leading "/"
else if (slug.length === 36) {
targetPageId = slug;
}
const uuidPattern = /^(?:\/)?([0-9a-f]{8})([0-9a-f]{4})([0-9a-f]{4})([0-9a-f]{4})([0-9a-f]{12})$/i;
const match = slug.match(uuidPattern);
if (match) {
targetPageId = match.slice(1).join('-');
}


// find the page id in the pages array
const page = pages.find(page => page.id === targetPageId);
// if the page exists, replace the link with the page url
if (page) {
// Some links arrive with the placeholder text "link_to_page" instead of a real title (this looks like an upstream bug); fall back to the page title for those.
if (title === "link_to_page") {
title = page.title;
}
mdString = mdString.replace(link, `[${title}](/posts/${page.slug})`);
}
});
return mdString;
}
6 changes: 6 additions & 0 deletions src/helpers/sanitize.mjs
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
export function sanitizeUrl(str) {
return str
.normalize('NFD') // decompose combined graphemes into base characters and diacritical marks
.replace(/[\u0300-\u036f]/g, "") // remove diacritical marks
Comment on lines +3 to +4
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Fix the character class regex for diacritical marks removal

The current regex /[\u0300-\u036f]/g for removing diacritical marks has a potential issue with matching combined characters. Consider using a more robust approach.

-    .normalize('NFD') // decompose combined graphemes into base characters and diacritical marks
-    .replace(/[\u0300-\u036f]/g, "") // remove diacritical marks
+    .normalize('NFD')
+    .replace(/\p{M}/gu, "") // preserve base characters while removing combining marks

Committable suggestion skipped: line range outside the PR's diff.

🧰 Tools
🪛 Biome (1.9.4)

[error] 4-4: A character class cannot match a character and a combining character.

A character and a combining character forms a new character. Replace the character class with an alternation.

(lint/suspicious/noMisleadingCharacterClass)

.toLowerCase() // convert to lowercase
.replace('ä', 'ae')
.replace('ö', 'oe')
.replace('ü', 'ue')
.replace('ß', 'ss')
.replace(/[^a-z0-9]+/g, "-") // replace non-alphanumeric characters with hyphens
.replace(/(^-|-$)+/g, ""); // remove leading/trailing hyphens
}
Expand Down