-
-
Notifications
You must be signed in to change notification settings - Fork 20
create internal links & improved sanitizeUrl for non-english characters #6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,51 @@ | ||||||||||||||||||||||||||||||||||
/**
 * Rewrite internal page links in a markdown string so they point at post URLs.
 *
 * Two link target shapes are recognised:
 *   - inline links: `[text](/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX)` (a leading "/" followed
 *     by 32 characters, i.e. a page id without hyphens)
 *   - block links:  `[text](XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX)` (a 36-character
 *     hyphenated page id)
 *
 * When the resulting id equals the `id` of an entry in `pages`, the link is
 * replaced with `[text](/posts/<page.slug>)`. Links whose text is the
 * placeholder `link_to_page` get the page's `title` as their text instead.
 * Every other link (external, unknown id, anything else) is left untouched.
 *
 * @param {string} mdString - markdown source; non-string or empty input is returned as-is
 * @param {Array<{id: string, title: string, slug: string}>} pages - known pages
 * @returns {string} the markdown with matching internal links rewritten
 */
export function convertInternalLinks(mdString, pages) {
  // Nothing to convert: hand the input back unchanged instead of throwing.
  if (typeof mdString !== "string" || mdString === "" || !Array.isArray(pages)) {
    return mdString;
  }

  // Index pages by id once; the first entry wins on duplicate ids (same as Array#find).
  const pagesById = new Map();
  for (const page of pages) {
    if (page && !pagesById.has(page.id)) {
      pagesById.set(page.id, page);
    }
  }

  // Title and target are captured in the same match, so parentheses or
  // brackets inside the link text cannot be mistaken for the target.
  // A replacer function is used so "$" sequences in titles/slugs stay literal.
  return mdString.replace(/\[(.*?)\]\((.*?)\)/g, (link, title, target) => {
    const pageId = internalLinkTargetToPageId(target);
    const page = pageId === null ? undefined : pagesById.get(pageId);
    if (!page) {
      return link;
    }
    // Some links carry the placeholder text "link_to_page"; use the real page title.
    const text = title === "link_to_page" ? page.title : title;
    return `[${text}](/posts/${page.slug})`;
  });
}

/**
 * Derive a hyphenated page id from a markdown link target, or null when the
 * target does not have the shape of an internal link.
 *
 * @param {string} target - the text between the parentheses of a markdown link
 * @returns {string|null} id in XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX form, or null
 */
function internalLinkTargetToPageId(target) {
  // External links are never internal page references.
  if (target.startsWith("http")) {
    return null;
  }
  // Inline links: "/" + 32 characters without hyphens; insert hyphens 8-4-4-4-12.
  if (target.startsWith("/") && target.length === 33) {
    const raw = target.slice(1);
    return [
      raw.slice(0, 8),
      raw.slice(8, 12),
      raw.slice(12, 16),
      raw.slice(16, 20),
      raw.slice(20),
    ].join("-");
  }
  // Block links already use the hyphenated 36-character form.
  if (target.length === 36) {
    return target;
  }
  return null;
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,12 @@ | ||
/**
 * Turn an arbitrary string into a lowercase, hyphen-separated URL slug
 * consisting only of `a-z`, `0-9` and `-`.
 *
 * German umlauts and sharp s are transliterated (ä→ae, ö→oe, ü→ue, ß→ss);
 * other accented letters lose their diacritical marks (é→e). Runs of any
 * remaining non-alphanumeric characters become a single hyphen, and leading
 * and trailing hyphens are removed.
 *
 * @param {string} str - text to convert; null/undefined yield an empty string
 * @returns {string} the sanitized slug
 */
export function sanitizeUrl(str) {
  const input = str == null ? "" : String(str);
  return input
    .toLowerCase() // lowercase first so Ä/Ö/Ü are covered by the rules below
    .normalize("NFC") // compose "u" + combining diaeresis into "ü" so the rules below match
    // Transliterate BEFORE stripping diacritics: once marks are removed,
    // "ä" has already become "a" and can no longer be told apart.
    .replace(/ä/g, "ae")
    .replace(/ö/g, "oe")
    .replace(/ü/g, "ue")
    .replace(/ß/g, "ss")
    .normalize("NFD") // decompose remaining accented letters into base letter + marks
    .replace(/[\u0300-\u036f]/g, "") // remove the combining diacritical marks
    .replace(/[^a-z0-9]+/g, "-") // replace non-alphanumeric runs with a hyphen
    .replace(/^-|-$/g, ""); // remove leading/trailing hyphen
}
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🛠️ Refactor suggestion
Add error handling for link conversion and update reading time calculation
The reading time should be calculated after converting internal links, and error handling should be added for the conversion process.
Suggested diff:
  const mdblocks = await n2m.pageToMarkdown(page.id);
  let { parent: mdString } = n2m.toMarkdownString(mdblocks);
- const estimatedReadingTime = readingTime(mdString || '').text;
+ try {
+   mdString = convertInternalLinks(mdString, pages);
+ } catch (error) {
+   console.error(`Error converting internal links for page ${page.id}:`, error);
+   // Continue with original mdString
+ }
- mdString = convertInternalLinks(mdString, pages);
+ const estimatedReadingTime = readingTime(mdString || '').text;
📝 Committable suggestion