From 0c0b76e664361dd21844343ddd210671bd665a8e Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 1 Aug 2025 23:07:44 +0100 Subject: [PATCH 1/3] Add test for unmatched inline code --- src/wrap/tokenize.rs | 3 ++- tests/wrap/tokenize_markdown.rs | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/wrap/tokenize.rs b/src/wrap/tokenize.rs index a839ca2e..08ba8bc1 100644 --- a/src/wrap/tokenize.rs +++ b/src/wrap/tokenize.rs @@ -129,13 +129,14 @@ fn next_token(s: &str) -> Option<(Token<'_>, usize)> { } let delim_len = s.chars().take_while(|&c| c == '`').count(); if delim_len == 0 { - return Some((Token::Text(s), s.len())); + return Some((Token::Text(&s[..1]), 1)); } let closing = &s[..delim_len]; if let Some(end) = s[delim_len..].find(closing) { let code = &s[delim_len..delim_len + end]; return Some((Token::Code(code), delim_len + end + delim_len)); } + return Some((Token::Text(&s[..delim_len]), delim_len)); } Some((Token::Text(s), s.len())) } diff --git a/tests/wrap/tokenize_markdown.rs b/tests/wrap/tokenize_markdown.rs index 33f67f0b..5fce5012 100644 --- a/tests/wrap/tokenize_markdown.rs +++ b/tests/wrap/tokenize_markdown.rs @@ -50,3 +50,17 @@ fn incorrect_fence_length_is_text() { ] ); } +#[test] +fn unmatched_inline_code_is_text() { + let source = "bad `code span"; + let tokens = wrap::tokenize_markdown(source); + assert_eq!( + tokens, + vec![ + Token::Text("bad "), + Token::Text("`"), + Token::Text("code span"), + ] + ); +} + From a31c3f84c39897283b6aba063209173c7211e568 Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 1 Aug 2025 23:46:57 +0100 Subject: [PATCH 2/3] Handle empty inline code delimiter --- src/wrap/tokenize.rs | 4 +++- tests/wrap/tokenize_markdown.rs | 13 +++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/wrap/tokenize.rs b/src/wrap/tokenize.rs index 08ba8bc1..af3f12d1 100644 --- a/src/wrap/tokenize.rs +++ b/src/wrap/tokenize.rs @@ -129,7 +129,9 @@ fn next_token(s: &str) -> Option<(Token<'_>, usize)> { } let delim_len = s.chars().take_while(|&c| c == '`').count(); if delim_len == 0 { - return Some((Token::Text(&s[..1]), 1)); + let first_len = s.chars().next().unwrap().len_utf8(); + let next = s[first_len..].find('`').map_or(s.len(), |i| first_len + i); + return Some((Token::Text(&s[..next]), next)); } let closing = &s[..delim_len]; if let Some(end) = s[delim_len..].find(closing) { diff --git a/tests/wrap/tokenize_markdown.rs b/tests/wrap/tokenize_markdown.rs index 5fce5012..4e41a826 100644 --- a/tests/wrap/tokenize_markdown.rs +++ b/tests/wrap/tokenize_markdown.rs @@ -64,3 +64,16 @@ fn unmatched_inline_code_is_text() { ); } +#[test] +fn multiple_unmatched_backticks_are_text() { + let source = "``bad code"; + let tokens = wrap::tokenize_markdown(source); + assert_eq!( + tokens, + vec![ + Token::Text("``"), + Token::Text("bad code"), + ] + ); +} + From 23139a77ee702c10d28e480b15dd3ed3af064dfc Mon Sep 17 00:00:00 2001 From: Leynos Date: Sun, 3 Aug 2025 23:05:40 +0100 Subject: [PATCH 3/3] Handle text up to next backtick --- src/wrap/tokenize.rs | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/src/wrap/tokenize.rs b/src/wrap/tokenize.rs index af3f12d1..7214630e 100644 --- a/src/wrap/tokenize.rs +++ b/src/wrap/tokenize.rs @@ -123,24 +123,20 @@ fn next_token(s: &str) -> Option<(Token<'_>, usize)> { if s.is_empty() { return None; } - if let Some(pos) = s.find('`') { - if pos > 0 { + let delim_len = s.chars().take_while(|&c| c == '`').count(); + if delim_len == 0 { + if let Some(pos) = s.find('`') { return Some((Token::Text(&s[..pos]), pos)); } - let delim_len = s.chars().take_while(|&c| c == '`').count(); - if delim_len == 0 { - let first_len = s.chars().next().unwrap().len_utf8(); - let next = s[first_len..].find('`').map_or(s.len(), |i| first_len + i); - return Some((Token::Text(&s[..next]), next)); - } - let closing = &s[..delim_len]; - if let Some(end) = s[delim_len..].find(closing) { - let code = &s[delim_len..delim_len + end]; - return Some((Token::Code(code), delim_len + end + delim_len)); - } - return Some((Token::Text(&s[..delim_len]), delim_len)); + return Some((Token::Text(s), s.len())); + } + + let closing = &s[..delim_len]; + if let Some(end) = s[delim_len..].find(closing) { + let code = &s[delim_len..delim_len + end]; + return Some((Token::Code(code), delim_len + end + delim_len)); } - Some((Token::Text(s), s.len())) + Some((Token::Text(closing), delim_len)) } fn tokenize_inline<'a, F>(mut rest: &'a str, mut emit: F)