From 912acd7961eba7e5dafea1b45ce27bffd53833eb Mon Sep 17 00:00:00 2001 From: echobt Date: Wed, 4 Feb 2026 14:43:47 +0000 Subject: [PATCH 1/3] fix(cli): use safe UTF-8 slicing in import command base64 extraction --- src/cortex-cli/src/import_cmd.rs | 81 +++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 27 deletions(-) diff --git a/src/cortex-cli/src/import_cmd.rs b/src/cortex-cli/src/import_cmd.rs index 696d93a..38b25f8 100644 --- a/src/cortex-cli/src/import_cmd.rs +++ b/src/cortex-cli/src/import_cmd.rs @@ -357,31 +357,47 @@ fn validate_export_messages(messages: &[ExportMessage]) -> Result<()> { for (idx, message) in messages.iter().enumerate() { // Check for base64-encoded image data in content // Common pattern: "data:image/png;base64,..." or "data:image/jpeg;base64,..." - if let Some(data_uri_start) = message.content.find("data:image/") - && let Some(base64_marker) = message.content[data_uri_start..].find(";base64,") - { - let base64_start = data_uri_start + base64_marker + 8; // 8 = len(";base64,") - let remaining = &message.content[base64_start..]; - - // Find end of base64 data (could end with quote, whitespace, or end of string) - let base64_end = remaining - .find(['"', '\'', ' ', '\n', ')']) - .unwrap_or(remaining.len()); - let base64_data = &remaining[..base64_end]; - - // Validate the base64 data - if !base64_data.is_empty() { - let engine = base64::engine::general_purpose::STANDARD; - if let Err(e) = engine.decode(base64_data) { - bail!( - "Invalid base64 encoding in message {} (role: '{}'): {}\n\ - The image data starting at position {} has invalid base64 encoding.\n\ - Please ensure all embedded images use valid base64 encoding.", - idx + 1, - message.role, - e, - data_uri_start - ); + if let Some(data_uri_start) = message.content.find("data:image/") { + // Use safe slicing with .get() to avoid panics on multi-byte UTF-8 boundaries + let content_after_start = match message.content.get(data_uri_start..) 
{ + Some(s) => s, + None => continue, // Invalid byte offset, skip this message + }; + + if let Some(base64_marker) = content_after_start.find(";base64,") { + let base64_start = data_uri_start + base64_marker + 8; // 8 = len(";base64,") + + // Safe slicing for the remaining content after base64 marker + let remaining = match message.content.get(base64_start..) { + Some(s) => s, + None => continue, // Invalid byte offset, skip this message + }; + + // Find end of base64 data (could end with quote, whitespace, or end of string) + let base64_end = remaining + .find(['"', '\'', ' ', '\n', ')']) + .unwrap_or(remaining.len()); + + // Safe slicing for the base64 data + let base64_data = match remaining.get(..base64_end) { + Some(s) => s, + None => continue, // Invalid byte offset, skip this message + }; + + // Validate the base64 data + if !base64_data.is_empty() { + let engine = base64::engine::general_purpose::STANDARD; + if let Err(e) = engine.decode(base64_data) { + bail!( + "Invalid base64 encoding in message {} (role: '{}'): {}\n\ + The image data starting at position {} has invalid base64 encoding.\n\ + Please ensure all embedded images use valid base64 encoding.", + idx + 1, + message.role, + e, + data_uri_start + ); + } } } } @@ -395,13 +411,24 @@ fn validate_export_messages(messages: &[ExportMessage]) -> Result<()> { // Try to find and validate any base64 in the arguments for (pos, _) in args_str.match_indices(";base64,") { let base64_start = pos + 8; - let remaining = &args_str[base64_start..]; + + // Safe slicing for the remaining content after base64 marker + let remaining = match args_str.get(base64_start..) 
{ + Some(s) => s, + None => continue, // Invalid byte offset, skip this occurrence + }; + let base64_end = remaining .find(|c: char| { c == '"' || c == '\'' || c == ' ' || c == '\n' || c == ')' }) .unwrap_or(remaining.len()); - let base64_data = &remaining[..base64_end]; + + // Safe slicing for the base64 data + let base64_data = match remaining.get(..base64_end) { + Some(s) => s, + None => continue, // Invalid byte offset, skip this occurrence + }; if !base64_data.is_empty() { let engine = base64::engine::general_purpose::STANDARD; From 9b1382878d92e63301c2eaa728e037447a83c602 Mon Sep 17 00:00:00 2001 From: echobt Date: Wed, 4 Feb 2026 14:44:05 +0000 Subject: [PATCH 2/3] fix(notifications): use safe UTF-8 slicing for session IDs --- src/cortex-cli/src/utils/notification.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/cortex-cli/src/utils/notification.rs b/src/cortex-cli/src/utils/notification.rs index 4656e22..8edd2c9 100644 --- a/src/cortex-cli/src/utils/notification.rs +++ b/src/cortex-cli/src/utils/notification.rs @@ -63,7 +63,14 @@ pub fn send_task_notification(session_id: &str, success: bool) -> Result<()> { "Cortex Task Failed" }; - let short_id = &session_id[..8.min(session_id.len())]; + // Use safe UTF-8 slicing - keep every char that starts before byte 8, ending on a char boundary (may end past byte 8 for multi-byte chars) + let short_id = session_id + .char_indices() + .take_while(|(idx, _)| *idx < 8) + .map(|(idx, ch)| idx + ch.len_utf8()) + .last() + .and_then(|end| session_id.get(..end)) + .unwrap_or(session_id); let body = format!("Session: {}", short_id); let urgency = if success { From 90ddb667a92646252fd91a2e5bb5a5572ee177a0 Mon Sep 17 00:00:00 2001 From: echobt Date: Wed, 4 Feb 2026 14:46:25 +0000 Subject: [PATCH 3/3] fix(resume): use char-aware string truncation for UTF-8 safety --- src/cortex-resume/src/resume_picker.rs | 45 ++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/src/cortex-resume/src/resume_picker.rs 
b/src/cortex-resume/src/resume_picker.rs index 9bf8832..7b0ee9a 100644 --- a/src/cortex-resume/src/resume_picker.rs +++ b/src/cortex-resume/src/resume_picker.rs @@ -153,12 +153,15 @@ fn format_relative_time(time: &chrono::DateTime) -> String { } } -/// Truncate string to fit width. +/// Truncate string to fit width, handling multi-byte UTF-8 safely. fn truncate_string(s: &str, width: usize) -> String { - if s.len() <= width { + // Count characters (Unicode scalar values), not bytes or display columns + let char_count = s.chars().count(); + if char_count <= width { s.to_string() } else if width > 3 { - format!("{}...", &s[..width - 3]) + let truncated: String = s.chars().take(width - 3).collect(); + format!("{}...", truncated) } else { s.chars().take(width).collect() } @@ -176,4 +179,40 @@ mod tests { let hour_ago = now - chrono::Duration::hours(2); assert_eq!(format_relative_time(&hour_ago), "2h ago"); } + + #[test] + fn test_truncate_string_ascii() { + // Short string, no truncation needed + assert_eq!(truncate_string("hello", 10), "hello"); + + // Exact fit + assert_eq!(truncate_string("hello", 5), "hello"); + + // Needs truncation + assert_eq!(truncate_string("hello world", 8), "hello..."); + + // Very short width + assert_eq!(truncate_string("hello", 3), "hel"); + assert_eq!(truncate_string("hello", 2), "he"); + } + + #[test] + fn test_truncate_string_utf8() { + // UTF-8 multi-byte characters (Japanese) + let japanese = "こんにちは世界"; // 7 chars + assert_eq!(truncate_string(japanese, 10), japanese); // No truncation + assert_eq!(truncate_string(japanese, 7), japanese); // Exact fit + assert_eq!(truncate_string(japanese, 6), "こんに..."); // Truncated (3 chars + ...) + + // UTF-8 with emoji + let emoji = "Hello 🌍🌎🌏"; // 9 chars: H,e,l,l,o, ,🌍,🌎,🌏 + assert_eq!(truncate_string(emoji, 20), emoji); // No truncation + assert_eq!(truncate_string(emoji, 9), emoji); // Exact fit (9 chars) + assert_eq!(truncate_string(emoji, 8), "Hello..."); // Truncated (5 chars + ...) 
+ + // Mixed UTF-8 and ASCII + let mixed = "路径/path/文件"; // 10 chars + assert_eq!(truncate_string(mixed, 20), mixed); // No truncation + assert_eq!(truncate_string(mixed, 8), "路径/pa..."); // Truncated + } }