-
Notifications
You must be signed in to change notification settings - Fork 3
fix(cli): use safe UTF-8 slicing in import command base64 extraction #31
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -357,31 +357,47 @@ fn validate_export_messages(messages: &[ExportMessage]) -> Result<()> { | |
| for (idx, message) in messages.iter().enumerate() { | ||
| // Check for base64-encoded image data in content | ||
| // Common pattern: "data:image/png;base64,..." or "data:image/jpeg;base64,..." | ||
| if let Some(data_uri_start) = message.content.find("data:image/") | ||
| && let Some(base64_marker) = message.content[data_uri_start..].find(";base64,") | ||
| { | ||
| let base64_start = data_uri_start + base64_marker + 8; // 8 = len(";base64,") | ||
| let remaining = &message.content[base64_start..]; | ||
|
|
||
| // Find end of base64 data (could end with quote, whitespace, or end of string) | ||
| let base64_end = remaining | ||
| .find(['"', '\'', ' ', '\n', ')']) | ||
| .unwrap_or(remaining.len()); | ||
| let base64_data = &remaining[..base64_end]; | ||
|
|
||
| // Validate the base64 data | ||
| if !base64_data.is_empty() { | ||
| let engine = base64::engine::general_purpose::STANDARD; | ||
| if let Err(e) = engine.decode(base64_data) { | ||
| bail!( | ||
| "Invalid base64 encoding in message {} (role: '{}'): {}\n\ | ||
| The image data starting at position {} has invalid base64 encoding.\n\ | ||
| Please ensure all embedded images use valid base64 encoding.", | ||
| idx + 1, | ||
| message.role, | ||
| e, | ||
| data_uri_start | ||
| ); | ||
| if let Some(data_uri_start) = message.content.find("data:image/") { | ||
| // Use safe slicing with .get() to avoid panics on multi-byte UTF-8 boundaries | ||
| let content_after_start = match message.content.get(data_uri_start..) { | ||
| Some(s) => s, | ||
| None => continue, // Invalid byte offset, skip this message | ||
| }; | ||
|
|
||
| if let Some(base64_marker) = content_after_start.find(";base64,") { | ||
| let base64_start = data_uri_start + base64_marker + 8; // 8 = len(";base64,") | ||
|
|
||
| // Safe slicing for the remaining content after base64 marker | ||
| let remaining = match message.content.get(base64_start..) { | ||
| Some(s) => s, | ||
| None => continue, // Invalid byte offset, skip this message | ||
| }; | ||
|
Comment on lines
+371
to
+374
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The arithmetic Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time! Prompt To Fix With AIThis is a comment left during a code review.
Path: src/cortex-cli/src/import_cmd.rs
Line: 371:374
Comment:
The arithmetic `data_uri_start + base64_marker + 8` could potentially result in an out-of-bounds index if the string ends unexpectedly. While using `.get()` prevents panics, silently continuing on `None` means validation is skipped for potentially malformed data. Consider whether this should be an error instead of silently continuing.
<sub>Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!</sub>
How can I resolve this? If you propose a fix, please make it concise. |
||
|
|
||
| // Find end of base64 data (could end with quote, whitespace, or end of string) | ||
| let base64_end = remaining | ||
| .find(['"', '\'', ' ', '\n', ')']) | ||
| .unwrap_or(remaining.len()); | ||
|
|
||
| // Safe slicing for the base64 data | ||
| let base64_data = match remaining.get(..base64_end) { | ||
| Some(s) => s, | ||
| None => continue, // Invalid byte offset, skip this message | ||
| }; | ||
|
|
||
| // Validate the base64 data | ||
| if !base64_data.is_empty() { | ||
| let engine = base64::engine::general_purpose::STANDARD; | ||
| if let Err(e) = engine.decode(base64_data) { | ||
| bail!( | ||
| "Invalid base64 encoding in message {} (role: '{}'): {}\n\ | ||
| The image data starting at position {} has invalid base64 encoding.\n\ | ||
| Please ensure all embedded images use valid base64 encoding.", | ||
| idx + 1, | ||
| message.role, | ||
| e, | ||
| data_uri_start | ||
| ); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -395,13 +411,24 @@ fn validate_export_messages(messages: &[ExportMessage]) -> Result<()> { | |
| // Try to find and validate any base64 in the arguments | ||
| for (pos, _) in args_str.match_indices(";base64,") { | ||
| let base64_start = pos + 8; | ||
| let remaining = &args_str[base64_start..]; | ||
|
|
||
| // Safe slicing for the remaining content after base64 marker | ||
| let remaining = match args_str.get(base64_start..) { | ||
| Some(s) => s, | ||
| None => continue, // Invalid byte offset, skip this occurrence | ||
| }; | ||
|
|
||
| let base64_end = remaining | ||
| .find(|c: char| { | ||
| c == '"' || c == '\'' || c == ' ' || c == '\n' || c == ')' | ||
| }) | ||
| .unwrap_or(remaining.len()); | ||
| let base64_data = &remaining[..base64_end]; | ||
|
|
||
| // Safe slicing for the base64 data | ||
| let base64_data = match remaining.get(..base64_end) { | ||
| Some(s) => s, | ||
| None => continue, // Invalid byte offset, skip this occurrence | ||
| }; | ||
|
|
||
| if !base64_data.is_empty() { | ||
| let engine = base64::engine::general_purpose::STANDARD; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Silently skipping validation when
.get()returnsNonecould hide issues. Ifdata_uri_startis a valid position found by.find(), then.get(data_uri_start..)should succeed unless there's memory corruption. Consider logging a warning or returning an error instead of continuing, since this indicates potentially malformed data that should be caught.Prompt To Fix With AI