Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 142 additions & 6 deletions src/cortex-agents/src/mention.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,46 @@
use regex::Regex;
use std::sync::LazyLock;

/// Returns the longest prefix of `text` that ends at or before byte offset `pos`
/// without splitting a UTF-8 character.
///
/// * `pos` at or beyond `text.len()` yields the whole string.
/// * `pos` on a character boundary yields exactly `&text[..pos]`.
/// * `pos` inside a multi-byte character is moved back to the start of that
///   character, so the partially covered character is left out.
fn safe_slice_up_to(text: &str, pos: usize) -> &str {
    if pos >= text.len() {
        return text;
    }
    // Walk back from `pos`; offset 0 is always a boundary, so a match is guaranteed.
    let end = (0..=pos)
        .rev()
        .find(|&idx| text.is_char_boundary(idx))
        .unwrap_or(0);
    &text[..end]
}

/// Returns the longest suffix of `text` that starts at or after byte offset `pos`
/// without splitting a UTF-8 character.
///
/// * `pos` at or beyond `text.len()` yields the empty string.
/// * `pos` on a character boundary yields exactly `&text[pos..]`.
/// * `pos` inside a multi-byte character is moved forward to the start of the
///   next character, so the partially covered character is left out.
fn safe_slice_from(text: &str, pos: usize) -> &str {
    let len = text.len();
    if pos >= len {
        return "";
    }
    // Walk forward from `pos`; `len` itself is always a boundary, so a match is guaranteed.
    let start = (pos..=len)
        .find(|&idx| text.is_char_boundary(idx))
        .unwrap_or(len);
    &text[start..]
}

/// A parsed agent mention from user input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AgentMention {
Expand Down Expand Up @@ -108,10 +148,10 @@ pub fn extract_mention_and_text(
) -> Option<(AgentMention, String)> {
let mention = find_first_valid_mention(text, valid_agents)?;

// Remove the mention from text
// Remove the mention from text, using safe slicing for UTF-8 boundaries
let mut remaining = String::with_capacity(text.len());
remaining.push_str(&text[..mention.start]);
remaining.push_str(&text[mention.end..]);
remaining.push_str(safe_slice_up_to(text, mention.start));
remaining.push_str(safe_slice_from(text, mention.end));

// Trim and normalize whitespace
let remaining = remaining.trim().to_string();
Expand All @@ -123,7 +163,8 @@ pub fn extract_mention_and_text(
pub fn starts_with_mention(text: &str, valid_agents: &[&str]) -> bool {
let text = text.trim();
if let Some(mention) = find_first_valid_mention(text, valid_agents) {
mention.start == 0 || text[..mention.start].trim().is_empty()
// Use safe slicing to handle UTF-8 boundaries
mention.start == 0 || safe_slice_up_to(text, mention.start).trim().is_empty()
} else {
false
}
Expand Down Expand Up @@ -196,8 +237,8 @@ pub fn parse_message_for_agent(text: &str, valid_agents: &[&str]) -> ParsedAgent

// Check if message starts with @agent
if let Some((mention, remaining)) = extract_mention_and_text(text, valid_agents) {
// Only trigger if mention is at the start
if mention.start == 0 || text[..mention.start].trim().is_empty() {
// Only trigger if mention is at the start, using safe slicing for UTF-8 boundaries
if mention.start == 0 || safe_slice_up_to(text, mention.start).trim().is_empty() {
return ParsedAgentMessage::for_agent(mention.agent_name, remaining, text.to_string());
}
}
Expand Down Expand Up @@ -318,4 +359,99 @@ mod tests {
assert_eq!(mentions[0].agent_name, "my-agent");
assert_eq!(mentions[1].agent_name, "my_agent");
}

// UTF-8 boundary safety tests
#[test]
fn test_safe_slice_up_to_ascii() {
    // In single-byte text every offset is a boundary, so only the clamping of
    // out-of-range positions is interesting here.
    let text = "hello world";
    let cases = [(5, "hello"), (0, ""), (100, "hello world")];
    for (pos, expected) in cases {
        assert_eq!(safe_slice_up_to(text, pos), expected);
    }
}

#[test]
fn test_safe_slice_up_to_multibyte() {
    // Every character of "こんにちは" occupies 3 bytes, so the character
    // boundaries sit at offsets 0, 3, 6, 9, 12 and 15.
    let text = "こんにちは";
    let cases = [
        (3, "こ"),   // exactly on a boundary
        (6, "こん"), // exactly on a boundary
        (4, "こ"),   // inside the second character: falls back to offset 3
        (5, "こ"),   // inside the second character: falls back to offset 3
    ];
    for (pos, expected) in cases {
        assert_eq!(safe_slice_up_to(text, pos), expected);
    }
}

#[test]
fn test_safe_slice_from_multibyte() {
    // Every character of "こんにちは" occupies 3 bytes.
    let text = "こんにちは";
    let cases = [
        (3, "んにちは"), // exactly on a boundary
        (4, "にちは"),   // inside the second character: advances to offset 6
        (5, "にちは"),   // inside the second character: advances to offset 6
    ];
    for (pos, expected) in cases {
        assert_eq!(safe_slice_from(text, pos), expected);
    }
}

#[test]
fn test_extract_mention_with_multibyte_prefix() {
    let agents = vec!["general"];

    // CJK text (3 bytes per character) sits in front of the mention.
    let (found, rest) = extract_mention_and_text("日本語 @general search files", &agents)
        .expect("a mention of a valid agent should be found");

    assert_eq!(found.agent_name, "general");
    // Removing the mention must not panic and must keep the trailing text.
    assert!(rest.contains("search files"));
}

#[test]
fn test_starts_with_mention_multibyte() {
    let valid = vec!["general"];

    // Leading ASCII whitespace is trimmed before the mention is located.
    assert!(starts_with_mention(" @general task", &valid));

    // Leading multi-byte whitespace (U+3000 IDEOGRAPHIC SPACE, 3 bytes in UTF-8)
    // is trimmed the same way and must not cause a panic.
    assert!(starts_with_mention("\u{3000}@general task", &valid));

    // Multi-byte characters before mention - should return false, not panic
    assert!(!starts_with_mention("日本語 @general task", &valid));
}

#[test]
fn test_parse_message_for_agent_multibyte() {
    let agents = ["general"];

    // A CJK prefix means the mention is not at the start of the message, so
    // no task is invoked; the call must also not panic.
    let with_prefix = parse_message_for_agent("日本語 @general find files", &agents);
    assert!(!with_prefix.should_invoke_task);

    // CJK text after a leading mention becomes the prompt for that agent.
    let with_suffix = parse_message_for_agent("@general 日本語を検索", &agents);
    assert!(with_suffix.should_invoke_task);
    assert_eq!(with_suffix.agent.as_deref(), Some("general"));
    assert_eq!(with_suffix.prompt, "日本語を検索");
}

#[test]
fn test_extract_mention_with_emoji() {
    let agents = vec!["general"];

    // The leading emoji is a 4-byte UTF-8 sequence.
    let (found, rest) = extract_mention_and_text("🎉 @general celebrate", &agents)
        .expect("a mention of a valid agent should be found");

    assert_eq!(found.agent_name, "general");
    assert!(rest.contains("celebrate"));
}

#[test]
fn test_mixed_multibyte_and_ascii() {
    let agents = vec!["general"];

    // ASCII, CJK and emoji on both sides of the mention.
    let input = "Hello 世界 🌍 @general search for 日本語";
    let (found, rest) = extract_mention_and_text(input, &agents)
        .expect("a mention of a valid agent should be found");

    assert_eq!(found.agent_name, "general");
    // Extraction must complete without panicking and leave some text behind.
    assert!(!rest.is_empty());
}
}
81 changes: 54 additions & 27 deletions src/cortex-cli/src/import_cmd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -357,31 +357,47 @@ fn validate_export_messages(messages: &[ExportMessage]) -> Result<()> {
for (idx, message) in messages.iter().enumerate() {
// Check for base64-encoded image data in content
// Common pattern: "data:image/png;base64,..." or "data:image/jpeg;base64,..."
if let Some(data_uri_start) = message.content.find("data:image/")
&& let Some(base64_marker) = message.content[data_uri_start..].find(";base64,")
{
let base64_start = data_uri_start + base64_marker + 8; // 8 = len(";base64,")
let remaining = &message.content[base64_start..];

// Find end of base64 data (could end with quote, whitespace, or end of string)
let base64_end = remaining
.find(['"', '\'', ' ', '\n', ')'])
.unwrap_or(remaining.len());
let base64_data = &remaining[..base64_end];

// Validate the base64 data
if !base64_data.is_empty() {
let engine = base64::engine::general_purpose::STANDARD;
if let Err(e) = engine.decode(base64_data) {
bail!(
"Invalid base64 encoding in message {} (role: '{}'): {}\n\
The image data starting at position {} has invalid base64 encoding.\n\
Please ensure all embedded images use valid base64 encoding.",
idx + 1,
message.role,
e,
data_uri_start
);
if let Some(data_uri_start) = message.content.find("data:image/") {
// Use safe slicing with .get() to avoid panics on multi-byte UTF-8 boundaries
let content_after_start = match message.content.get(data_uri_start..) {
Some(s) => s,
None => continue, // Invalid byte offset, skip this message
};

if let Some(base64_marker) = content_after_start.find(";base64,") {
let base64_start = data_uri_start + base64_marker + 8; // 8 = len(";base64,")

// Safe slicing for the remaining content after base64 marker
let remaining = match message.content.get(base64_start..) {
Some(s) => s,
None => continue, // Invalid byte offset, skip this message
};

// Find end of base64 data (could end with quote, whitespace, or end of string)
let base64_end = remaining
.find(['"', '\'', ' ', '\n', ')'])
.unwrap_or(remaining.len());

// Safe slicing for the base64 data
let base64_data = match remaining.get(..base64_end) {
Some(s) => s,
None => continue, // Invalid byte offset, skip this message
};

// Validate the base64 data
if !base64_data.is_empty() {
let engine = base64::engine::general_purpose::STANDARD;
if let Err(e) = engine.decode(base64_data) {
bail!(
"Invalid base64 encoding in message {} (role: '{}'): {}\n\
The image data starting at position {} has invalid base64 encoding.\n\
Please ensure all embedded images use valid base64 encoding.",
idx + 1,
message.role,
e,
data_uri_start
);
}
}
}
}
Expand All @@ -395,13 +411,24 @@ fn validate_export_messages(messages: &[ExportMessage]) -> Result<()> {
// Try to find and validate any base64 in the arguments
for (pos, _) in args_str.match_indices(";base64,") {
let base64_start = pos + 8;
let remaining = &args_str[base64_start..];

// Safe slicing for the remaining content after base64 marker
let remaining = match args_str.get(base64_start..) {
Some(s) => s,
None => continue, // Invalid byte offset, skip this occurrence
};

let base64_end = remaining
.find(|c: char| {
c == '"' || c == '\'' || c == ' ' || c == '\n' || c == ')'
})
.unwrap_or(remaining.len());
let base64_data = &remaining[..base64_end];

// Safe slicing for the base64 data
let base64_data = match remaining.get(..base64_end) {
Some(s) => s,
None => continue, // Invalid byte offset, skip this occurrence
};

if !base64_data.is_empty() {
let engine = base64::engine::general_purpose::STANDARD;
Expand Down
52 changes: 48 additions & 4 deletions src/cortex-cli/src/lock_cmd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,15 @@
)
}

/// Returns the first `max_chars` characters of `s` as a borrowed slice.
///
/// The limit is counted in Unicode scalar values rather than bytes, so the cut
/// always lands on a character boundary and slicing cannot panic. When `s` has
/// `max_chars` characters or fewer, the whole string is returned.
fn safe_char_prefix(s: &str, max_chars: usize) -> &str {
    // The byte offset of the character at index `max_chars` (if any) is where
    // the prefix ends.
    s.char_indices()
        .nth(max_chars)
        .map_or(s, |(cut, _)| &s[..cut])
}

/// Get the lock file path.
fn get_lock_file_path() -> PathBuf {
dirs::home_dir()
Expand Down Expand Up @@ -156,7 +165,7 @@
match load_lock_file() {
Ok(lock_file) => lock_file.locked_sessions.iter().any(|entry| {
entry.session_id == session_id
|| session_id.starts_with(&entry.session_id[..8.min(entry.session_id.len())])
|| session_id.starts_with(safe_char_prefix(&entry.session_id, 8))
}),
Err(_) => false,
}
Expand Down Expand Up @@ -300,7 +309,7 @@
serde_json::to_string_pretty(&lock_file.locked_sessions)?
);
} else if lock_file.locked_sessions.is_empty() {
println!("No sessions are locked.");

Check failure

Code scanning / CodeQL

Cleartext logging of sensitive information High

This operation writes
...[...]
to a log file.
println!();
println!("Use 'cortex lock <session-id>' to protect a session.");
} else {
Expand All @@ -308,7 +317,7 @@
println!("{}", "-".repeat(60));

for entry in &lock_file.locked_sessions {
let short_id = &entry.session_id[..8.min(entry.session_id.len())];
let short_id = safe_char_prefix(&entry.session_id, 8);
println!(" {} - locked at {}", short_id, entry.locked_at);
if let Some(ref reason) = entry.reason {
println!(" Reason: {}", reason);
Expand All @@ -332,7 +341,7 @@
e.session_id == args.session_id
|| args
.session_id
.starts_with(&e.session_id[..8.min(e.session_id.len())])
.starts_with(safe_char_prefix(&e.session_id, 8))
});

if is_locked {
Expand All @@ -342,7 +351,7 @@
e.session_id == args.session_id
|| args
.session_id
.starts_with(&e.session_id[..8.min(e.session_id.len())])
.starts_with(safe_char_prefix(&e.session_id, 8))
}) && let Some(ref reason) = entry.reason
{
println!("Reason: {}", reason);
Expand Down Expand Up @@ -508,4 +517,39 @@
let path_str = path.to_string_lossy();
assert!(path_str.contains(".cortex"));
}

#[test]
fn test_safe_char_prefix_ascii() {
    // For ASCII input a character count and a byte count are the same thing.
    let cases = [
        ("abcdefghij", "abcdefgh"), // longer than the limit: truncated
        ("abc", "abc"),             // shorter than the limit: unchanged
        ("", ""),                   // empty input
        ("12345678", "12345678"),   // exactly at the limit
    ];
    for (input, expected) in cases {
        assert_eq!(safe_char_prefix(input, 8), expected);
    }
}

#[test]
fn test_safe_char_prefix_utf8_multibyte() {
    // Eight multi-byte symbols (3-4 bytes each) followed by ASCII: the prefix is
    // cut after the eighth character, wherever that falls in bytes.
    assert_eq!(
        safe_char_prefix("🔥🎉🚀💡🌟✨🎯🔮extra", 8),
        "🔥🎉🚀💡🌟✨🎯🔮"
    );

    // ASCII interleaved with emoji: 8 characters in total.
    let mixed = "ab🔥cd🎉ef";
    for (limit, expected) in [(4, "ab🔥c"), (8, "ab🔥cd🎉ef")] {
        assert_eq!(safe_char_prefix(mixed, limit), expected);
    }

    // CJK characters are 3 bytes each.
    assert_eq!(safe_char_prefix("中文测试会话标识符", 4), "中文测试");
}

#[test]
fn test_safe_char_prefix_boundary() {
    // Limits of 0 and 1 on one-character inputs, single-byte and multi-byte.
    let cases = [("a", 0, ""), ("a", 1, "a"), ("🔥", 1, "🔥"), ("🔥", 0, "")];
    for (input, limit, expected) in cases {
        assert_eq!(safe_char_prefix(input, limit), expected);
    }
}
}
9 changes: 8 additions & 1 deletion src/cortex-cli/src/utils/notification.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,14 @@ pub fn send_task_notification(session_id: &str, success: bool) -> Result<()> {
"Cortex Task Failed"
};

let short_id = &session_id[..8.min(session_id.len())];
// Use safe UTF-8 slicing - keep every character that starts before byte 8 (a multi-byte character straddling byte 8 is kept whole, so the result may be slightly longer than 8 bytes)
let short_id = session_id
.char_indices()
.take_while(|(idx, _)| *idx < 8)
.map(|(idx, ch)| idx + ch.len_utf8())
.last()
.and_then(|end| session_id.get(..end))
.unwrap_or(session_id);
let body = format!("Session: {}", short_id);

let urgency = if success {
Expand Down
Loading
Loading