Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 182 additions & 16 deletions gts-id/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ pub enum GtsIdError {

/// Result of successfully parsing a single GTS segment.
#[derive(Debug, Clone, PartialEq, Eq)]
#[allow(clippy::struct_excessive_bools)]
pub struct ParsedSegment {
/// The raw segment string (including trailing `~` if present).
pub raw: String,
Expand All @@ -60,6 +61,8 @@ pub struct ParsedSegment {
pub is_type: bool,
/// Whether this segment contains a wildcard `*` token.
pub is_wildcard: bool,
/// Whether this segment is a UUID tail (combined anonymous instance).
pub is_uuid_tail: bool,
}

/// Expected format string for segment error messages.
Expand All @@ -76,6 +79,18 @@ fn expected_format(segment_num: usize) -> &'static str {
}
}

/// Returns `true` when `s` has the textual UUID shape
/// `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`.
///
/// The string must be exactly 36 bytes long, carry a `-` at byte offsets
/// 8, 13, 18 and 23, and an ASCII hex digit (either case) everywhere else.
/// Only the shape is checked; version and variant bits are not inspected.
#[inline]
#[must_use]
pub fn is_uuid(s: &str) -> bool {
    // Byte offsets at which the group-separating dashes must appear.
    const DASH_OFFSETS: [usize; 4] = [8, 13, 18, 23];

    let bytes = s.as_bytes();
    if bytes.len() != 36 {
        return false;
    }

    // Working on bytes is safe here: any non-ASCII byte is neither `-` nor a
    // hex digit, so multi-byte characters are rejected at whatever offset
    // they occur.
    bytes.iter().enumerate().all(|(offset, &byte)| {
        if DASH_OFFSETS.contains(&offset) {
            byte == b'-'
        } else {
            byte.is_ascii_hexdigit()
        }
    })
}

/// Validates a GTS segment token without regex.
///
/// Valid tokens: start with `[a-z_]`, followed by `[a-z0-9_]*`.
Expand Down Expand Up @@ -209,6 +224,7 @@ pub fn validate_segment(
ver_minor: None,
is_type,
is_wildcard: false,
is_uuid_tail: false,
};

if !tokens.is_empty() {
Expand Down Expand Up @@ -278,8 +294,10 @@ pub fn validate_segment(

/// Validate a full GTS identifier string.
///
/// Checks the `gts.` prefix, lowercase, no hyphens, length, then splits
/// by `~` and validates each segment via [`validate_segment`].
/// Checks the `gts.` prefix, lowercase, length, then splits by `~` and
/// validates each segment via [`validate_segment`]. Hyphens are rejected
/// in the GTS segments portion but permitted in a trailing UUID
/// (combined anonymous instance, e.g. `gts.type.v1~schema.v1.0~<uuid>`).
///
/// # Arguments
/// * `id` - The raw GTS identifier string
Expand All @@ -304,13 +322,6 @@ pub fn validate_gts_id(id: &str, allow_wildcards: bool) -> Result<Vec<ParsedSegm
});
}

if raw.contains('-') {
return Err(GtsIdError::Id {
id: id.to_owned(),
cause: "must not contain '-'".to_owned(),
});
}

if raw.len() > GTS_MAX_LENGTH {
return Err(GtsIdError::Id {
id: id.to_owned(),
Expand All @@ -321,15 +332,53 @@ pub fn validate_gts_id(id: &str, allow_wildcards: bool) -> Result<Vec<ParsedSegm
let remainder = &raw[GTS_PREFIX.len()..];
let tilde_parts: Vec<&str> = remainder.split('~').collect();

let mut segments_raw = Vec::new();
for i in 0..tilde_parts.len() {
if i < tilde_parts.len() - 1 {
segments_raw.push(format!("{}~", tilde_parts[i]));
if i == tilde_parts.len() - 2 && tilde_parts[i + 1].is_empty() {
break;
// Detect combined anonymous instance: last tilde-part is a UUID.
// e.g. "gts.type.v1~schema.v1.0~7a1d2f34-5678-49ab-9012-abcdef123456"
// The UUID tail is only valid when preceded by at least one type segment (ending with ~).
let uuid_tail: Option<&str> = {
let last = tilde_parts.last().copied().unwrap_or("");
if is_uuid(last) && tilde_parts.len() >= 2 {
Some(last)
} else {
None
}
};

// Reject hyphens in the GTS segments portion (hyphens are only allowed in the UUID tail).
let segments_portion = match uuid_tail {
Some(uuid) => &raw[..raw.len() - uuid.len() - 1], // strip "~<uuid>"
None => raw,
};
if segments_portion.contains('-') {
return Err(GtsIdError::Id {
id: id.to_owned(),
cause: "must not contain '-'".to_owned(),
});
}

// Build the list of raw segment strings, excluding the UUID tail.
// When a UUID tail is present, every preceding tilde-part was followed by '~'
// in the original string, so each is a type segment — append '~' to all of them.
// Otherwise use the standard reconstruction (last part may or may not have '~').
let seg_count = tilde_parts.len() - usize::from(uuid_tail.is_some());
let mut segments_raw: Vec<String> = Vec::new();
for (i, &part) in tilde_parts.iter().enumerate().take(seg_count) {
let is_last = i == seg_count - 1;
if part.is_empty() {
// The only allowed empty part is the single trailing one produced by a
// type-marker `~` at the end (e.g. "gts.v.p.n.t.v1~"). Any other empty
// part means consecutive tildes (e.g. "~~") or a leading tilde, which
// are invalid.
if !(is_last && uuid_tail.is_none()) {
return Err(GtsIdError::Id {
id: id.to_owned(),
cause: format!("empty segment at tilde-part #{}", i + 1),
});
}
} else if is_last && uuid_tail.is_none() {
segments_raw.push(part.to_owned());
} else {
segments_raw.push(tilde_parts[i].to_owned());
segments_raw.push(format!("{part}~"));
}
}

Expand Down Expand Up @@ -362,6 +411,25 @@ pub fn validate_gts_id(id: &str, allow_wildcards: bool) -> Result<Vec<ParsedSegm
parsed_segments.push(parsed);
}

// Append the UUID tail as a special ParsedSegment if present.
// All preceding segments are guaranteed to be type segments because we
// appended '~' to every gts_part in the uuid_tail branch above.
if let Some(uuid) = uuid_tail {
parsed_segments.push(ParsedSegment {
raw: uuid.to_owned(),
offset,
vendor: String::new(),
package: String::new(),
namespace: String::new(),
type_name: String::new(),
ver_major: 0,
ver_minor: None,
is_type: false,
is_wildcard: false,
is_uuid_tail: true,
});
}

Ok(parsed_segments)
}

Expand Down Expand Up @@ -656,9 +724,107 @@ mod tests {
assert!(!segments[1].is_type);
}

#[test]
fn test_gts_id_double_tilde_rejected() {
    // Two consecutive tildes yield an empty tilde-part that is not the single
    // trailing type marker, so the identifier as a whole must be rejected.
    let outcome = validate_gts_id("gts.x.test1.events.type.v1.0~~", false);
    let err = outcome.unwrap_err();

    if let GtsIdError::Id { cause, .. } = &err {
        assert!(cause.contains("empty segment"), "got: {cause}");
    } else {
        panic!("expected Id error, got: {err}");
    }
}

#[test]
fn test_gts_id_whitespace_trimmed() {
    // Surrounding whitespace must not prevent the identifier from parsing.
    let padded_id = " gts.x.core.events.event.v1~ ";
    let parsed = validate_gts_id(padded_id, false).unwrap();
    assert_eq!(parsed.len(), 1);
}

// ---- is_uuid ----

#[test]
fn test_is_uuid_valid() {
    // Well-formed UUID strings: a mixed value plus the all-zero and all-`f` extremes.
    let accepted = [
        "7a1d2f34-5678-49ab-9012-abcdef123456",
        "00000000-0000-0000-0000-000000000000",
        "ffffffff-ffff-ffff-ffff-ffffffffffff",
    ];
    for candidate in accepted {
        assert!(is_uuid(candidate), "should be accepted: {candidate}");
    }
}

#[test]
fn test_is_uuid_invalid() {
    // Each malformed input is paired with the reason it must be refused.
    let rejected = [
        ("not-a-uuid", "arbitrary text"),
        ("7a1d2f34-5678-49ab-9012-abcdef12345", "too short"),
        ("7a1d2f34-5678-49ab-9012-abcdef1234567", "too long"),
        ("7a1d2f34-5678-49ab-9012-abcdef12345g", "non-hex char"),
        ("7a1d2f3405678-49ab-9012-abcdef123456", "dash in wrong place"),
    ];
    for (candidate, reason) in rejected {
        assert!(!is_uuid(candidate), "should be rejected ({reason}): {candidate}");
    }
}

// ---- combined anonymous instance ----

#[test]
fn test_combined_anonymous_instance_valid() {
    // Two chained type segments followed by a UUID tail.
    let uuid = "7a1d2f34-5678-49ab-9012-abcdef123456";
    let id = format!("gts.x.core.events.type.v1~x.commerce.orders.order_placed.v1.0~{uuid}");

    let parsed = validate_gts_id(&id, false).unwrap();
    assert_eq!(parsed.len(), 3);

    // Everything before the tail is a type segment; the tail itself is the
    // UUID marker segment and is not flagged as a type.
    let (tail, type_chain) = parsed.split_last().unwrap();
    assert!(type_chain.iter().all(|seg| seg.is_type));
    assert!(tail.is_uuid_tail);
    assert!(!tail.is_type);
    assert_eq!(tail.raw, uuid);
}

#[test]
fn test_combined_anonymous_instance_single_prefix_valid() {
    // A single type segment is enough to anchor a UUID tail.
    let id = "gts.x.core.events.type.v1~7a1d2f34-5678-49ab-9012-abcdef123456";
    let parsed = validate_gts_id(id, false).unwrap();

    assert_eq!(parsed.len(), 2);
    let type_prefix = &parsed[0];
    let tail = &parsed[1];
    assert!(type_prefix.is_type);
    assert!(tail.is_uuid_tail);
}

#[test]
fn test_combined_anonymous_instance_hyphen_in_segments_rejected() {
    // The hyphen allowance covers only the UUID tail; a hyphen inside a
    // regular segment (`x-vendor`) must still fail at the identifier level.
    let id = "gts.x-vendor.core.events.type.v1~x.commerce.orders.order_placed.v1.0~7a1d2f34-5678-49ab-9012-abcdef123456";
    let err = validate_gts_id(id, false).unwrap_err();

    if let GtsIdError::Id { cause, .. } = &err {
        assert!(cause.contains("'-'"), "got: {cause}");
    } else {
        panic!("expected Id error, got: {err}");
    }
}

#[test]
fn test_uuid_alone_without_prefix_rejected() {
    // A bare UUID carries no `gts.` prefix, so it is not a GTS identifier.
    let bare_uuid = "7a1d2f34-5678-49ab-9012-abcdef123456";
    let err = validate_gts_id(bare_uuid, false).unwrap_err();

    if let GtsIdError::Id { cause, .. } = &err {
        assert!(cause.contains("must start with 'gts.'"), "got: {cause}");
    } else {
        panic!("expected Id error, got: {err}");
    }
}

#[test]
fn test_uuid_tail_without_preceding_tilde_rejected() {
    // With no `~` in the identifier there is no preceding type segment, so the
    // UUID is not treated as a tail and its hyphens trigger the ordinary
    // "must not contain '-'" rejection.
    let id = "gts.7a1d2f34-5678-49ab-9012-abcdef123456";
    let err = validate_gts_id(id, false).unwrap_err();

    if let GtsIdError::Id { cause, .. } = &err {
        assert!(cause.contains("'-'"), "got: {cause}");
    } else {
        panic!("expected Id error, got: {err}");
    }
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}
15 changes: 14 additions & 1 deletion gts/src/gts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ pub enum GtsError {

/// Parsed GTS segment containing vendor, package, namespace, type, and version info.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[allow(clippy::struct_excessive_bools)]
pub struct GtsIdSegment {
pub num: usize,
pub offset: usize,
Expand All @@ -41,6 +42,7 @@ pub struct GtsIdSegment {
pub ver_minor: Option<u32>,
pub is_type: bool,
pub is_wildcard: bool,
pub is_uuid_tail: bool,
}

impl GtsIdSegment {
Expand All @@ -62,6 +64,7 @@ impl GtsIdSegment {
ver_minor: None,
is_type: false,
is_wildcard: false,
is_uuid_tail: false,
};

seg.parse_segment_id(&segment)?;
Expand All @@ -85,6 +88,7 @@ impl GtsIdSegment {
self.ver_minor = parsed.ver_minor;
self.is_type = parsed.is_type;
self.is_wildcard = parsed.is_wildcard;
self.is_uuid_tail = parsed.is_uuid_tail;
Ok(())
}
}
Expand Down Expand Up @@ -142,12 +146,16 @@ impl GtsID {
ver_minor: p.ver_minor,
is_type: p.is_type,
is_wildcard: p.is_wildcard,
is_uuid_tail: p.is_uuid_tail,
})
.collect();

// Issue #37: Single-segment instance IDs are prohibited.
// Instance IDs must be chained with at least one type segment (e.g., 'type~instance').
if gts_id_segments.len() == 1
// Exception: combined anonymous instances (UUID tail) are always valid.
let has_uuid_tail = gts_id_segments.last().is_some_and(|s| s.is_uuid_tail);
if !has_uuid_tail
&& gts_id_segments.len() == 1
&& !gts_id_segments[0].is_type
Comment thread
mattgarmon marked this conversation as resolved.
&& !gts_id_segments[0].is_wildcard
{
Expand Down Expand Up @@ -252,6 +260,11 @@ impl GtsID {
return true;
}

// Non-wildcard UUID tail - compare raw segment string (the actual UUID)
if p_seg.is_uuid_tail && p_seg.segment != c_seg.segment {
return false;
}

// Non-wildcard segment - all fields must match exactly
if p_seg.vendor != c_seg.vendor {
return false;
Expand Down
Loading