diff --git a/Cargo.lock b/Cargo.lock index c14db40..dcabfa8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -641,6 +641,7 @@ checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" name = "gts" version = "0.7.8" dependencies = [ + "gts-id", "jsonschema", "schemars", "serde", @@ -675,11 +676,19 @@ dependencies = [ "walkdir", ] +[[package]] +name = "gts-id" +version = "0.7.8" +dependencies = [ + "thiserror 2.0.17", +] + [[package]] name = "gts-macros" version = "0.7.8" dependencies = [ "gts", + "gts-id", "jsonschema", "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 1fe99c3..b7ab8b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ categories = ["development-tools::build-utils"] readme = "README.md" [workspace] -members = ["gts", "gts-cli", "gts-macros", "gts-macros-cli"] +members = ["gts", "gts-cli", "gts-id", "gts-macros", "gts-macros-cli"] resolver = "2" [workspace.lints.rust] @@ -142,6 +142,7 @@ module_name_repetitions = "allow" [workspace.dependencies] gts = { version = "0.7.8", path = "gts" } gts-cli = { version = "0.7.8", path = "gts-cli" } +gts-id = { version = "0.7.8", path = "gts-id" } gts-macros = { version = "0.7.8", path = "gts-macros" } gts-macros-cli = { version = "0.7.8", path = "gts-macros-cli" } diff --git a/gts-id/Cargo.toml b/gts-id/Cargo.toml new file mode 100644 index 0000000..f95a124 --- /dev/null +++ b/gts-id/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "gts-id" +version = "0.7.8" +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true +description = "GTS ID validation and parsing primitives" +keywords = ["gts"] +categories.workspace = true +readme.workspace = true +publish = true + +[lints] +workspace = true + +[dependencies] +thiserror.workspace = true diff --git a/gts-id/src/lib.rs b/gts-id/src/lib.rs new file mode 100644 index 0000000..f40cc1b --- /dev/null +++ b/gts-id/src/lib.rs @@ -0,0 +1,664 @@ +//! 
Shared GTS ID validation and parsing primitives. +//! +//! This crate provides the single source of truth for GTS identifier validation, +//! used by both the `gts` runtime library and the `gts-macros` proc-macro crate. + +use thiserror::Error; + +/// The required prefix for all GTS identifiers. +pub const GTS_PREFIX: &str = "gts."; + +/// Maximum allowed length, in bytes, of a (trimmed) GTS identifier string. +pub const GTS_MAX_LENGTH: usize = 1024; + +/// Errors from GTS ID validation. +#[derive(Debug, Error)] +pub enum GtsIdError { + /// A specific segment within the ID is invalid. + #[error("Segment #{num}: {cause}")] + Segment { + /// 1-based segment number. + num: usize, + /// Byte offset of this segment within the full ID string. + offset: usize, + /// The raw segment string that failed validation. + segment: String, + /// Human-readable description of the problem. + cause: String, + }, + + /// The ID as a whole is invalid (prefix, case, length, etc.). + #[error("Invalid GTS ID: {cause}")] + Id { + /// The raw ID string that failed validation. + id: String, + /// Human-readable description of the problem. + cause: String, + }, +} + +/// Result of successfully parsing a single GTS segment. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ParsedSegment { + /// The raw segment string (including trailing `~` if present). + pub raw: String, + /// Byte offset of this segment within the full ID string (`0` when parsed standalone by `validate_segment`). + pub offset: usize, + /// Vendor token (1st dot-separated token). + pub vendor: String, + /// Package token (2nd dot-separated token). + pub package: String, + /// Namespace token (3rd dot-separated token). + pub namespace: String, + /// Type name token (4th dot-separated token). + pub type_name: String, + /// Major version number. + pub ver_major: u32, + /// Optional minor version number. + pub ver_minor: Option<u32>, + /// Whether this segment ends with `~` (type marker). + pub is_type: bool, + /// Whether this segment contains a wildcard `*` token (fields from the wildcard position onward keep their defaults). 
+ pub is_wildcard: bool, +} + +/// Expected format string for segment error messages. +/// +/// Segment #1 shows the `gts.` prefix because the user writes +/// `gts.vendor.package...`; segments #2+ omit it because they +/// come after a `~` delimiter. +#[must_use] +fn expected_format(segment_num: usize) -> &'static str { + if segment_num == 1 { + "gts.vendor.package.namespace.type.vMAJOR[.MINOR]" + } else { + "vendor.package.namespace.type.vMAJOR[.MINOR]" + } +} + +/// Validates a GTS segment token without regex. +/// +/// Valid tokens: start with `[a-z_]`, followed by `[a-z0-9_]*`. +#[inline] +#[must_use] +pub fn is_valid_segment_token(token: &str) -> bool { + if token.is_empty() { + return false; + } + let mut chars = token.chars(); + match chars.next() { + Some(c) if c.is_ascii_lowercase() || c == '_' => {} + _ => return false, + } + chars.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_') +} + +/// Parse a `u32`, accepting only its canonical decimal form: leading zeros (except `"0"` itself) and a leading `+` sign are rejected. +#[inline] +#[must_use] +pub fn parse_u32_exact(value: &str) -> Option<u32> { + let parsed = value.parse::<u32>().ok()?; + if parsed.to_string() == value { + Some(parsed) + } else { + None + } +} + +/// Validate and parse a single GTS segment (the part between `~` markers). +/// +/// # Arguments +/// * `segment_num` - 1-based segment number (used in error messages and format hints) +/// * `segment` - The raw segment string, possibly including a trailing `~` +/// * `allow_wildcards` - If `true`, a trailing wildcard `*` token is accepted as the final token +/// +/// # Errors +/// Returns a human-readable error message if the segment is invalid. 
+pub fn validate_segment( + segment_num: usize, + segment: &str, + allow_wildcards: bool, +) -> Result<ParsedSegment, String> { + let mut seg = segment.to_owned(); + let mut is_type = false; + + // Check for type marker (~) + if seg.contains('~') { + let tilde_count = seg.matches('~').count(); + if tilde_count > 1 { + return Err("Too many '~' characters".to_owned()); + } + if seg.ends_with('~') { + is_type = true; + seg.pop(); + } else { + return Err("'~' must be at the end".to_owned()); + } + } + + let tokens: Vec<&str> = seg.split('.').collect(); + let fmt = expected_format(segment_num); + + if tokens.len() > 6 { + return Err(format!( + "Too many tokens (got {}, max 6). Expected format: {fmt}", + tokens.len() + )); + } + + let ends_with_wildcard = allow_wildcards && seg.ends_with('*'); + + if !ends_with_wildcard && tokens.len() < 5 { + return Err(format!( + "Too few tokens (got {}, min 5). Expected format: {fmt}", + tokens.len() + )); + } + + // Detect extra name token before version (e.g., vendor.package.namespace.type.extra.v1) + if !ends_with_wildcard && tokens.len() == 6 { + let has_wildcard = allow_wildcards && tokens.contains(&"*"); + if !has_wildcard + && !tokens[4].starts_with('v') + && tokens[5].starts_with('v') + && is_valid_segment_token(tokens[4]) + { + return Err(format!( + "Too many name tokens before version (got 5, expected 4). Expected format: {fmt}" + )); + } + } + + // Validate first 4 tokens (vendor, package, namespace, type). + // A trailing '*' wildcard is allowed as the final token, but all tokens + // before it must still pass validation. Wildcards in the middle + // (e.g., "x.*.ns.type.v1") are rejected: explicitly below when wildcards are allowed, otherwise because '*' fails is_valid_segment_token. 
+ for (i, token) in tokens.iter().take(4).enumerate() { + if allow_wildcards && *token == "*" { + if i == tokens.len() - 1 { + break; // '*' as final token is handled in the parsing section below + } + return Err("Wildcard '*' is only allowed as the final token".to_owned()); + } + if !is_valid_segment_token(token) { + let token_name = match i { + 0 => "vendor", + 1 => "package", + 2 => "namespace", + 3 => "type", + _ => "token", + }; + return Err(format!( + "Invalid {token_name} token '{token}'. \ + Must start with [a-z_] and contain only [a-z0-9_]" + )); + } + } + + // Build the result, parsing tokens progressively. + // Offset is set to 0 here; callers like validate_gts_id() override it + // with the actual position within the full ID string. + let mut result = ParsedSegment { + raw: segment.to_owned(), + offset: 0, + vendor: String::new(), + package: String::new(), + namespace: String::new(), + type_name: String::new(), + ver_major: 0, + ver_minor: None, + is_type, + is_wildcard: false, + }; + + if !tokens.is_empty() { + if allow_wildcards && tokens[0] == "*" { + result.is_wildcard = true; + return Ok(result); + } + tokens[0].clone_into(&mut result.vendor); + } + + if tokens.len() > 1 { + if allow_wildcards && tokens[1] == "*" { + result.is_wildcard = true; + return Ok(result); + } + tokens[1].clone_into(&mut result.package); + } + + if tokens.len() > 2 { + if allow_wildcards && tokens[2] == "*" { + result.is_wildcard = true; + return Ok(result); + } + tokens[2].clone_into(&mut result.namespace); + } + + if tokens.len() > 3 { + if allow_wildcards && tokens[3] == "*" { + result.is_wildcard = true; + return Ok(result); + } + tokens[3].clone_into(&mut result.type_name); + } + + if tokens.len() > 4 { + if allow_wildcards && tokens[4] == "*" { + if 4 != tokens.len() - 1 { + return Err("Wildcard '*' is only allowed as the final token".to_owned()); + } + result.is_wildcard = true; + return Ok(result); + } + + if !tokens[4].starts_with('v') { + return Err("Major version 
must start with 'v'".to_owned()); + } + + let major_str = &tokens[4][1..]; + result.ver_major = parse_u32_exact(major_str) + .ok_or_else(|| format!("Major version must be an integer, got '{major_str}'"))?; + } + + if tokens.len() > 5 { + if allow_wildcards && tokens[5] == "*" { + result.is_wildcard = true; + return Ok(result); + } + + result.ver_minor = Some( + parse_u32_exact(tokens[5]) + .ok_or_else(|| format!("Minor version must be an integer, got '{}'", tokens[5]))?, + ); + } + + Ok(result) +} + +/// Validate a full GTS identifier string. +/// +/// Checks the `gts.` prefix, lowercase, no hyphens, length, then splits +/// by `~` and validates each segment via [`validate_segment`]. +/// +/// # Arguments +/// * `id` - The raw GTS identifier string (surrounding whitespace is trimmed before validation) +/// * `allow_wildcards` - If `true`, wildcard `*` tokens are accepted +/// +/// # Errors +/// Returns [`GtsIdError`] on validation failure. +pub fn validate_gts_id(id: &str, allow_wildcards: bool) -> Result<Vec<ParsedSegment>, GtsIdError> { + let raw = id.trim(); + + if !raw.starts_with(GTS_PREFIX) { + return Err(GtsIdError::Id { + id: id.to_owned(), + cause: format!("must start with '{GTS_PREFIX}'"), + }); + } + + if raw != raw.to_lowercase() { + return Err(GtsIdError::Id { + id: id.to_owned(), + cause: "must be lowercase".to_owned(), + }); + } + + if raw.contains('-') { + return Err(GtsIdError::Id { + id: id.to_owned(), + cause: "must not contain '-'".to_owned(), + }); + } + + if raw.len() > GTS_MAX_LENGTH { + return Err(GtsIdError::Id { + id: id.to_owned(), + cause: format!("too long ({} chars, max {GTS_MAX_LENGTH})", raw.len()), + }); + } + + let remainder = &raw[GTS_PREFIX.len()..]; + let tilde_parts: Vec<&str> = remainder.split('~').collect(); + + let mut segments_raw = Vec::new(); + for i in 0..tilde_parts.len() { + if i < tilde_parts.len() - 1 { + segments_raw.push(format!("{}~", tilde_parts[i])); + if i == tilde_parts.len() - 2 && tilde_parts[i + 1].is_empty() { + break; + } + } else { + 
segments_raw.push(tilde_parts[i].to_owned()); + } + } + + if segments_raw.is_empty() { + return Err(GtsIdError::Id { + id: id.to_owned(), + cause: "no segments found".to_owned(), + }); + } + + let mut parsed_segments = Vec::new(); + let mut offset = GTS_PREFIX.len(); + for (i, seg) in segments_raw.iter().enumerate() { + if seg.is_empty() || seg == "~" { + return Err(GtsIdError::Id { + id: id.to_owned(), + cause: format!("segment #{} @ offset {offset} is empty", i + 1), + }); + } + + let mut parsed = + validate_segment(i + 1, seg, allow_wildcards).map_err(|cause| GtsIdError::Segment { + num: i + 1, + offset, + segment: seg.clone(), + cause, + })?; + parsed.offset = offset; + offset += seg.len(); + parsed_segments.push(parsed); + } + + Ok(parsed_segments) +} + +#[cfg(test)] +#[allow(clippy::unwrap_used, clippy::expect_used)] +mod tests { + use super::*; + + // ---- is_valid_segment_token ---- + + #[test] + fn test_valid_tokens() { + assert!(is_valid_segment_token("abc")); + assert!(is_valid_segment_token("a1b2")); + assert!(is_valid_segment_token("_private")); + assert!(is_valid_segment_token("a_b_c")); + } + + #[test] + fn test_invalid_tokens() { + assert!(!is_valid_segment_token("")); + assert!(!is_valid_segment_token("1abc")); + assert!(!is_valid_segment_token("ABC")); + assert!(!is_valid_segment_token("a-b")); + assert!(!is_valid_segment_token("a.b")); + } + + // ---- parse_u32_exact ---- + + #[test] + fn test_parse_u32_exact_valid() { + assert_eq!(parse_u32_exact("0"), Some(0)); + assert_eq!(parse_u32_exact("1"), Some(1)); + assert_eq!(parse_u32_exact("42"), Some(42)); + } + + #[test] + fn test_parse_u32_exact_rejects_leading_zeros() { + assert_eq!(parse_u32_exact("01"), None); + assert_eq!(parse_u32_exact("007"), None); + } + + #[test] + fn test_parse_u32_exact_rejects_non_numeric() { + assert_eq!(parse_u32_exact("abc"), None); + assert_eq!(parse_u32_exact(""), None); + } + + // ---- validate_segment ---- + + #[test] + fn test_valid_segment_basic() { + let 
parsed = validate_segment(1, "x.core.events.event.v1~", false).unwrap(); + assert_eq!(parsed.vendor, "x"); + assert_eq!(parsed.package, "core"); + assert_eq!(parsed.namespace, "events"); + assert_eq!(parsed.type_name, "event"); + assert_eq!(parsed.ver_major, 1); + assert_eq!(parsed.ver_minor, None); + assert!(parsed.is_type); + assert!(!parsed.is_wildcard); + } + + #[test] + fn test_valid_segment_with_minor() { + let parsed = validate_segment(1, "x.core.events.event.v1.2~", false).unwrap(); + assert_eq!(parsed.ver_major, 1); + assert_eq!(parsed.ver_minor, Some(2)); + } + + #[test] + fn test_segment_too_many_tildes() { + let err = validate_segment(1, "x.core.events.event.v1~~", false).unwrap_err(); + assert!(err.contains("Too many '~' characters"), "got: {err}"); + } + + #[test] + fn test_segment_tilde_not_at_end() { + let err = validate_segment(1, "x.core~mid.events.event.v1", false).unwrap_err(); + assert!(err.contains("'~' must be at the end"), "got: {err}"); + } + + #[test] + fn test_segment_too_many_tokens() { + let err = validate_segment(1, "x.core.events.event.v1.2.extra~", false).unwrap_err(); + assert!(err.contains("Too many tokens"), "got: {err}"); + } + + #[test] + fn test_segment_too_few_tokens() { + let err = validate_segment(1, "x.core.events.event~", false).unwrap_err(); + assert!(err.contains("Too few tokens"), "got: {err}"); + } + + #[test] + fn test_segment_too_many_name_tokens() { + let err = validate_segment(2, "x.core.ns.type.extra.v1~", false).unwrap_err(); + assert!( + err.contains("Too many name tokens before version"), + "got: {err}" + ); + } + + #[test] + fn test_segment_version_without_v() { + let err = validate_segment(1, "x.core.events.event.1~", false).unwrap_err(); + assert!( + err.contains("Major version must start with 'v'"), + "got: {err}" + ); + } + + #[test] + fn test_segment_version_not_integer() { + let err = validate_segment(1, "x.core.events.event.vX~", false).unwrap_err(); + assert!( + err.contains("Major version must be an 
integer"), + "got: {err}" + ); + } + + #[test] + fn test_segment_version_leading_zeros() { + let err = validate_segment(1, "x.core.events.event.v01~", false).unwrap_err(); + assert!( + err.contains("Major version must be an integer"), + "got: {err}" + ); + } + + #[test] + fn test_segment_invalid_vendor_token() { + let err = validate_segment(1, "1bad.core.events.event.v1~", false).unwrap_err(); + assert!(err.contains("Invalid vendor token"), "got: {err}"); + } + + // ---- expected_format ---- + + #[test] + fn test_segment1_format_has_gts_prefix() { + let err = validate_segment(1, "x.core.events.event~", false).unwrap_err(); + assert!( + err.contains("gts.vendor.package.namespace.type.vMAJOR"), + "segment #1 format should include gts. prefix, got: {err}" + ); + } + + #[test] + fn test_segment2_format_no_gts_prefix() { + let err = validate_segment(2, "x.core.events.event~", false).unwrap_err(); + assert!( + !err.contains("gts.vendor"), + "segment #2 format should NOT include gts. prefix, got: {err}" + ); + assert!( + err.contains("vendor.package.namespace.type.vMAJOR"), + "segment #2 should show vendor.package format, got: {err}" + ); + } + + // ---- wildcards ---- + + #[test] + fn test_wildcard_at_vendor() { + let parsed = validate_segment(1, "*", true).unwrap(); + assert!(parsed.is_wildcard); + } + + #[test] + fn test_wildcard_at_package() { + let parsed = validate_segment(1, "x.*", true).unwrap(); + assert!(parsed.is_wildcard); + assert_eq!(parsed.vendor, "x"); + } + + #[test] + fn test_wildcard_invalid_token_before_star() { + // Tokens before '*' must still be validated + let err = validate_segment(1, "1bad.*", true).unwrap_err(); + assert!(err.contains("Invalid vendor token"), "got: {err}"); + } + + #[test] + fn test_wildcard_in_middle_rejected() { + // '*' in a non-final position must be rejected + let err = validate_segment(1, "x.*.ns.type.v1", true).unwrap_err(); + assert!( + err.contains("only allowed as the final token"), + "got: {err}" + ); + } + + #[test] 
+ fn test_wildcard_at_version_position_not_final() { + // '*' at version position (4) with extra token after it must be rejected + let err = validate_segment(1, "x.pkg.ns.type.*.extra", true).unwrap_err(); + assert!( + err.contains("only allowed as the final token"), + "got: {err}" + ); + } + + #[test] + fn test_wildcard_rejected_without_flag() { + let err = validate_segment(1, "x.*", false).unwrap_err(); + assert!(err.contains("Too few tokens"), "got: {err}"); + } + + // ---- validate_gts_id ---- + + #[test] + fn test_valid_gts_id() { + let segments = validate_gts_id("gts.x.core.events.event.v1~", false).unwrap(); + assert_eq!(segments.len(), 1); + assert_eq!(segments[0].vendor, "x"); + assert!(segments[0].is_type); + } + + #[test] + fn test_valid_gts_id_chained() { + let segments = validate_gts_id( + "gts.x.core.events.type.v1~vendor.app._.custom_event.v1~", + false, + ) + .unwrap(); + assert_eq!(segments.len(), 2); + assert_eq!(segments[0].vendor, "x"); + assert_eq!(segments[1].vendor, "vendor"); + } + + #[test] + fn test_gts_id_missing_prefix() { + let err = validate_gts_id("x.core.events.event.v1~", false).unwrap_err(); + match err { + GtsIdError::Id { cause, .. } => { + assert!(cause.contains("must start with 'gts.'"), "got: {cause}"); + } + GtsIdError::Segment { .. } => panic!("expected Id error, got: {err}"), + } + } + + #[test] + fn test_gts_id_uppercase() { + let err = validate_gts_id("gts.X.core.events.event.v1~", false).unwrap_err(); + match err { + GtsIdError::Id { cause, .. } => { + assert!(cause.contains("lowercase"), "got: {cause}"); + } + GtsIdError::Segment { .. } => panic!("expected Id error, got: {err}"), + } + } + + #[test] + fn test_gts_id_hyphen() { + let err = validate_gts_id("gts.x-vendor.core.events.event.v1~", false).unwrap_err(); + match err { + GtsIdError::Id { cause, .. } => { + assert!(cause.contains("'-'"), "got: {cause}"); + } + GtsIdError::Segment { .. 
} => panic!("expected Id error, got: {err}"), + } + } + + #[test] + fn test_gts_id_segment_error_carries_num_and_offset() { + let err = validate_gts_id( + "gts.x.core.modkit.plugin.v1~x.core.license_enforcer.integration.plugin.v1~", + false, + ) + .unwrap_err(); + match err { + GtsIdError::Segment { + num, offset, cause, .. + } => { + assert_eq!(num, 2); + // offset = "gts.".len() + "x.core.modkit.plugin.v1~".len() = 4 + 24 = 28 + assert_eq!(offset, 28); + assert!( + cause.contains("Too many name tokens before version"), + "got: {cause}" + ); + } + GtsIdError::Id { .. } => panic!("expected Segment error, got: {err}"), + } + } + + #[test] + fn test_gts_id_instance_no_tilde_end() { + let segments = validate_gts_id("gts.x.core.events.event.v1~a.b.c.d.v1.0", false).unwrap(); + assert_eq!(segments.len(), 2); + assert!(segments[0].is_type); + assert!(!segments[1].is_type); + } + + #[test] + fn test_gts_id_whitespace_trimmed() { + let segments = validate_gts_id(" gts.x.core.events.event.v1~ ", false).unwrap(); + assert_eq!(segments.len(), 1); + } +} diff --git a/gts-macros/Cargo.toml b/gts-macros/Cargo.toml index b06b96d..fc82b14 100644 --- a/gts-macros/Cargo.toml +++ b/gts-macros/Cargo.toml @@ -20,6 +20,7 @@ proc-macro = true test = false [dependencies] +gts-id.workspace = true syn = { version = "2.0", features = ["full", "extra-traits"] } quote = "1.0" proc-macro2 = "1.0" diff --git a/gts-macros/src/lib.rs b/gts-macros/src/lib.rs index 8cca165..d82a890 100644 --- a/gts-macros/src/lib.rs +++ b/gts-macros/src/lib.rs @@ -459,7 +459,32 @@ impl Parse for GtsSchemaArgs { } "schema_id" => { let value: LitStr = input.parse()?; - schema_id = Some(value.value()); + let id = value.value(); + // Schema-specific check: must end with ~ + if !id.ends_with('~') { + return Err(syn::Error::new_spanned( + value, + format!( + "struct_to_gts_schema: Invalid GTS schema ID: must end with '~' (type marker), got '{id}'" + ), + )); + } + // General GTS ID validation via shared crate + if let 
Err(e) = gts_id::validate_gts_id(&id, false) { + let msg = match &e { + gts_id::GtsIdError::Id { cause, .. } => { + format!("Invalid GTS schema ID: {cause}") + } + gts_id::GtsIdError::Segment { num, cause, .. } => { + format!("Segment #{num}: {cause}") + } + }; + return Err(syn::Error::new_spanned( + value, + format!("struct_to_gts_schema: {msg}"), + )); + } + schema_id = Some(id); } "description" => { let value: LitStr = input.parse()?; diff --git a/gts-macros/tests/compile_fail/base_parent_mismatch.rs b/gts-macros/tests/compile_fail/base_parent_mismatch.rs index bf9309f..f92940e 100644 --- a/gts-macros/tests/compile_fail/base_parent_mismatch.rs +++ b/gts-macros/tests/compile_fail/base_parent_mismatch.rs @@ -19,11 +19,11 @@ pub struct BaseEventV1

{ // This should fail: parent schema_id doesn't match the parent segment // Parent's ID is "gts.x.core.events.type.v1~" but schema_id's parent -// segment is "gts.x.wrong.parent.v1~" +// segment is "gts.x.wrong.parent.type.v1~" #[struct_to_gts_schema( dir_path = "schemas", base = BaseEventV1, - schema_id = "gts.x.wrong.parent.v1~x.core.audit.event.v1~", + schema_id = "gts.x.wrong.parent.type.v1~x.core.audit.event.v1~", description = "This should fail", properties = "user_id" )] diff --git a/gts-macros/tests/compile_fail/invalid_gts_id_missing_prefix.rs b/gts-macros/tests/compile_fail/invalid_gts_id_missing_prefix.rs new file mode 100644 index 0000000..ffe8493 --- /dev/null +++ b/gts-macros/tests/compile_fail/invalid_gts_id_missing_prefix.rs @@ -0,0 +1,16 @@ +//! Test: GTS schema ID missing 'gts.' prefix + +use gts_macros::struct_to_gts_schema; + +#[struct_to_gts_schema( + dir_path = "schemas", + base = true, + schema_id = "x.core.events.type.v1~", + description = "Missing gts. prefix", + properties = "id" +)] +pub struct InvalidPrefixV1 { + pub id: gts::GtsInstanceId, +} + +fn main() {} diff --git a/gts-macros/tests/compile_fail/invalid_gts_id_missing_prefix.stderr b/gts-macros/tests/compile_fail/invalid_gts_id_missing_prefix.stderr new file mode 100644 index 0000000..53e3f21 --- /dev/null +++ b/gts-macros/tests/compile_fail/invalid_gts_id_missing_prefix.stderr @@ -0,0 +1,5 @@ +error: struct_to_gts_schema: Invalid GTS schema ID: must start with 'gts.' + --> tests/compile_fail/invalid_gts_id_missing_prefix.rs:8:17 + | +8 | schema_id = "x.core.events.type.v1~", + | ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/gts-macros/tests/compile_fail/invalid_gts_id_too_many_tokens.rs b/gts-macros/tests/compile_fail/invalid_gts_id_too_many_tokens.rs new file mode 100644 index 0000000..a443938 --- /dev/null +++ b/gts-macros/tests/compile_fail/invalid_gts_id_too_many_tokens.rs @@ -0,0 +1,31 @@ +//! Test: GTS schema ID with too many tokens in segment +//! 
This is the exact case from issue #47 +//! The second segment has 5 name tokens instead of 4: +//! x.core.license_enforcer.integration.plugin.v1 +//! Should be: vendor.package.namespace.type.vMAJOR + +use gts_macros::struct_to_gts_schema; + +// First define the base struct that we extend +#[struct_to_gts_schema( + dir_path = "schemas", + base = true, + schema_id = "gts.x.core.modkit.plugin.v1~", + description = "Base modkit plugin", + properties = "id" +)] +pub struct BaseModkitPluginV1 { + pub id: gts::GtsInstanceId, +} + +// This should fail - the second segment has too many tokens +#[struct_to_gts_schema( + dir_path = "schemas", + base = BaseModkitPluginV1, + schema_id = "gts.x.core.modkit.plugin.v1~x.core.license_enforcer.integration.plugin.v1~", + description = "License Enforcer platform integration plugin specification", + properties = "" +)] +pub struct LicensePlatformPluginSpecV1; + +fn main() {} diff --git a/gts-macros/tests/compile_fail/invalid_gts_id_too_many_tokens.stderr b/gts-macros/tests/compile_fail/invalid_gts_id_too_many_tokens.stderr new file mode 100644 index 0000000..48e4972 --- /dev/null +++ b/gts-macros/tests/compile_fail/invalid_gts_id_too_many_tokens.stderr @@ -0,0 +1,5 @@ +error: struct_to_gts_schema: Segment #2: Too many name tokens before version (got 5, expected 4). 
Expected format: vendor.package.namespace.type.vMAJOR[.MINOR] + --> tests/compile_fail/invalid_gts_id_too_many_tokens.rs:25:17 + | +25 | schema_id = "gts.x.core.modkit.plugin.v1~x.core.license_enforcer.integration.plugin.v1~", + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/gts-macros/tests/compile_fail/version_missing_both.stderr b/gts-macros/tests/compile_fail/version_missing_both.stderr index f813708..df32305 100644 --- a/gts-macros/tests/compile_fail/version_missing_both.stderr +++ b/gts-macros/tests/compile_fail/version_missing_both.stderr @@ -1,8 +1,8 @@ -error: struct_to_gts_schema: Both struct name and schema_id must have a version. Struct 'BaseEvent' has no version suffix (e.g., V1) and schema_id 'gts.x.core.events.type~' has no version (e.g., v1~). Add version to both (e.g., 'BaseEventV1' with 'gts.x.foo.v1~') - --> tests/compile_fail/version_missing_both.rs:14:12 +error: struct_to_gts_schema: Segment #1: Too few tokens (got 4, min 5). Expected format: gts.vendor.package.namespace.type.vMAJOR[.MINOR] + --> tests/compile_fail/version_missing_both.rs:10:17 | -14 | pub struct BaseEvent { - | ^^^^^^^^^ +10 | schema_id = "gts.x.core.events.type~", + | ^^^^^^^^^^^^^^^^^^^^^^^^^ warning: unused import: `gts::GtsInstanceId` --> tests/compile_fail/version_missing_both.rs:4:5 diff --git a/gts-macros/tests/compile_fail/version_missing_in_schema.stderr b/gts-macros/tests/compile_fail/version_missing_in_schema.stderr index 1996d44..4eb8eba 100644 --- a/gts-macros/tests/compile_fail/version_missing_in_schema.stderr +++ b/gts-macros/tests/compile_fail/version_missing_in_schema.stderr @@ -1,8 +1,8 @@ -error: struct_to_gts_schema: Struct 'BaseEventV1' has version suffix 'V1' but cannot extract version from schema_id 'gts.x.core.events.type~'. 
Expected format with version like 'gts.x.foo.v1~' or 'gts.x.foo.v1.0~' - --> tests/compile_fail/version_missing_in_schema.rs:14:12 +error: struct_to_gts_schema: Segment #1: Too few tokens (got 4, min 5). Expected format: gts.vendor.package.namespace.type.vMAJOR[.MINOR] + --> tests/compile_fail/version_missing_in_schema.rs:10:17 | -14 | pub struct BaseEventV1 { - | ^^^^^^^^^^^ +10 | schema_id = "gts.x.core.events.type~", + | ^^^^^^^^^^^^^^^^^^^^^^^^^ warning: unused import: `gts::GtsInstanceId` --> tests/compile_fail/version_missing_in_schema.rs:4:5 diff --git a/gts-macros/tests/inheritance_tests.rs b/gts-macros/tests/inheritance_tests.rs index 2800669..8ce351a 100644 --- a/gts-macros/tests/inheritance_tests.rs +++ b/gts-macros/tests/inheritance_tests.rs @@ -209,7 +209,7 @@ pub struct ContainerV1 { #[struct_to_gts_schema( dir_path = "schemas", base = ContainerV1, - schema_id = "gts.x.core.events.container.v1~x.app.content.v1~", + schema_id = "gts.x.core.events.container.v1~x.app.entities.content.v1~", description = "Content extending container", properties = "content_value" )] diff --git a/gts-macros/tests/pretty_test.rs b/gts-macros/tests/pretty_test.rs index 6126826..02f1bb3 100644 --- a/gts-macros/tests/pretty_test.rs +++ b/gts-macros/tests/pretty_test.rs @@ -4,7 +4,7 @@ mod tests { #[struct_to_gts_schema( dir_path = "test_schemas", - schema_id = "gts.test.pretty.v1~", + schema_id = "gts.x.test.entities.pretty.v1~", description = "Test schema for pretty formatting", properties = "id,name,value", base = true diff --git a/gts/Cargo.toml b/gts/Cargo.toml index 7432ccc..8e8be26 100644 --- a/gts/Cargo.toml +++ b/gts/Cargo.toml @@ -16,6 +16,7 @@ publish = true workspace = true [dependencies] +gts-id.workspace = true serde.workspace = true serde_json.workspace = true thiserror.workspace = true diff --git a/gts/src/gts.rs b/gts/src/gts.rs index 3c7bc43..7d35c18 100644 --- a/gts/src/gts.rs +++ b/gts/src/gts.rs @@ -4,29 +4,12 @@ use std::sync::LazyLock; use 
thiserror::Error; use uuid::Uuid; -pub const GTS_PREFIX: &str = "gts."; +pub const GTS_PREFIX: &str = gts_id::GTS_PREFIX; /// URI-compatible prefix for GTS identifiers in JSON Schema `$id` field (e.g., `gts://gts.x.y.z...`). /// This is ONLY used for JSON Schema serialization/deserialization, not for GTS ID parsing. pub const GTS_URI_PREFIX: &str = "gts://"; static GTS_NS: LazyLock = LazyLock::new(|| Uuid::new_v5(&Uuid::NAMESPACE_URL, b"gts")); -/// Validates a GTS segment token without regex for better performance. -/// Valid tokens: start with [a-z_], followed by [a-z0-9_]* -#[inline] -fn is_valid_segment_token(token: &str) -> bool { - if token.is_empty() { - return false; - } - let mut chars = token.chars(); - // First character must be [a-z_] - match chars.next() { - Some(c) if c.is_ascii_lowercase() || c == '_' => {} - _ => return false, - } - // Remaining characters must be [a-z0-9_] - chars.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_') -} - #[derive(Debug, Error)] pub enum GtsError { #[error("Invalid GTS segment #{num} @ offset {offset}: '{segment}': {cause}")] @@ -85,159 +68,23 @@ impl GtsIdSegment { Ok(seg) } - #[allow(clippy::too_many_lines)] fn parse_segment_id(&mut self, segment: &str) -> Result<(), GtsError> { - let mut segment = segment.to_owned(); - - // Check for type marker - if segment.contains('~') { - let tilde_count = segment.matches('~').count(); - if tilde_count > 1 { - return Err(GtsError::Segment { - num: self.num, - offset: self.offset, - segment: self.segment.clone(), - cause: "Too many '~' characters".to_owned(), - }); - } - if segment.ends_with('~') { - self.is_type = true; - segment.pop(); - } else { - return Err(GtsError::Segment { - num: self.num, - offset: self.offset, - segment: self.segment.clone(), - cause: " '~' must be at the end".to_owned(), - }); - } - } - - let tokens: Vec<&str> = segment.split('.').collect(); - - if tokens.len() > 6 { - return Err(GtsError::Segment { + let parsed = 
gts_id::validate_segment(self.num, segment, true).map_err(|cause| { + GtsError::Segment { num: self.num, offset: self.offset, segment: self.segment.clone(), - cause: "Too many tokens".to_owned(), - }); - } - - if !segment.ends_with('*') && tokens.len() < 5 { - return Err(GtsError::Segment { - num: self.num, - offset: self.offset, - segment: self.segment.clone(), - cause: "Too few tokens".to_owned(), - }); - } - - // Validate tokens (except version tokens) - if !segment.ends_with('*') { - for (i, token) in tokens.iter().take(4).enumerate() { - if !is_valid_segment_token(token) { - return Err(GtsError::Segment { - num: self.num, - offset: self.offset, - segment: self.segment.clone(), - cause: format!("Invalid segment token: {}", tokens[i]), - }); - } + cause, } - } - - // Parse tokens - if !tokens.is_empty() { - if tokens[0] == "*" { - self.is_wildcard = true; - return Ok(()); - } - tokens[0].clone_into(&mut self.vendor); - } - - if tokens.len() > 1 { - if tokens[1] == "*" { - self.is_wildcard = true; - return Ok(()); - } - tokens[1].clone_into(&mut self.package); - } - - if tokens.len() > 2 { - if tokens[2] == "*" { - self.is_wildcard = true; - return Ok(()); - } - tokens[2].clone_into(&mut self.namespace); - } - - if tokens.len() > 3 { - if tokens[3] == "*" { - self.is_wildcard = true; - return Ok(()); - } - tokens[3].clone_into(&mut self.type_name); - } - - if tokens.len() > 4 { - if tokens[4] == "*" { - self.is_wildcard = true; - return Ok(()); - } - - if !tokens[4].starts_with('v') { - return Err(GtsError::Segment { - num: self.num, - offset: self.offset, - segment: self.segment.clone(), - cause: "Major version must start with 'v'".to_owned(), - }); - } - - let major_str = &tokens[4][1..]; - self.ver_major = major_str.parse().map_err(|_| GtsError::Segment { - num: self.num, - offset: self.offset, - segment: self.segment.clone(), - cause: "Major version must be an integer".to_owned(), - })?; - - if major_str != self.ver_major.to_string() { - return 
Err(GtsError::Segment { - num: self.num, - offset: self.offset, - segment: self.segment.clone(), - cause: "Major version must be an integer".to_owned(), - }); - } - } - - if tokens.len() > 5 { - if tokens[5] == "*" { - self.is_wildcard = true; - return Ok(()); - } - - let minor: u32 = tokens[5].parse().map_err(|_| GtsError::Segment { - num: self.num, - offset: self.offset, - segment: self.segment.clone(), - cause: "Minor version must be an integer".to_owned(), - })?; - - if tokens[5] != minor.to_string() { - return Err(GtsError::Segment { - num: self.num, - offset: self.offset, - segment: self.segment.clone(), - cause: "Minor version must be an integer".to_owned(), - }); - } - - self.ver_minor = Some(minor); - } - + })?; + self.vendor = parsed.vendor; + self.package = parsed.package; + self.namespace = parsed.namespace; + self.type_name = parsed.type_name; + self.ver_major = parsed.ver_major; + self.ver_minor = parsed.ver_minor; + self.is_type = parsed.is_type; + self.is_wildcard = parsed.is_wildcard; Ok(()) } } @@ -260,69 +107,46 @@ impl GtsID { pub fn new(id: &str) -> Result { let raw = id.trim(); - // Validate lowercase - if raw != raw.to_lowercase() { - return Err(GtsError::Id { - id: id.to_owned(), - cause: "Must be lower case".to_owned(), - }); - } - - if raw.contains('-') { - return Err(GtsError::Id { - id: id.to_owned(), - cause: "Must not contain '-'".to_owned(), - }); - } - - if !raw.starts_with(GTS_PREFIX) { - return Err(GtsError::Id { - id: id.to_owned(), - cause: format!("Does not start with '{GTS_PREFIX}'"), - }); - } - - if raw.len() > 1024 { - return Err(GtsError::Id { + // Delegate all validation to the shared gts-id crate (single source of truth). + let parsed_segments = gts_id::validate_gts_id(raw, true).map_err(|e| match e { + gts_id::GtsIdError::Id { cause, .. 
} => GtsError::Id { id: id.to_owned(), - cause: "Too long".to_owned(), - }); - } - - let mut gts_id_segments = Vec::new(); - let remainder = &raw[GTS_PREFIX.len()..]; - - // Split by ~ preserving empties to detect trailing ~ - let tilde_parts: Vec<&str> = remainder.split('~').collect(); - let mut parts = Vec::new(); - - for i in 0..tilde_parts.len() { - if i < tilde_parts.len() - 1 { - parts.push(format!("{}~", tilde_parts[i])); - if i == tilde_parts.len() - 2 && tilde_parts[i + 1].is_empty() { - break; - } - } else { - parts.push(tilde_parts[i].to_owned()); - } - } - - let mut offset = GTS_PREFIX.len(); - for (i, part) in parts.iter().enumerate() { - if part.is_empty() || part == "~" { - return Err(GtsError::Id { - id: id.to_owned(), - cause: format!("GTS segment #{} @ offset {offset} is empty", i + 1), - }); - } - - gts_id_segments.push(GtsIdSegment::new(i + 1, offset, part)?); - offset += part.len(); - } + cause, + }, + gts_id::GtsIdError::Segment { + num, + offset, + segment, + cause, + } => GtsError::Segment { + num, + offset, + segment, + cause, + }, + })?; - // Issue #37: Single-segment instance IDs are prohibited - // Instance IDs must be chained with at least one type segment (e.g., 'type~instance') - // This check should only apply to non-wildcard, non-type single-segment IDs + // Convert ParsedSegment → GtsIdSegment + let gts_id_segments: Vec = parsed_segments + .into_iter() + .enumerate() + .map(|(i, p)| GtsIdSegment { + num: i + 1, + offset: p.offset, + segment: p.raw, + vendor: p.vendor, + package: p.package, + namespace: p.namespace, + type_name: p.type_name, + ver_major: p.ver_major, + ver_minor: p.ver_minor, + is_type: p.is_type, + is_wildcard: p.is_wildcard, + }) + .collect(); + + // Issue #37: Single-segment instance IDs are prohibited. + // Instance IDs must be chained with at least one type segment (e.g., 'type~instance'). if gts_id_segments.len() == 1 && !gts_id_segments[0].is_type && !gts_id_segments[0].is_wildcard