From 6e86dad2010c11b61d5388cb2329828a62ee5d18 Mon Sep 17 00:00:00 2001 From: devjow Date: Fri, 27 Feb 2026 14:49:03 +0000 Subject: [PATCH 01/10] feat: generate schemas and well-known instances resolves #70 Signed-off-by: devjow --- Cargo.lock | 1 + gts-cli/Cargo.toml | 1 + gts-cli/src/cli.rs | 42 +- gts-cli/src/gen_common.rs | 362 ++++++++ gts-cli/src/gen_instances/attrs.rs | 289 +++++++ gts-cli/src/gen_instances/mod.rs | 337 ++++++++ gts-cli/src/gen_instances/parser.rs | 640 +++++++++++++++ gts-cli/src/gen_instances/string_lit.rs | 132 +++ gts-cli/src/gen_instances/writer.rs | 181 ++++ gts-cli/src/gen_schemas.rs | 210 +---- gts-cli/src/lib.rs | 2 + gts-cli/src/main.rs | 2 + gts-cli/tests/cli_run_tests.rs | 1 + gts-cli/tests/gen_instances_tests.rs | 774 ++++++++++++++++++ gts-macros/README.md | 90 ++ gts-macros/src/lib.rs | 238 ++++++ .../compile_fail/instance_const_wrong_type.rs | 12 + .../instance_const_wrong_type.stderr | 5 + .../compile_fail/instance_missing_dir_path.rs | 11 + .../instance_missing_dir_path.stderr | 10 + .../instance_missing_instance_segment.rs | 11 + .../instance_missing_instance_segment.stderr | 10 + .../instance_missing_schema_id.rs | 11 + .../instance_missing_schema_id.stderr | 10 + .../compile_fail/instance_on_non_const.rs | 12 + .../compile_fail/instance_on_non_const.stderr | 11 + .../instance_schema_id_no_tilde.rs | 12 + .../instance_schema_id_no_tilde.stderr | 5 + .../instance_segment_bare_wildcard.rs | 12 + .../instance_segment_bare_wildcard.stderr | 5 + .../instance_segment_ends_with_tilde.rs | 12 + .../instance_segment_ends_with_tilde.stderr | 5 + 32 files changed, 3272 insertions(+), 184 deletions(-) create mode 100644 gts-cli/src/gen_common.rs create mode 100644 gts-cli/src/gen_instances/attrs.rs create mode 100644 gts-cli/src/gen_instances/mod.rs create mode 100644 gts-cli/src/gen_instances/parser.rs create mode 100644 gts-cli/src/gen_instances/string_lit.rs create mode 100644 gts-cli/src/gen_instances/writer.rs create mode 100644 
gts-cli/tests/gen_instances_tests.rs create mode 100644 gts-macros/tests/compile_fail/instance_const_wrong_type.rs create mode 100644 gts-macros/tests/compile_fail/instance_const_wrong_type.stderr create mode 100644 gts-macros/tests/compile_fail/instance_missing_dir_path.rs create mode 100644 gts-macros/tests/compile_fail/instance_missing_dir_path.stderr create mode 100644 gts-macros/tests/compile_fail/instance_missing_instance_segment.rs create mode 100644 gts-macros/tests/compile_fail/instance_missing_instance_segment.stderr create mode 100644 gts-macros/tests/compile_fail/instance_missing_schema_id.rs create mode 100644 gts-macros/tests/compile_fail/instance_missing_schema_id.stderr create mode 100644 gts-macros/tests/compile_fail/instance_on_non_const.rs create mode 100644 gts-macros/tests/compile_fail/instance_on_non_const.stderr create mode 100644 gts-macros/tests/compile_fail/instance_schema_id_no_tilde.rs create mode 100644 gts-macros/tests/compile_fail/instance_schema_id_no_tilde.stderr create mode 100644 gts-macros/tests/compile_fail/instance_segment_bare_wildcard.rs create mode 100644 gts-macros/tests/compile_fail/instance_segment_bare_wildcard.stderr create mode 100644 gts-macros/tests/compile_fail/instance_segment_ends_with_tilde.rs create mode 100644 gts-macros/tests/compile_fail/instance_segment_ends_with_tilde.stderr diff --git a/Cargo.lock b/Cargo.lock index e8c76fc..d515a73 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -547,6 +547,7 @@ dependencies = [ "chrono", "clap", "gts", + "gts-id", "regex", "serde", "serde_json", diff --git a/gts-cli/Cargo.toml b/gts-cli/Cargo.toml index 47373f4..1751dc5 100644 --- a/gts-cli/Cargo.toml +++ b/gts-cli/Cargo.toml @@ -21,6 +21,7 @@ path = "src/main.rs" [dependencies] gts.workspace = true +gts-id.workspace = true serde.workspace = true serde_json.workspace = true anyhow.workspace = true diff --git a/gts-cli/src/cli.rs b/gts-cli/src/cli.rs index 9b28cec..69b89bf 100644 --- a/gts-cli/src/cli.rs +++ 
b/gts-cli/src/cli.rs @@ -3,6 +3,7 @@ use clap::{Parser, Subcommand}; use gts::GtsOps; use std::io::Write; +use crate::gen_instances::generate_instances_from_rust; use crate::gen_schemas::generate_schemas_from_rust; use crate::server::GtsHttpServer; @@ -119,20 +120,35 @@ pub enum Commands { #[arg(long, default_value = "8000")] port: u16, }, - /// Generate GTS schemas from Rust source code with `#[struct_to_gts_schema]` annotations + /// Generate GTS artifacts from Rust source code with `#[struct_to_gts_schema]` / + /// `#[gts_well_known_instance]` annotations GenerateFromRust { - /// Source directory or file to scan for annotated structs + /// Source directory or file to scan for annotated items #[arg(long)] source: String, - /// Output directory for generated schemas (optional: uses paths from macro if not specified) + /// Output directory for generated files (optional: uses paths from macro if not specified) #[arg(long)] output: Option, /// Exclude patterns (can be specified multiple times). Supports glob patterns. /// Example: --exclude "tests/*" --exclude "examples/*" #[arg(long, action = clap::ArgAction::Append)] exclude: Vec, + /// What to generate: schemas (default), instances, or all + #[arg(long, default_value = "schemas")] + mode: GenerateMode, }, } + +/// Controls what `generate-from-rust` generates. 
+#[derive(clap::ValueEnum, Clone, Debug, PartialEq, Eq)] +pub enum GenerateMode { + /// Generate JSON schemas from `#[struct_to_gts_schema]` annotations (default) + Schemas, + /// Generate well-known instance JSON files from `#[gts_well_known_instance]` annotations + Instances, + /// Generate both schemas and instances + All, +} /// Run the CLI application /// /// # Errors @@ -259,9 +275,19 @@ async fn run_command(cli: Cli) -> Result<()> { source, output, exclude, - } => { - generate_schemas_from_rust(&source, output.as_deref(), &exclude, cli.verbose)?; - } + mode, + } => match mode { + GenerateMode::Schemas => { + generate_schemas_from_rust(&source, output.as_deref(), &exclude, cli.verbose)?; + } + GenerateMode::Instances => { + generate_instances_from_rust(&source, output.as_deref(), &exclude, cli.verbose)?; + } + GenerateMode::All => { + generate_schemas_from_rust(&source, output.as_deref(), &exclude, cli.verbose)?; + generate_instances_from_rust(&source, output.as_deref(), &exclude, cli.verbose)?; + } + }, } Ok(()) @@ -357,10 +383,12 @@ mod tests { source, output, exclude, + mode, } => { assert_eq!(source, "/src/path"); assert_eq!(output, Some("/out/path".to_owned())); assert_eq!(exclude, vec!["tests/*", "examples/*"]); + assert_eq!(mode, GenerateMode::Schemas); } _ => panic!("Expected GenerateFromRust command"), } @@ -639,10 +667,12 @@ mod tests { source, output, exclude, + mode, } => { assert_eq!(source, "/src/path"); assert_eq!(output, None); assert!(exclude.is_empty()); + assert_eq!(mode, GenerateMode::Schemas); } _ => panic!("Expected GenerateFromRust command"), } diff --git a/gts-cli/src/gen_common.rs b/gts-cli/src/gen_common.rs new file mode 100644 index 0000000..bb5e7fb --- /dev/null +++ b/gts-cli/src/gen_common.rs @@ -0,0 +1,362 @@ +use anyhow::{Result, bail}; +use regex::Regex; +use std::fs; +use std::path::{Path, PathBuf}; +use walkdir::WalkDir; + +/// Directories that are automatically ignored (e.g., trybuild `compile_fail` tests) +pub const 
AUTO_IGNORE_DIRS: &[&str] = &["compile_fail"]; + +/// Reason why a file was skipped +#[derive(Debug, Clone, Copy)] +pub enum SkipReason { + ExcludePattern, + AutoIgnoredDir, + IgnoreDirective, +} + +impl std::fmt::Display for SkipReason { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::ExcludePattern => write!(f, "matched --exclude pattern"), + Self::AutoIgnoredDir => write!(f, "in auto-ignored directory (compile_fail)"), + Self::IgnoreDirective => write!(f, "has // gts:ignore directive"), + } + } +} + +/// Check if a path matches any of the exclude patterns +#[must_use] +pub fn should_exclude_path(path: &Path, patterns: &[String]) -> bool { + let path_str = path.to_string_lossy(); + for pattern in patterns { + if matches_glob_pattern(&path_str, pattern) { + return true; + } + } + false +} + +/// Simple glob pattern matching +/// Supports: * (any characters), ** (any path segments) +#[must_use] +pub fn matches_glob_pattern(path: &str, pattern: &str) -> bool { + let regex_pattern = pattern + .replace('.', r"\.") + .replace("**", "<>") + .replace('*', "[^/]*") + .replace("<>", ".*"); + + if let Ok(re) = Regex::new(&format!("(^|/){regex_pattern}($|/)")) { + re.is_match(path) + } else { + path.contains(pattern) + } +} + +/// Check if path is in an auto-ignored directory (e.g., `compile_fail`) +#[must_use] +pub fn is_in_auto_ignored_dir(path: &Path) -> bool { + path.components().any(|component| { + if let Some(name) = component.as_os_str().to_str() { + AUTO_IGNORE_DIRS.contains(&name) + } else { + false + } + }) +} + +/// Check if file content starts with the gts:ignore directive +#[must_use] +pub fn has_ignore_directive(content: &str) -> bool { + for line in content.lines().take(10) { + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + if trimmed.to_lowercase().starts_with("// gts:ignore") { + return true; + } + if !trimmed.starts_with("//") && !trimmed.starts_with("#!") { + break; + } + } + false +} + 
+/// Walk Rust source files in the given source path, applying exclusion rules. +/// Calls `visitor` for each file that should be processed, with (path, content). +/// Returns (`files_scanned`, `files_skipped`). +/// +/// # Errors +/// Returns an error if the visitor closure returns an error for any file. +pub fn walk_rust_files( + source_path: &Path, + exclude_patterns: &[String], + verbose: u8, + mut visitor: F, +) -> Result<(usize, usize)> +where + F: FnMut(&Path, &str) -> Result<()>, +{ + let mut files_scanned = 0; + let mut files_skipped = 0; + + for entry in WalkDir::new(source_path).follow_links(true) { + let entry = match entry { + Ok(e) => e, + Err(e) => { + eprintln!("warning: skipping unreadable path during walk: {e}"); + continue; + } + }; + let path = entry.path(); + if path.extension().and_then(|s| s.to_str()) != Some("rs") { + continue; + } + + if should_exclude_path(path, exclude_patterns) { + files_skipped += 1; + if verbose > 0 { + println!( + " Skipped: {} ({})", + path.display(), + SkipReason::ExcludePattern + ); + } + continue; + } + + if is_in_auto_ignored_dir(path) { + files_skipped += 1; + if verbose > 0 { + println!( + " Skipped: {} ({})", + path.display(), + SkipReason::AutoIgnoredDir + ); + } + continue; + } + + match fs::read_to_string(path) { + Err(e) => { + eprintln!("warning: skipping unreadable file {}: {e}", path.display()); + files_skipped += 1; + } + Ok(content) => { + if has_ignore_directive(&content) { + files_skipped += 1; + if verbose > 0 { + println!( + " Skipped: {} ({})", + path.display(), + SkipReason::IgnoreDirective + ); + } + continue; + } + + files_scanned += 1; + visitor(path, &content)?; + } + } + } + + Ok((files_scanned, files_skipped)) +} + +/// Compute the sandbox root from the source path and optional output override. +/// +/// - If `--output` is provided: sandbox root is the output directory (trusted root). +/// - If `--source` is a file (no `--output`): sandbox root is the file's parent directory. 
+/// - If `--source` is a directory (no `--output`): sandbox root is the directory itself. +/// +/// # Errors +/// Returns an error if the output directory or source path cannot be canonicalized. +pub fn compute_sandbox_root( + source_canonical: &Path, + output_override: Option<&str>, +) -> Result { + if let Some(output_dir) = output_override { + let out = Path::new(output_dir); + if out.exists() { + Ok(out.canonicalize()?) + } else { + fs::create_dir_all(out)?; + Ok(out.canonicalize()?) + } + } else if source_canonical.is_file() { + Ok(source_canonical + .parent() + .unwrap_or(source_canonical) + .to_path_buf()) + } else { + Ok(source_canonical.to_path_buf()) + } +} + +/// Safe canonicalization for potentially non-existent paths. +/// +/// Algorithm: +/// 1. Reject any raw `..` component anywhere in the path (checked before filesystem access). +/// 2. If the path already exists, canonicalize normally. +/// 3. Walk up parent components until an existing ancestor is found. +/// 4. Canonicalize that ancestor (resolves symlinks, `.`, `..`). +/// 5. Append the remaining suffix components. +/// 6. Returns the resulting path (not yet validated against sandbox). +/// +/// # Errors +/// Returns an error if a `..` component appears anywhere in the path, +/// or if canonicalization fails for the existing ancestor. +pub fn safe_canonicalize_nonexistent(path: &Path) -> Result { + // Eagerly reject any .. component in the raw path before any filesystem ops. + // This covers cases like /tmp/nonexistent/../escape where .. appears in the middle. + for component in path.components() { + if component == std::path::Component::ParentDir { + bail!( + "Security error: path traversal via '..' 
is not permitted in output paths: {}", + path.display() + ); + } + } + + if path.exists() { + return Ok(path.canonicalize()?); + } + + // Walk up to find the first existing ancestor + let mut existing_ancestor = path.to_path_buf(); + let mut suffix_components: Vec = Vec::new(); + + loop { + if existing_ancestor.exists() { + break; + } + match existing_ancestor.file_name() { + Some(name) => { + suffix_components.push(name.to_owned()); + } + None => { + // Reached root without finding existing ancestor + break; + } + } + match existing_ancestor.parent() { + Some(parent) => existing_ancestor = parent.to_path_buf(), + None => break, + } + } + + let canonical_ancestor = if existing_ancestor.exists() { + existing_ancestor.canonicalize()? + } else { + existing_ancestor + }; + + // Re-append suffix in original order (we built it in reverse) + suffix_components.reverse(); + let mut result = canonical_ancestor; + for component in suffix_components { + result = result.join(component); + } + + Ok(result) +} + +/// Validate that the output path is within the sandbox boundary. +/// Returns the safe canonical path on success. +/// +/// # Errors +/// Returns an error if the resolved path escapes the sandbox root. +#[allow(dead_code)] +pub fn validate_output_path_in_sandbox( + output_path: &Path, + sandbox_root: &Path, + annotation_name: &str, + source_file: &Path, + dir_path: &str, +) -> Result { + let canonical = safe_canonicalize_nonexistent(output_path)?; + + if !canonical.starts_with(sandbox_root) { + bail!( + "Security error in {} - dir_path '{}' attempts to write outside sandbox boundary. 
\ + Resolved to: {}, but must be within: {}", + source_file.display(), + dir_path, + canonical.display(), + sandbox_root.display(), + // annotation_name for diagnostics + ); + } + let _ = annotation_name; + + Ok(canonical) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_matches_glob_pattern() { + assert!(matches_glob_pattern( + "src/tests/compile_fail/test.rs", + "compile_fail" + )); + assert!(matches_glob_pattern( + "tests/compile_fail/test.rs", + "compile_fail" + )); + assert!(matches_glob_pattern("src/tests/foo.rs", "tests/*")); + assert!(matches_glob_pattern("src/examples/bar.rs", "examples/*")); + assert!(matches_glob_pattern("a/b/c/d/test.rs", "**/test.rs")); + } + + #[test] + fn test_is_in_auto_ignored_dir() { + assert!(is_in_auto_ignored_dir(Path::new( + "tests/compile_fail/test.rs" + ))); + assert!(is_in_auto_ignored_dir(Path::new("src/compile_fail/foo.rs"))); + assert!(!is_in_auto_ignored_dir(Path::new("src/models.rs"))); + assert!(!is_in_auto_ignored_dir(Path::new("tests/integration.rs"))); + } + + #[test] + fn test_has_ignore_directive() { + assert!(has_ignore_directive("// gts:ignore\nuse foo::bar;")); + assert!(has_ignore_directive("// GTS:IGNORE\nuse foo::bar;")); + assert!(has_ignore_directive( + "//! 
Module doc\n// gts:ignore\nuse foo::bar;" + )); + assert!(!has_ignore_directive("use foo::bar;\n// gts:ignore")); + assert!(!has_ignore_directive("use foo::bar;")); + } + + #[test] + fn test_should_exclude_path_matching_pattern() { + let patterns = vec!["test_*".to_owned(), "**/target/**".to_owned()]; + let path = Path::new("src/test_helper.rs"); + assert!(should_exclude_path(path, &patterns)); + } + + #[test] + fn test_should_exclude_path_no_match() { + let patterns = vec!["test_*".to_owned(), "**/compile_fail/**".to_owned()]; + let path = Path::new("src/main.rs"); + assert!(!should_exclude_path(path, &patterns)); + } + + #[test] + fn test_safe_canonicalize_nonexistent_traversal_rejected() { + let _path = Path::new("/tmp/../etc/passwd"); + // This path has .. in it and /tmp exists, but the .. is in the existing ancestor chain + // Safe canonicalize should resolve it via the existing /tmp parent + // The important test is a suffix with .. + let nonexistent = Path::new("/tmp/gts_test_nonexistent_12345/../escape"); + let result = safe_canonicalize_nonexistent(nonexistent); + assert!(result.is_err(), "Should reject '..' in non-existent suffix"); + } +} diff --git a/gts-cli/src/gen_instances/attrs.rs b/gts-cli/src/gen_instances/attrs.rs new file mode 100644 index 0000000..7b19b87 --- /dev/null +++ b/gts-cli/src/gen_instances/attrs.rs @@ -0,0 +1,289 @@ +use anyhow::{Result, bail}; +use regex::Regex; +use std::collections::HashSet; + +/// Parsed and validated attributes from `#[gts_well_known_instance(...)]`. +#[derive(Debug, Clone)] +pub struct InstanceAttrs { + pub dir_path: String, + pub schema_id: String, + pub instance_segment: String, +} + +/// Parse and validate instance annotation attribute body. 
+/// +/// # Errors +/// - Any required attribute (`dir_path`, `schema_id`, `instance_segment`) is missing +/// - `schema_id` does not end with `~` +/// - `instance_segment` ends with `~` or is a bare wildcard `*` +/// - The composed `schema_id + instance_segment` fails GTS ID validation +pub fn parse_instance_attrs( + attr_body: &str, + source_file: &str, + line: usize, +) -> Result { + check_duplicate_attr_keys(attr_body, source_file, line)?; + + let dir_path = extract_str_attr(attr_body, "dir_path").ok_or_else(|| { + anyhow::anyhow!("{source_file}:{line}: Missing required attribute 'dir_path' in #[gts_well_known_instance]") + })?; + + let schema_id = extract_str_attr(attr_body, "schema_id").ok_or_else(|| { + anyhow::anyhow!("{source_file}:{line}: Missing required attribute 'schema_id' in #[gts_well_known_instance]") + })?; + + let instance_segment = extract_str_attr(attr_body, "instance_segment").ok_or_else(|| { + anyhow::anyhow!("{source_file}:{line}: Missing required attribute 'instance_segment' in #[gts_well_known_instance]") + })?; + + if !schema_id.ends_with('~') { + bail!( + "{source_file}:{line}: schema_id '{schema_id}' must end with '~' (type marker). \ + Instance IDs are composed as schema_id + instance_segment." + ); + } + + if instance_segment.ends_with('~') { + bail!( + "{source_file}:{line}: instance_segment '{instance_segment}' must not end with '~' -- \ + that is a schema/type marker, not valid in an instance segment." + ); + } + + if instance_segment == "*" { + bail!( + "{source_file}:{line}: instance_segment must not be a bare wildcard '*'. \ + Wildcards are not valid in generated instance IDs." + ); + } + + let composed = format!("{schema_id}{instance_segment}"); + if let Err(e) = gts_id::validate_gts_id(&composed, false) { + let msg = match &e { + gts_id::GtsIdError::Id { cause, .. } => cause.clone(), + gts_id::GtsIdError::Segment { num, cause, .. 
} => { + format!("segment #{num}: {cause}") + } + }; + bail!("{source_file}:{line}: Invalid composed instance ID '{composed}': {msg}"); + } + + Ok(InstanceAttrs { + dir_path, + schema_id, + instance_segment, + }) +} + +/// Error if any of the known attribute keys appears more than once in the body. +/// +/// String literal content is stripped before scanning so that `key =` text +/// inside a string value (e.g. `dir_path = "schema_id = x"`) does not +/// trigger a false duplicate. +fn check_duplicate_attr_keys(attr_body: &str, source_file: &str, line: usize) -> Result<()> { + let key_re = Regex::new(r"([A-Za-z_][A-Za-z0-9_]*)\s*=").ok(); + let Some(re) = key_re else { + return Ok(()); + }; + let known: HashSet<&str> = ["dir_path", "schema_id", "instance_segment"] + .iter() + .copied() + .collect(); + // Blank out string literal content so `key =` inside a value can't match. + let stripped = blank_string_literals(attr_body); + let mut seen: HashSet = HashSet::new(); + for cap in re.captures_iter(&stripped) { + let key = cap.get(1).map_or("", |m| m.as_str()); + if !known.contains(key) { + continue; + } + if !seen.insert(key.to_owned()) { + bail!( + "{source_file}:{line}: Duplicate attribute '{key}' in \ + #[gts_well_known_instance]. Each attribute must appear exactly once." + ); + } + } + Ok(()) +} + +/// Replace the content of every string literal in `s` with spaces, +/// preserving byte positions so that other offsets remain valid. +/// Handles both regular `"..."` and raw `r#"..."#` (any number of hashes). +fn blank_string_literals(s: &str) -> String { + let bytes = s.as_bytes(); + let len = bytes.len(); + let mut out = s.to_owned().into_bytes(); + let mut pos = 0; + while pos < len { + // Raw string literal: r"..." or r#"..."# or r##"..."##, etc. 
+ if bytes[pos] == b'r' { + let mut hash_end = pos + 1; + while hash_end < len && bytes[hash_end] == b'#' { + hash_end += 1; + } + let hashes = hash_end - (pos + 1); + if hash_end < len && bytes[hash_end] == b'"' { + // Found r", now scan for closing "# + let content_start = hash_end + 1; + let mut scan = content_start; + 'raw: while scan < len { + if bytes[scan] == b'"' { + // Check for the required number of closing hashes + let mut close = scan + 1; + let mut count = 0; + while close < len && bytes[close] == b'#' && count < hashes { + count += 1; + close += 1; + } + if count == hashes { + // Blank the content between opening and closing delimiters + for byte in &mut out[content_start..scan] { + if byte.is_ascii() { + *byte = b' '; + } + } + pos = close; + break 'raw; + } + } + scan += 1; + } + continue; + } + // Not a raw string — fall through to normal char handling + } + // Regular string literal: "..." + if bytes[pos] == b'"' { + pos += 1; + while pos < len { + if bytes[pos] == b'\\' { + // Replace both the backslash and the escaped char with spaces. + if bytes[pos].is_ascii() { + out[pos] = b' '; + } + pos += 1; + if pos < len && bytes[pos].is_ascii() { + out[pos] = b' '; + } + pos += 1; + continue; + } + if bytes[pos] == b'"' { + pos += 1; + break; + } + if bytes[pos].is_ascii() { + out[pos] = b' '; + } + pos += 1; + } + } else { + pos += 1; + } + } + String::from_utf8(out).unwrap_or_else(|_| s.to_owned()) +} + +/// Extract a `key = "value"` string attribute from an attribute body. 
+fn extract_str_attr(attr_body: &str, key: &str) -> Option { + let pattern = format!(r#"{key}\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)""#); + let re = Regex::new(&pattern).ok()?; + re.captures(attr_body) + .and_then(|c| c.get(1).map(|m| m.as_str().to_owned())) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_valid_attrs() { + let body = r#"dir_path = "instances", schema_id = "gts.x.core.events.topic.v1~", instance_segment = "x.commerce._.orders.v1.0""#; + let attrs = parse_instance_attrs(body, "test.rs", 1).unwrap(); + assert_eq!(attrs.dir_path, "instances"); + assert_eq!(attrs.schema_id, "gts.x.core.events.topic.v1~"); + assert_eq!(attrs.instance_segment, "x.commerce._.orders.v1.0"); + } + + #[test] + fn test_missing_dir_path() { + let body = r#"schema_id = "gts.x.foo.v1~", instance_segment = "x.bar.v1.0""#; + let err = parse_instance_attrs(body, "test.rs", 5).unwrap_err(); + assert!(err.to_string().contains("dir_path")); + } + + #[test] + fn test_missing_schema_id() { + let body = r#"dir_path = "instances", instance_segment = "x.bar.v1.0""#; + let err = parse_instance_attrs(body, "test.rs", 5).unwrap_err(); + assert!(err.to_string().contains("schema_id")); + } + + #[test] + fn test_missing_instance_segment() { + let body = r#"dir_path = "instances", schema_id = "gts.x.foo.v1~""#; + let err = parse_instance_attrs(body, "test.rs", 5).unwrap_err(); + assert!(err.to_string().contains("instance_segment")); + } + + #[test] + fn test_schema_id_missing_tilde() { + let body = r#"dir_path = "instances", schema_id = "gts.x.foo.v1", instance_segment = "x.bar.v1.0""#; + let err = parse_instance_attrs(body, "test.rs", 1).unwrap_err(); + assert!(err.to_string().contains("must end with '~'")); + } + + #[test] + fn test_instance_segment_with_tilde() { + let body = r#"dir_path = "instances", schema_id = "gts.x.foo.v1~", instance_segment = "x.bar.v1~""#; + let err = parse_instance_attrs(body, "test.rs", 1).unwrap_err(); + assert!(err.to_string().contains("must not 
end with '~'")); + } + + #[test] + fn test_instance_segment_bare_wildcard() { + let body = r#"dir_path = "instances", schema_id = "gts.x.foo.v1~", instance_segment = "*""#; + let err = parse_instance_attrs(body, "test.rs", 1).unwrap_err(); + assert!(err.to_string().contains("wildcard")); + } + + #[test] + fn test_error_contains_file_and_line() { + let body = r#"schema_id = "gts.x.foo.v1~", instance_segment = "x.bar.v1.0""#; + let err = parse_instance_attrs(body, "src/events.rs", 42).unwrap_err(); + let msg = err.to_string(); + assert!(msg.contains("src/events.rs")); + assert!(msg.contains("42")); + } + + #[test] + fn test_key_in_string_value_not_false_duplicate() { + // dir_path value contains "schema_id = x" — must not trigger a false duplicate. + let body = r#"dir_path = "schema_id = x", schema_id = "gts.x.core.events.topic.v1~", instance_segment = "x.commerce._.orders.v1.0""#; + let attrs = parse_instance_attrs(body, "test.rs", 1).unwrap(); + assert_eq!(attrs.dir_path, "schema_id = x"); + } + + #[test] + fn test_blank_string_literals_blanks_raw_strings() { + // Raw string content containing key= must be blanked so duplicate detection + // can't see it. Attribute values always use regular "..." in practice, but + // blank_string_literals is defensive. + // Input: r#"schema_id = x"# rest + let s = "r#\"schema_id = x\"# rest"; + let blanked = blank_string_literals(s); + // The content between r#" and "# must be spaces; the surrounding tokens intact. 
+ assert!( + !blanked.contains("schema_id"), + "raw string content should be blanked, got: {blanked:?}" + ); + } + + #[test] + fn test_real_duplicate_key_is_rejected() { + let body = r#"dir_path = "instances", dir_path = "other", schema_id = "gts.x.core.events.topic.v1~", instance_segment = "x.commerce._.orders.v1.0""#; + let err = parse_instance_attrs(body, "test.rs", 1).unwrap_err(); + assert!(err.to_string().contains("Duplicate attribute")); + } +} diff --git a/gts-cli/src/gen_instances/mod.rs b/gts-cli/src/gen_instances/mod.rs new file mode 100644 index 0000000..ee15453 --- /dev/null +++ b/gts-cli/src/gen_instances/mod.rs @@ -0,0 +1,337 @@ +pub mod attrs; +pub mod parser; +pub mod string_lit; +pub mod writer; + +use anyhow::{Result, bail}; +use std::collections::HashMap; +use std::path::Path; + +use crate::gen_common::{compute_sandbox_root, walk_rust_files}; +use parser::ParsedInstance; +use writer::generate_single_instance; + +/// Generate GTS well-known instance files from Rust source code annotated with +/// `#[gts_well_known_instance]`. 
+/// +/// # Arguments +/// * `source` - Source directory or file to scan +/// * `output` - Optional output directory override (default: adjacent to the source file) +/// * `exclude_patterns` - Glob-style patterns to exclude during file walking +/// * `verbose` - Verbosity level (0 = normal, 1+ = show skipped files) +/// +/// # Errors +/// Returns an error if: +/// - The source path does not exist +/// - Any annotation is malformed or uses an unsupported form +/// - Duplicate instance IDs are detected (hard error, both locations reported) +/// - Duplicate output paths are detected +/// - Any output path escapes the sandbox boundary +/// - File I/O fails +pub fn generate_instances_from_rust( + source: &str, + output: Option<&str>, + exclude_patterns: &[String], + verbose: u8, +) -> Result<()> { + println!("Scanning Rust source files for instances in: {source}"); + + let source_path = Path::new(source); + if !source_path.exists() { + bail!("Source path does not exist: {source}"); + } + + let source_canonical = source_path.canonicalize()?; + let sandbox_root = compute_sandbox_root(&source_canonical, output)?; + + let mut all_instances: Vec = Vec::new(); + let mut parse_errors: Vec = Vec::new(); + + let (files_scanned, files_skipped) = + walk_rust_files(source_path, exclude_patterns, verbose, |path, content| { + match parser::extract_instances_from_source(content, path) { + Ok(instances) => all_instances.extend(instances), + Err(e) => parse_errors.push(format!("{}: {}", path.display(), e)), + } + Ok(()) + })?; + + // Report all parse errors before bailing + if !parse_errors.is_empty() { + let mut sorted = parse_errors; + sorted.sort(); + sorted.dedup(); + for err in &sorted { + eprintln!("error: {err}"); + } + bail!( + "Instance generation failed with {} parse error(s):\n{}", + sorted.len(), + sorted.join("\n") + ); + } + + check_duplicate_ids(&all_instances)?; + check_duplicate_output_paths(&all_instances, output, &sandbox_root)?; + + let instances_generated = 
emit_instances(&all_instances, output, &sandbox_root)?; + + print_summary(files_scanned, files_skipped, instances_generated); + Ok(()) +} + +/// Hard-error if two annotations share the same composed instance ID. +fn check_duplicate_ids(instances: &[ParsedInstance]) -> Result<()> { + let mut id_seen: HashMap = HashMap::new(); + let mut errors: Vec = Vec::new(); + + for inst in instances { + let composed = format!("{}{}", inst.attrs.schema_id, inst.attrs.instance_segment); + if let Some(prev) = id_seen.get(composed.as_str()) { + errors.push(format!( + "Duplicate instance ID '{composed}':\n first: {}\n second: {}:{}", + prev, inst.source_file, inst.line + )); + } else { + id_seen.insert(composed, format!("{}:{}", inst.source_file, inst.line)); + } + } + + if !errors.is_empty() { + errors.sort(); + for err in &errors { + eprintln!("error: {err}"); + } + bail!( + "Instance generation failed: {} duplicate instance ID(s)", + errors.len() + ); + } + Ok(()) +} + +/// Hard-error if two annotations would produce the same output file path. 
+fn check_duplicate_output_paths( + instances: &[ParsedInstance], + output: Option<&str>, + sandbox_root: &Path, +) -> Result<()> { + let mut path_seen: HashMap = HashMap::new(); + let mut errors: Vec = Vec::new(); + + for inst in instances { + let composed = format!("{}{}", inst.attrs.schema_id, inst.attrs.instance_segment); + let file_rel = format!("{}/{}.instance.json", inst.attrs.dir_path, composed); + let raw_path = if let Some(od) = output { + Path::new(od).join(&file_rel) + } else { + let src_dir = Path::new(inst.source_file.as_str()) + .parent() + .unwrap_or(sandbox_root); + src_dir.join(&file_rel) + }; + let key = raw_path + .components() + .collect::() + .to_string_lossy() + .into_owned(); + if let Some(prev) = path_seen.get(&key) { + errors.push(format!( + "Duplicate output path '{}':\n first: {}\n second: {}:{}", + raw_path.display(), + prev, + inst.source_file, + inst.line + )); + } else { + path_seen.insert(key, format!("{}:{}", inst.source_file, inst.line)); + } + } + + if !errors.is_empty() { + errors.sort(); + for err in &errors { + eprintln!("error: {err}"); + } + bail!( + "Instance generation failed: {} duplicate output path(s)", + errors.len() + ); + } + Ok(()) +} + +/// Generate all instance files, returning the count of files written. 
+fn emit_instances( + instances: &[ParsedInstance], + output: Option<&str>, + sandbox_root: &Path, +) -> Result { + let mut count = 0; + for inst in instances { + let file_path = generate_single_instance(inst, output, sandbox_root) + .map_err(|e| anyhow::anyhow!("{}: {}", inst.source_file, e))?; + let composed = format!("{}{}", inst.attrs.schema_id, inst.attrs.instance_segment); + println!(" Generated instance: {composed} @ {file_path}"); + count += 1; + } + Ok(count) +} + +fn print_summary(files_scanned: usize, files_skipped: usize, instances_generated: usize) { + println!("\nSummary:"); + println!(" Files scanned: {files_scanned}"); + println!(" Files skipped: {files_skipped}"); + println!(" Instances generated: {instances_generated}"); + if instances_generated == 0 { + println!("\n No instances found. Annotate consts with `#[gts_well_known_instance(...)]`."); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use tempfile::TempDir; + + fn write_src(dir: &Path, name: &str, content: &str) { + fs::write(dir.join(name), content).unwrap(); + } + + fn valid_src(instance_segment: &str, json_body: &str) -> String { + format!( + concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"{}\"\n", + ")]\n", + "pub const FOO: &str = {};\n" + ), + instance_segment, json_body + ) + } + + #[test] + fn test_end_to_end_single_instance() { + let temp = TempDir::new().unwrap(); + let root = temp.path().canonicalize().unwrap(); + + write_src( + &root, + "module.rs", + &valid_src( + "x.commerce._.orders.v1.0", + r#""{\"name\": \"orders\", \"partitions\": 16}""#, + ), + ); + + generate_instances_from_rust(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[], 0) + .unwrap(); + + let expected = root + .join("instances") + .join("gts.x.core.events.topic.v1~x.commerce._.orders.v1.0.instance.json"); + assert!(expected.exists()); + + let val: serde_json::Value = + 
serde_json::from_str(&fs::read_to_string(&expected).unwrap()).unwrap(); + assert_eq!( + val["id"], + "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0" + ); + assert_eq!(val["name"], "orders"); + assert_eq!(val["partitions"], 16); + } + + #[test] + fn test_nonexistent_source_errors() { + let result = + generate_instances_from_rust("/nonexistent/path/that/does/not/exist", None, &[], 0); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("does not exist")); + } + + #[test] + fn test_duplicate_id_hard_error() { + let temp = TempDir::new().unwrap(); + let root = temp.path().canonicalize().unwrap(); + + let dup_src = concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.orders.v1.0\"\n", + ")]\n", + "const A: &str = \"{\\\"name\\\": \\\"a\\\"}\";\n", + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.orders.v1.0\"\n", + ")]\n", + "const B: &str = \"{\\\"name\\\": \\\"b\\\"}\";\n" + ); + write_src(&root, "dup.rs", dup_src); + + let result = generate_instances_from_rust( + root.to_str().unwrap(), + Some(root.to_str().unwrap()), + &[], + 0, + ); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("duplicate instance ID"), + "Expected duplicate ID error" + ); + } + + #[test] + fn test_exclude_pattern_skips_file() { + let temp = TempDir::new().unwrap(); + let root = temp.path().canonicalize().unwrap(); + + // This file has a malformed annotation that would error if scanned + write_src( + &root, + "excluded.rs", + concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"bad-no-tilde\",\n", + " instance_segment = \"x.a.v1.0\"\n", + ")]\n", + "const X: &str = \"{}\";\n" + ), + ); + + let result = generate_instances_from_rust( + root.to_str().unwrap(), + 
Some(root.to_str().unwrap()), + &["excluded.rs".to_owned()], + 0, + ); + assert!( + result.is_ok(), + "Expected excluded file to be skipped: {result:?}" + ); + } + + #[test] + fn test_no_annotations_succeeds_with_zero_generated() { + let temp = TempDir::new().unwrap(); + let root = temp.path().canonicalize().unwrap(); + write_src(&root, "plain.rs", "const FOO: u32 = 42;\n"); + + let result = generate_instances_from_rust( + root.to_str().unwrap(), + Some(root.to_str().unwrap()), + &[], + 0, + ); + assert!(result.is_ok()); + } +} diff --git a/gts-cli/src/gen_instances/parser.rs b/gts-cli/src/gen_instances/parser.rs new file mode 100644 index 0000000..0351679 --- /dev/null +++ b/gts-cli/src/gen_instances/parser.rs @@ -0,0 +1,640 @@ +use anyhow::{Result, bail}; +use regex::Regex; +use std::path::Path; + +use super::attrs::{InstanceAttrs, parse_instance_attrs}; +use super::string_lit::decode_string_literal; + +/// A parsed and validated instance annotation, ready for file generation. +#[derive(Debug)] +#[allow(dead_code)] +pub struct ParsedInstance { + pub attrs: InstanceAttrs, + /// Raw JSON body string (as written in the const value, decoded from the literal). + pub json_body: String, + /// Absolute path of the source file containing this annotation. + pub source_file: String, + /// 1-based line number of the annotation start, for diagnostics. + pub line: usize, +} + +/// Extract all `#[gts_well_known_instance]`-annotated consts from a source text. +/// +/// Three outcomes per the extraction contract: +/// 1. No annotation token found (preflight negative) → `Ok(vec![])` (fast path, no errors) +/// 2. Annotation token found, parse fails → `Err(...)` (hard error, reported upstream) +/// 3. Parse succeeds → `Ok(instances)` +/// +/// # Errors +/// Returns an error if an annotation is found but cannot be parsed or validated. 
+pub fn extract_instances_from_source( + content: &str, + source_file: &Path, +) -> Result> { + if !preflight_scan(content) { + return Ok(Vec::new()); + } + + let source_file_str = source_file.to_string_lossy().to_string(); + let line_offsets = build_line_offsets(content); + // Strip comments before parsing so annotations in doc/line/block comments + // are never matched as real annotations. Byte offsets are preserved because + // strip_comments replaces comment text with spaces (newlines kept). + let stripped = strip_comments(content); + let annotation_re = build_annotation_regex()?; + + let mut instances = Vec::new(); + + for cap in annotation_re.captures_iter(&stripped) { + let full_start = cap.get(0).map_or(0, |m| m.start()); + let line = byte_offset_to_line(full_start, &line_offsets); + + let attr_body = &cap[1]; + let attrs = parse_instance_attrs(attr_body, &source_file_str, line)?; + + let raw_literal = &cap[2]; + let json_body = decode_string_literal(raw_literal).map_err(|e| { + anyhow::anyhow!("{source_file_str}:{line}: Failed to decode string literal: {e}") + })?; + + validate_json_body(&json_body, &source_file_str, line)?; + + instances.push(ParsedInstance { + attrs, + json_body, + source_file: source_file_str.clone(), + line, + }); + } + + // Run unsupported-form checks on the same comment-stripped content. + check_unsupported_forms(&stripped, &source_file_str, &line_offsets)?; + + // Preflight was positive but neither the main regex nor unsupported-form + // checks matched anything — the annotation is in a form we don't recognise + // (e.g. applied to a fn, enum, or a completely garbled attribute body). + // This is a hard error per the extraction contract. + if instances.is_empty() { + let needle_line = find_needle_line(content, &line_offsets); + bail!( + "{source_file_str}:{needle_line}: `#[gts_well_known_instance]` annotation found \ + but could not be parsed. The annotation must be on a `const NAME: &str = ;` \ + item. 
Check for typos, unsupported item kinds, or missing required attributes." + ); + } + + Ok(instances) +} + +/// Validate that the decoded JSON body is a non-empty object without an `"id"` field. +fn validate_json_body(json_body: &str, source_file: &str, line: usize) -> Result<()> { + let json_val: serde_json::Value = serde_json::from_str(json_body).map_err(|e| { + anyhow::anyhow!( + "{}:{}: Malformed JSON in instance body: {} (at JSON line {}, col {})", + source_file, + line, + e, + e.line(), + e.column() + ) + })?; + + if !json_val.is_object() { + bail!( + "{}:{}: Instance JSON body must be a JSON object {{...}}, got {}. \ + Arrays, strings, numbers, booleans, and null are not valid instance bodies.", + source_file, + line, + json_type_name(&json_val) + ); + } + + if json_val.get("id").is_some() { + bail!( + "{source_file}:{line}: Instance JSON body must not contain an \"id\" field. \ + The id is automatically injected from schema_id + instance_segment. \ + Remove the \"id\" field from the JSON body." + ); + } + + Ok(()) +} + +/// Build the regex matching `#[gts_well_known_instance(...)] const NAME: &str = ;` +/// +/// Capture groups: +/// 1. Attribute body (everything inside the outer parentheses) +/// 2. The string literal token (raw or regular) +fn build_annotation_regex() -> Result { + let pattern = concat!( + // (1) Macro attribute body + r"#\[(?:gts_macros::)?gts_well_known_instance\(([\s\S]*?)\)\]", + // Optional additional attributes (e.g. #[allow(dead_code)]) + r"(?:\s*#\[[^\]]*\])*", + r"\s*", + // Optional visibility: pub / pub(crate) / pub(super) / pub(in path) + r"(?:pub\s*(?:\([^)]*\)\s*)?)?", + // const NAME: &str = (optional 'static lifetime) + r"const\s+\w+\s*:\s*&\s*(?:'static\s+)?str\s*=\s*", + // (2) String literal: raw r"..." / r#"..."# / r##"..."## (0+ hashes) or regular "..." + "(r#*\"[\\s\\S]*?\"#*|\"(?:[^\"\\\\]|\\\\.)*\")", + r"\s*;" + ); + Ok(Regex::new(pattern)?) 
+} + +/// Token-aware scan: finds `#[gts_well_known_instance` or +/// `#[gts_macros::gts_well_known_instance` outside comments and string literals. +/// Returns `true` if at least one candidate attribute token is found. +/// +/// The `#[` prefix is required — bare identifiers (e.g. in `use` statements) +/// do not trigger a positive result, preventing false hard-errors downstream. +#[must_use] +pub fn preflight_scan(content: &str) -> bool { + // Both needles require the `#[` attribute-open prefix so that a bare + // identifier like `use gts_macros::gts_well_known_instance;` is never + // a match. NEEDLE_BARE covers `#[gts_well_known_instance(`, + // NEEDLE_QUAL covers `#[gts_macros::gts_well_known_instance(`. + const NEEDLE_BARE: &[u8] = b"#[gts_well_known_instance"; + const NEEDLE_QUAL: &[u8] = b"#[gts_macros::gts_well_known_instance"; + let bytes = content.as_bytes(); + let len = bytes.len(); + let mut i = 0; + + while i < len { + // Skip line comment `// ...` + if i + 1 < len && bytes[i] == b'/' && bytes[i + 1] == b'/' { + while i < len && bytes[i] != b'\n' { + i += 1; + } + continue; + } + // Skip block comment `/* ... */` + if i + 1 < len && bytes[i] == b'/' && bytes[i + 1] == b'*' { + i += 2; + while i + 1 < len && !(bytes[i] == b'*' && bytes[i + 1] == b'/') { + i += 1; + } + i += 2; // skip closing */ + continue; + } + // Skip regular string literal `"..."` + if bytes[i] == b'"' { + i += 1; + while i < len { + if bytes[i] == b'\\' { + i += 2; + continue; + } + if bytes[i] == b'"' { + i += 1; + break; + } + i += 1; + } + continue; + } + // Skip raw string literal `r#"..."#` (any number of hashes) + #[allow(clippy::collapsible_if)] + if bytes[i] == b'r' { + if let Some(after) = try_skip_raw_string(bytes, i) { + i = after; + continue; + } + } + // Skip char literal `'x'` / `'\n'` / `'\u{..}'` to avoid false positives + // on e.g. `'#'` or `'['` appearing near the needle by coincidence. 
+ // + // IMPORTANT: We must NOT mistake Rust lifetimes (`'a`, `'static`) for + // char literals — doing so would scan forward until the next `'` and + // could skip a real `#[gts_well_known_instance` annotation (false negative). + // + // Strategy: tentatively walk past the char body, then check whether the + // byte at that position is actually `'` (the closing delimiter). If it + // is, we have a confirmed char literal and skip past it. If it is not, + // we are looking at a lifetime annotation — just advance past the opening + // `'` and resume normal scanning so no content is skipped. + if bytes[i] == b'\'' { + let mut j = i + 1; + if j < len && bytes[j] == b'\\' { + // Escaped char literal: '\n', '\\', '\u{NNNN}', etc. + j += 1; // skip backslash + while j < len && bytes[j] != b'\'' { + j += 1; + } + // j now points at closing ' (or end of input) + if j < len && bytes[j] == b'\'' { + i = j + 1; // skip past closing ' + } else { + i += 1; // malformed — just skip opening ' + } + } else if j < len && bytes[j] != b'\'' { + // Could be a single char `'x'` or a lifetime `'name`. + // Peek one further: if bytes[j+1] == '\'' it's a 1-char literal. + if j + 1 < len && bytes[j + 1] == b'\'' { + i = j + 2; // skip 'x' + } else { + // Not a char literal — lifetime or other use. Skip only the + // opening '\'' so the rest of the token is scanned normally. + i += 1; + } + } else { + // `''` — empty char literal (invalid Rust, but don't get stuck) + i += 1; + } + continue; + } + // Check for attribute-syntax needle (byte comparison — both needles are pure ASCII). + // Qualified form is checked first because it is strictly longer. + if bytes[i..].starts_with(NEEDLE_QUAL) || bytes[i..].starts_with(NEEDLE_BARE) { + return true; + } + i += 1; + } + false +} + +/// Attempt to skip a raw string starting at `start`. Returns `Some(new_i)` on success. 
+fn try_skip_raw_string(bytes: &[u8], start: usize) -> Option { + let len = bytes.len(); + let mut j = start + 1; // skip 'r' + let mut hashes = 0usize; + while j < len && bytes[j] == b'#' { + hashes += 1; + j += 1; + } + if j >= len || bytes[j] != b'"' { + return None; // not a raw string + } + j += 1; // skip opening " + loop { + if j >= len { + return None; // unterminated + } + if bytes[j] == b'"' { + let mut k = j + 1; + let mut closing = 0usize; + while k < len && bytes[k] == b'#' && closing < hashes { + closing += 1; + k += 1; + } + if closing == hashes { + return Some(k); + } + } + j += 1; + } +} + +/// Detect known unsupported annotation forms and emit actionable errors. +/// +/// NOTE: uses `(?s)` (dotall) flag so the attr body may span multiple lines. +fn check_unsupported_forms(content: &str, source_file: &str, line_offsets: &[usize]) -> Result<()> { + // static instead of const + let static_re = Regex::new( + r"(?s)#\[(?:gts_macros::)?gts_well_known_instance\(.*?\)\]\s*(?:#\[[^\]]*\]\s*)*(?:pub\s*(?:\([^)]*\)\s*)?)?static\s", + )?; + if let Some(m) = static_re.find(content) { + let line = byte_offset_to_line(m.start(), line_offsets); + bail!( + "{source_file}:{line}: `#[gts_well_known_instance]` cannot be applied to `static` items. \ + Use `const NAME: &str = ...` instead." + ); + } + + // concat!() as value + let concat_re = Regex::new( + r"(?s)#\[(?:gts_macros::)?gts_well_known_instance\(.*?\)\]\s*(?:#\[[^\]]*\]\s*)*(?:pub\s*(?:\([^)]*\)\s*)?)?const\s+\w+\s*:\s*&\s*(?:'static\s+)?str\s*=\s*concat\s*!", + )?; + if let Some(m) = concat_re.find(content) { + let line = byte_offset_to_line(m.start(), line_offsets); + bail!( + "{source_file}:{line}: `concat!()` is not supported as the const value for \ + `#[gts_well_known_instance]`. Use a raw string literal `r#\"...\"#` instead." 
+ ); + } + + // const with wrong type (not &str) — checked last as it's broader + // Note: we use a positive match for the non-&str case to avoid lookahead + let wrong_type_re = Regex::new( + r"(?s)#\[(?:gts_macros::)?gts_well_known_instance\(.*?\)\]\s*(?:#\[[^\]]*\]\s*)*(?:pub\s*(?:\([^)]*\)\s*)?)?const\s+\w+\s*:\s*&\s*(?:'static\s+)?([A-Za-z][A-Za-z0-9_]*)\b", + )?; + if let Some(cap) = wrong_type_re.captures(content) { + let ty = cap.get(1).map_or("", |m| m.as_str()); + if ty != "str" { + let start = cap.get(0).map_or(0, |m| m.start()); + let line = byte_offset_to_line(start, line_offsets); + bail!( + "{source_file}:{line}: `#[gts_well_known_instance]` requires `const NAME: &str`. \ + The annotated const must have type `&str`." + ); + } + } + + Ok(()) +} + +/// Build a byte-offset to line number index (line 1 = offset 0). +#[must_use] +pub fn build_line_offsets(content: &str) -> Vec { + let mut offsets = vec![0usize]; + for (i, b) in content.bytes().enumerate() { + if b == b'\n' { + offsets.push(i + 1); + } + } + offsets +} + +/// Convert a byte offset to a 1-based line number. +#[must_use] +pub fn byte_offset_to_line(offset: usize, line_offsets: &[usize]) -> usize { + match line_offsets.binary_search(&offset) { + Ok(i) => i + 1, + Err(i) => i, + } +} + +/// Strip line and block comments from source, replacing them with whitespace +/// to preserve byte offsets (and thus line numbers). +fn strip_comments(content: &str) -> String { + let bytes = content.as_bytes(); + let len = bytes.len(); + let mut out = content.to_owned().into_bytes(); + let mut i = 0; + while i < len { + // Line comment: replace up to (not including) the newline. + // Only blank ASCII bytes — non-ASCII bytes are left intact so the + // output remains valid UTF-8 (multi-byte sequences can't be part of + // the pure-ASCII annotation needle). 
+ if i + 1 < len && bytes[i] == b'/' && bytes[i + 1] == b'/' { + while i < len && bytes[i] != b'\n' { + if bytes[i].is_ascii() { + out[i] = b' '; + } + i += 1; + } + continue; + } + // Block comment: replace including delimiters + if i + 1 < len && bytes[i] == b'/' && bytes[i + 1] == b'*' { + out[i] = b' '; + out[i + 1] = b' '; + i += 2; + while i + 1 < len && !(bytes[i] == b'*' && bytes[i + 1] == b'/') { + if bytes[i] != b'\n' && bytes[i].is_ascii() { + out[i] = b' '; + } + i += 1; + } + if i + 1 < len { + out[i] = b' '; + out[i + 1] = b' '; + i += 2; + } + continue; + } + // Skip over string literals unchanged (so we don't blank real code) + if bytes[i] == b'"' { + i += 1; + while i < len { + if bytes[i] == b'\\' { + i += 2; + continue; + } + if bytes[i] == b'"' { + i += 1; + break; + } + i += 1; + } + continue; + } + #[allow(clippy::collapsible_if)] + if bytes[i] == b'r' { + if let Some(after) = try_skip_raw_string(bytes, i) { + i = after; + continue; + } + } + i += 1; + } + String::from_utf8(out).unwrap_or_else(|_| content.to_owned()) +} + +/// Find the 1-based line of the first `#[...gts_well_known_instance` attribute in `content`. +/// Checks the qualified form first (longer), then the bare form. 
+fn find_needle_line(content: &str, line_offsets: &[usize]) -> usize { + let pos = content + .find("#[gts_macros::gts_well_known_instance") + .or_else(|| content.find("#[gts_well_known_instance")); + pos.map_or(1, |p| byte_offset_to_line(p, line_offsets)) +} + +fn json_type_name(val: &serde_json::Value) -> &'static str { + match val { + serde_json::Value::Null => "null", + serde_json::Value::Bool(_) => "boolean", + serde_json::Value::Number(_) => "number", + serde_json::Value::String(_) => "string", + serde_json::Value::Array(_) => "array", + serde_json::Value::Object(_) => "object", + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn src(body: &str) -> String { + format!( + concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"{}\"\n", + ")]\n", + "const FOO: &str = {};\n" + ), + "x.commerce._.orders.v1.0", body + ) + } + + #[test] + fn test_preflight_positive() { + assert!(preflight_scan("#[gts_well_known_instance(x)]")); + } + + #[test] + fn test_preflight_negative_in_comment() { + assert!(!preflight_scan("// #[gts_well_known_instance]")); + } + + #[test] + fn test_preflight_negative_in_block_comment() { + assert!(!preflight_scan("/* #[gts_well_known_instance] */")); + } + + #[test] + fn test_preflight_positive_qualified_path() { + assert!(preflight_scan("#[gts_macros::gts_well_known_instance(x)]")); + } + + #[test] + fn test_preflight_negative_bare_use_statement() { + // `use gts_macros::gts_well_known_instance;` must NOT be a positive — + // it lacks the required `#[` attribute-open prefix. + assert!(!preflight_scan( + "use gts_macros::gts_well_known_instance;\nconst X: u32 = 1;\n" + )); + } + + #[test] + fn test_preflight_positive_after_static_lifetime() { + // `'static` before the annotation must NOT suppress it (false-negative fix). 
+ let src = concat!( + "fn foo(x: &'static str) -> u32 { 0 }\n", + "#[gts_well_known_instance(x)]\n" + ); + assert!(preflight_scan(src)); + } + + #[test] + fn test_preflight_positive_after_named_lifetime() { + // `'a` lifetime before the annotation must NOT suppress it. + let src = concat!( + "fn bar<'a>(x: &'a str) -> &'a str { x }\n", + "#[gts_well_known_instance(x)]\n" + ); + assert!(preflight_scan(src)); + } + + #[test] + fn test_preflight_positive_char_literal_hash() { + // A char literal containing '#' must not be the needle itself. + // But the real annotation after it must still be found. + let src = concat!( + "fn check(c: char) -> bool { c == '#' }\n", + "#[gts_well_known_instance(x)]\n" + ); + assert!(preflight_scan(src)); + } + + #[test] + fn test_extract_regular_string() { + let content = src(r#""{\"name\": \"orders\"}""#); + let result = extract_instances_from_source(&content, Path::new("t.rs")).unwrap(); + assert_eq!(result.len(), 1); + assert_eq!(result[0].attrs.schema_id, "gts.x.core.events.topic.v1~"); + assert_eq!(result[0].attrs.instance_segment, "x.commerce._.orders.v1.0"); + } + + #[test] + fn test_no_annotation_returns_empty() { + let content = "const FOO: &str = \"hello\";"; + let result = extract_instances_from_source(content, Path::new("t.rs")).unwrap(); + assert!(result.is_empty()); + } + + #[test] + fn test_rejects_id_in_body() { + let content = src(r#""{\"id\": \"bad\", \"name\": \"x\"}""#); + let err = extract_instances_from_source(&content, Path::new("t.rs")).unwrap_err(); + assert!(err.to_string().contains("\"id\" field")); + } + + #[test] + fn test_rejects_non_object_json() { + let content = src("\"[1, 2, 3]\""); + let err = extract_instances_from_source(&content, Path::new("t.rs")).unwrap_err(); + assert!(err.to_string().contains("JSON object")); + } + + #[test] + fn test_rejects_malformed_json() { + let content = src(r#""{not valid json}""#); + let err = extract_instances_from_source(&content, Path::new("t.rs")).unwrap_err(); + 
assert!(err.to_string().contains("Malformed JSON")); + } + + #[test] + fn test_rejects_static_item() { + let content = concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.foo.v1~\",\n", + " instance_segment = \"x.bar.v1.0\"\n", + ")]\n", + "static FOO: &str = \"{}\";\n" + ); + let err = extract_instances_from_source(content, Path::new("t.rs")).unwrap_err(); + assert!(err.to_string().contains("static")); + } + + #[test] + fn test_rejects_concat_macro() { + let content = concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.foo.v1~\",\n", + " instance_segment = \"x.bar.v1.0\"\n", + ")]\n", + "const FOO: &str = concat!(\"{\", \"}\");\n" + ); + let err = extract_instances_from_source(content, Path::new("t.rs")).unwrap_err(); + assert!(err.to_string().contains("concat!()")); + } + + #[test] + fn test_multiple_annotations_in_one_file() { + let content = concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.orders.v1.0\"\n", + ")]\n", + "const A: &str = \"{\\\"name\\\": \\\"orders\\\"}\";\n", + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.payments.v1.0\"\n", + ")]\n", + "const B: &str = \"{\\\"name\\\": \\\"payments\\\"}\";\n" + ); + let result = extract_instances_from_source(content, Path::new("t.rs")).unwrap(); + assert_eq!(result.len(), 2); + } + + #[test] + fn test_pub_visibility_accepted() { + let content = concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.orders.v1.0\"\n", + ")]\n", + "pub const FOO: &str = \"{\\\"name\\\": \\\"orders\\\"}\";\n" + ); + let result = extract_instances_from_source(content, Path::new("t.rs")).unwrap(); + 
assert_eq!(result.len(), 1); + } + + #[test] + fn test_line_number_reported() { + let content = concat!( + "// line 1\n", + "// line 2\n", + "#[gts_well_known_instance(\n", // line 3 + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.foo.v1~\",\n", + " instance_segment = \"x.bar.v1.0\"\n", + ")]\n", + "const FOO: &str = \"{\\\"id\\\": \\\"bad\\\"}\";\n" + ); + let err = extract_instances_from_source(content, Path::new("events.rs")).unwrap_err(); + let msg = err.to_string(); + assert!(msg.contains("events.rs")); + // line 3 is where the annotation starts + assert!(msg.contains(":3:"), "Expected line 3 in: {msg}"); + } +} diff --git a/gts-cli/src/gen_instances/string_lit.rs b/gts-cli/src/gen_instances/string_lit.rs new file mode 100644 index 0000000..b517bd8 --- /dev/null +++ b/gts-cli/src/gen_instances/string_lit.rs @@ -0,0 +1,132 @@ +use anyhow::{Result, bail}; + +/// Decode a Rust string literal token to its actual string content. +/// +/// Supports: +/// - Raw strings: `r#"..."#`, `r##"..."##`, etc. (content is verbatim) +/// - Regular strings: `"..."` with standard Rust escape sequences +/// +/// # Errors +/// Returns an error if the token is not a recognized string literal form or contains invalid escapes. 
+pub fn decode_string_literal(token: &str) -> Result { + if token.starts_with('r') { + decode_raw_string(token) + } else if token.starts_with('"') && token.ends_with('"') && token.len() >= 2 { + let inner = &token[1..token.len() - 1]; + decode_string_escapes(inner) + } else { + bail!( + "Unrecognized string literal form: {}", + &token[..token.len().min(40)] + ) + } +} + +fn decode_raw_string(token: &str) -> Result { + let after_r = &token[1..]; + let hash_count = after_r.chars().take_while(|&c| c == '#').count(); + let inner = &after_r[hash_count..]; + let inner = inner + .strip_prefix('"') + .ok_or_else(|| anyhow::anyhow!("Invalid raw string literal: missing opening quote"))?; + let closing = format!("\"{}", "#".repeat(hash_count)); + let inner = inner.strip_suffix(closing.as_str()).ok_or_else(|| { + anyhow::anyhow!("Invalid raw string literal: missing closing quote+hashes") + })?; + Ok(inner.to_owned()) +} + +fn decode_string_escapes(s: &str) -> Result { + let mut result = String::with_capacity(s.len()); + let mut chars = s.chars().peekable(); + while let Some(c) = chars.next() { + if c != '\\' { + result.push(c); + continue; + } + match chars.next() { + Some('n') => result.push('\n'), + Some('r') => result.push('\r'), + Some('t') => result.push('\t'), + Some('\\') => result.push('\\'), + Some('"') => result.push('"'), + Some('\'') => result.push('\''), + Some('0') => result.push('\0'), + Some('u') => { + if chars.next() != Some('{') { + bail!("Invalid unicode escape: expected {{"); + } + let hex: String = chars.by_ref().take_while(|&c| c != '}').collect(); + let code = u32::from_str_radix(&hex, 16) + .map_err(|_| anyhow::anyhow!("Invalid unicode escape \\u{{{hex}}}"))?; + let ch = char::from_u32(code) + .ok_or_else(|| anyhow::anyhow!("Invalid unicode code point {code}"))?; + result.push(ch); + } + Some(c) => bail!("Unsupported escape sequence: \\{c}"), + None => bail!("Unexpected end of string after backslash"), + } + } + Ok(result) +} + +#[cfg(test)] +mod tests 
{ + use super::*; + + #[test] + fn test_raw_string_no_hashes() { + let token = "r\"{\\\"k\\\": 1}\""; + // r"{\"k\": 1}" — content is verbatim including backslashes + let result = decode_string_literal(token).unwrap(); + assert_eq!(result, "{\\\"k\\\": 1}"); + } + + #[test] + fn test_raw_string_one_hash() { + // Simulated: r#"{"k": 1}"# + let token = "r#\"{\\\"k\\\": 1}\"#"; + let result = decode_string_literal(token).unwrap(); + assert_eq!(result, "{\\\"k\\\": 1}"); + } + + #[test] + fn test_regular_string_simple() { + let token = "\"hello world\""; + let result = decode_string_literal(token).unwrap(); + assert_eq!(result, "hello world"); + } + + #[test] + fn test_regular_string_escapes() { + let token = "\"line1\\nline2\\ttab\""; + let result = decode_string_literal(token).unwrap(); + assert_eq!(result, "line1\nline2\ttab"); + } + + #[test] + fn test_regular_string_escaped_quote() { + let token = r#""{\"name\":\"foo\"}""#; + let result = decode_string_literal(token).unwrap(); + assert_eq!(result, "{\"name\":\"foo\"}"); + } + + #[test] + fn test_unicode_escape() { + let token = "\"\\u{1F600}\""; + let result = decode_string_literal(token).unwrap(); + assert_eq!(result, "\u{1F600}"); + } + + #[test] + fn test_invalid_escape() { + let token = "\"\\q\""; + assert!(decode_string_literal(token).is_err()); + } + + #[test] + fn test_unrecognized_form() { + let token = "b\"bytes\""; + assert!(decode_string_literal(token).is_err()); + } +} diff --git a/gts-cli/src/gen_instances/writer.rs b/gts-cli/src/gen_instances/writer.rs new file mode 100644 index 0000000..6cbd3ac --- /dev/null +++ b/gts-cli/src/gen_instances/writer.rs @@ -0,0 +1,181 @@ +use anyhow::{Result, bail}; +use std::fs; +use std::path::Path; + +use crate::gen_common::safe_canonicalize_nonexistent; + +use super::parser::ParsedInstance; + +/// Generate the instance JSON file for a single parsed annotation. 
+/// +/// Validates the output path against the sandbox boundary **before** any +/// filesystem mutations (validate-before-mkdir policy). +/// +/// Injects the `"id"` field (composed as `schema_id + instance_segment`) into +/// the JSON object before writing. +/// +/// Returns the written file path as a `String` on success. +/// +/// # Errors +/// Returns an error if the output path escapes the sandbox or if the file cannot be written. +pub fn generate_single_instance( + inst: &ParsedInstance, + output: Option<&str>, + sandbox_root: &Path, +) -> Result { + let composed = format!("{}{}", inst.attrs.schema_id, inst.attrs.instance_segment); + let file_rel = format!("{}/{}.instance.json", inst.attrs.dir_path, composed); + + let raw_output_path = if let Some(od) = output { + Path::new(od).join(&file_rel) + } else { + let src_dir = Path::new(&inst.source_file) + .parent() + .unwrap_or(sandbox_root); + src_dir.join(&file_rel) + }; + + // Validate sandbox boundary BEFORE any filesystem writes + let output_canonical = safe_canonicalize_nonexistent(&raw_output_path).map_err(|e| { + anyhow::anyhow!( + "Security error - dir_path '{}' in {}: {}", + inst.attrs.dir_path, + inst.source_file, + e + ) + })?; + + if !output_canonical.starts_with(sandbox_root) { + bail!( + "Security error in {} - dir_path '{}' escapes sandbox boundary. 
\ + Resolved to: {}, but must be within: {}", + inst.source_file, + inst.attrs.dir_path, + output_canonical.display(), + sandbox_root.display() + ); + } + + // Build JSON with injected "id" field + let mut obj: serde_json::Map = + serde_json::from_str(&inst.json_body)?; + obj.insert("id".to_owned(), serde_json::Value::String(composed)); + let output_value = serde_json::Value::Object(obj); + + // Create parent directories only after sandbox validation passes + if let Some(parent) = raw_output_path.parent() { + fs::create_dir_all(parent)?; + } + fs::write( + &raw_output_path, + serde_json::to_string_pretty(&output_value)?, + )?; + + Ok(raw_output_path.display().to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::gen_instances::attrs::InstanceAttrs; + use tempfile::TempDir; + + fn make_inst( + dir_path: &str, + schema_id: &str, + instance_segment: &str, + json_body: &str, + source_file: &str, + ) -> ParsedInstance { + ParsedInstance { + attrs: InstanceAttrs { + dir_path: dir_path.to_owned(), + schema_id: schema_id.to_owned(), + instance_segment: instance_segment.to_owned(), + }, + json_body: json_body.to_owned(), + source_file: source_file.to_owned(), + line: 1, + } + } + + #[test] + fn test_generates_file_with_id_injected() { + let temp = TempDir::new().unwrap(); + let sandbox = temp.path().canonicalize().unwrap(); + let src = sandbox.join("module.rs"); + + let inst = make_inst( + "instances", + "gts.x.core.events.topic.v1~", + "x.commerce._.orders.v1.0", + r#"{"name": "orders", "partitions": 16}"#, + src.to_str().unwrap(), + ); + + let result = generate_single_instance(&inst, Some(sandbox.to_str().unwrap()), &sandbox); + assert!(result.is_ok(), "{result:?}"); + + let expected = sandbox + .join("instances") + .join("gts.x.core.events.topic.v1~x.commerce._.orders.v1.0.instance.json"); + assert!(expected.exists()); + + let content: serde_json::Value = + serde_json::from_str(&fs::read_to_string(&expected).unwrap()).unwrap(); + assert_eq!( + 
content["id"], + "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0" + ); + assert_eq!(content["name"], "orders"); + assert_eq!(content["partitions"], 16); + } + + #[test] + fn test_sandbox_escape_rejected() { + let temp = TempDir::new().unwrap(); + let sandbox = temp.path().canonicalize().unwrap(); + let src = sandbox.join("module.rs"); + + let inst = make_inst( + "../../etc", + "gts.x.core.events.topic.v1~", + "x.commerce._.orders.v1.0", + r#"{"name": "x"}"#, + src.to_str().unwrap(), + ); + + let result = generate_single_instance(&inst, Some(sandbox.to_str().unwrap()), &sandbox); + assert!(result.is_err()); + let msg = result.unwrap_err().to_string(); + assert!( + msg.contains("Security error") || msg.contains("sandbox"), + "Unexpected error: {msg}" + ); + } + + #[test] + fn test_output_uses_source_dir_when_no_output_override() { + let temp = TempDir::new().unwrap(); + let sandbox = temp.path().canonicalize().unwrap(); + let src = sandbox.join("subdir").join("module.rs"); + fs::create_dir_all(src.parent().unwrap()).unwrap(); + + let inst = make_inst( + "instances", + "gts.x.core.events.topic.v1~", + "x.commerce._.orders.v1.0", + r#"{"name": "x"}"#, + src.to_str().unwrap(), + ); + + let result = generate_single_instance(&inst, None, &sandbox); + assert!(result.is_ok(), "{result:?}"); + + let expected = sandbox + .join("subdir") + .join("instances") + .join("gts.x.core.events.topic.v1~x.commerce._.orders.v1.0.instance.json"); + assert!(expected.exists()); + } +} diff --git a/gts-cli/src/gen_schemas.rs b/gts-cli/src/gen_schemas.rs index bafdb58..0725abc 100644 --- a/gts-cli/src/gen_schemas.rs +++ b/gts-cli/src/gen_schemas.rs @@ -4,28 +4,8 @@ use regex::Regex; use std::collections::HashMap; use std::fs; use std::path::Path; -use walkdir::WalkDir; -/// Directories that are automatically ignored (e.g., trybuild `compile_fail` tests) -const AUTO_IGNORE_DIRS: &[&str] = &["compile_fail"]; - -/// Reason why a file was skipped -#[derive(Debug, Clone, Copy)] -enum 
SkipReason { - ExcludePattern, - AutoIgnoredDir, - IgnoreDirective, -} - -impl std::fmt::Display for SkipReason { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::ExcludePattern => write!(f, "matched --exclude pattern"), - Self::AutoIgnoredDir => write!(f, "in auto-ignored directory (compile_fail)"), - Self::IgnoreDirective => write!(f, "has // gts:ignore directive"), - } - } -} +use crate::gen_common::{compute_sandbox_root, safe_canonicalize_nonexistent, walk_rust_files}; /// Parsed macro attributes from `#[struct_to_gts_schema(...)]` #[derive(Debug, Clone)] @@ -58,7 +38,7 @@ enum BaseAttr { /// /// Returns an error if: /// - The source path does not exist -/// - The output path is outside the source repository +/// - The output path is outside the sandbox boundary /// - File I/O operations fail pub fn generate_schemas_from_rust( source: &str, @@ -73,73 +53,20 @@ pub fn generate_schemas_from_rust( bail!("Source path does not exist: {source}"); } - // Canonicalize source path to detect path traversal attempts let source_canonical = source_path.canonicalize()?; + let sandbox_root = compute_sandbox_root(&source_canonical, output)?; let mut schemas_generated = 0; - let mut files_scanned = 0; - let mut files_skipped = 0; - - // Walk through all .rs files - for entry in WalkDir::new(source_path) - .follow_links(true) - .into_iter() - .filter_map(Result::ok) - { - let path = entry.path(); - if path.extension().and_then(|s| s.to_str()) != Some("rs") { - continue; - } - - // Check if path should be excluded - if should_exclude_path(path, exclude_patterns) { - files_skipped += 1; - if verbose > 0 { - println!( - " Skipped: {} ({})", - path.display(), - SkipReason::ExcludePattern - ); - } - continue; - } - - // Check for auto-ignored directories (e.g., compile_fail) - if is_in_auto_ignored_dir(path) { - files_skipped += 1; - if verbose > 0 { - println!( - " Skipped: {} ({})", - path.display(), - SkipReason::AutoIgnoredDir - ); - } 
- continue; - } - files_scanned += 1; - if let Ok(content) = fs::read_to_string(path) { - // Check for gts:ignore directive - if has_ignore_directive(&content) { - files_skipped += 1; - if verbose > 0 { - println!( - " Skipped: {} ({})", - path.display(), - SkipReason::IgnoreDirective - ); - } - continue; - } - - // Parse the file and extract schema information - let results = extract_and_generate_schemas(&content, output, &source_canonical, path)?; + let (files_scanned, files_skipped) = + walk_rust_files(source_path, exclude_patterns, verbose, |path, content| { + let results = extract_and_generate_schemas(content, output, &sandbox_root, path)?; schemas_generated += results.len(); for (schema_id, file_path) in results { println!(" Generated schema: {schema_id} @ {file_path}"); } - } - } + Ok(()) + })?; println!("\nSummary:"); println!(" Files scanned: {files_scanned}"); @@ -155,68 +82,6 @@ pub fn generate_schemas_from_rust( Ok(()) } -/// Check if a path matches any of the exclude patterns -fn should_exclude_path(path: &Path, patterns: &[String]) -> bool { - let path_str = path.to_string_lossy(); - - for pattern in patterns { - if matches_glob_pattern(&path_str, pattern) { - return true; - } - } - - false -} - -/// Simple glob pattern matching -/// Supports: * (any characters), ** (any path segments) -fn matches_glob_pattern(path: &str, pattern: &str) -> bool { - // Convert glob pattern to regex - let regex_pattern = pattern - .replace('.', r"\.") - .replace("**", "<>") - .replace('*', "[^/]*") - .replace("<>", ".*"); - - if let Ok(re) = Regex::new(&format!("(^|/){regex_pattern}($|/)")) { - re.is_match(path) - } else { - // Fallback to simple contains check - path.contains(pattern) - } -} - -/// Check if path is in an auto-ignored directory (e.g., `compile_fail`) -fn is_in_auto_ignored_dir(path: &Path) -> bool { - path.components().any(|component| { - if let Some(name) = component.as_os_str().to_str() { - AUTO_IGNORE_DIRS.contains(&name) - } else { - false - } - }) 
-} - -/// Check if file content starts with the gts:ignore directive -fn has_ignore_directive(content: &str) -> bool { - // Check first few lines for the directive - for line in content.lines().take(10) { - let trimmed = line.trim(); - if trimmed.is_empty() { - continue; - } - // Check for the directive (case-insensitive) - if trimmed.to_lowercase().starts_with("// gts:ignore") { - return true; - } - // If we hit a non-comment, non-empty line, stop looking - if !trimmed.starts_with("//") && !trimmed.starts_with("#!") { - break; - } - } - false -} - /// Parse the attribute body of `#[struct_to_gts_schema(...)]` to extract individual attributes fn parse_macro_attrs(attr_body: &str) -> Option { // Patterns for extracting individual attributes @@ -267,7 +132,7 @@ fn parse_macro_attrs(attr_body: &str) -> Option { fn extract_and_generate_schemas( content: &str, output_override: Option<&str>, - source_root: &Path, + sandbox_root: &Path, source_file: &Path, ) -> Result> { // Match #[struct_to_gts_schema(...)] followed by struct definition @@ -292,44 +157,36 @@ fn extract_and_generate_schemas( continue; }; - // Convert schema_id to filename-safe format - // e.g., "gts.x.core.events.type.v1~" -> "gts.x.core.events.type.v1~" let schema_file_rel = format!("{}/{}.schema.json", attrs.dir_path, attrs.schema_id); - // Determine output path - let output_path = if let Some(output_dir) = output_override { - // Use CLI-provided output directory + // Determine raw (unvalidated) output path + let raw_output_path = if let Some(output_dir) = output_override { Path::new(output_dir).join(&schema_file_rel) } else { - // Use path from macro (relative to source file's directory) - let source_dir = source_file.parent().unwrap_or(source_root); + let source_dir = source_file.parent().unwrap_or(sandbox_root); source_dir.join(&schema_file_rel) }; - // Security check: ensure output path doesn't escape source repository - let output_canonical = if output_path.exists() { - output_path.canonicalize()? 
- } else { - // For non-existent files, canonicalize the parent directory - let parent = output_path.parent().unwrap_or(Path::new(".")); - fs::create_dir_all(parent)?; - let parent_canonical = parent.canonicalize()?; - let file_name = output_path - .file_name() - .ok_or_else(|| anyhow::anyhow!("Invalid output path: no file name"))?; - parent_canonical.join(file_name) - }; + // Validate path against sandbox BEFORE any filesystem writes (fix validate-before-mkdir bug) + let output_canonical = safe_canonicalize_nonexistent(&raw_output_path).map_err(|e| { + anyhow::anyhow!( + "Security error in {}:{} - dir_path '{}': {}", + source_file.display(), + struct_name, + attrs.dir_path, + e + ) + })?; - // Check if output path is within source repository - if !output_canonical.starts_with(source_root) { + if !output_canonical.starts_with(sandbox_root) { bail!( - "Security error in {}:{} - dir_path '{}' attempts to write outside source repository. \ + "Security error in {}:{} - dir_path '{}' attempts to write outside sandbox boundary. 
\ Resolved to: {}, but must be within: {}", source_file.display(), struct_name, attrs.dir_path, output_canonical.display(), - source_root.display() + sandbox_root.display() ); } @@ -352,16 +209,15 @@ fn extract_and_generate_schemas( &field_types, ); - // Create parent directories - if let Some(parent) = output_path.parent() { + // Create parent directories (only after sandbox validation passes) + if let Some(parent) = raw_output_path.parent() { fs::create_dir_all(parent)?; } // Write schema file - fs::write(&output_path, serde_json::to_string_pretty(&schema)?)?; + fs::write(&raw_output_path, serde_json::to_string_pretty(&schema)?)?; - // Add to results (schema_id, file_path) - results.push((attrs.schema_id, output_path.display().to_string())); + results.push((attrs.schema_id, raw_output_path.display().to_string())); } Ok(results) @@ -556,10 +412,12 @@ fn rust_type_to_json_schema(rust_type: &str) -> (bool, serde_json::Value) { #[cfg(test)] mod tests { use super::*; + use crate::gen_common::{ + has_ignore_directive, is_in_auto_ignored_dir, matches_glob_pattern, should_exclude_path, + }; #[test] fn test_matches_glob_pattern() { - // Test simple patterns assert!(matches_glob_pattern( "src/tests/compile_fail/test.rs", "compile_fail" @@ -568,12 +426,8 @@ mod tests { "tests/compile_fail/test.rs", "compile_fail" )); - - // Test wildcard patterns assert!(matches_glob_pattern("src/tests/foo.rs", "tests/*")); assert!(matches_glob_pattern("src/examples/bar.rs", "examples/*")); - - // Test double-star patterns assert!(matches_glob_pattern("a/b/c/d/test.rs", "**/test.rs")); } diff --git a/gts-cli/src/lib.rs b/gts-cli/src/lib.rs index 882c8c2..02aafc6 100644 --- a/gts-cli/src/lib.rs +++ b/gts-cli/src/lib.rs @@ -2,6 +2,8 @@ pub mod logging; // Other modules needed by CLI +pub mod gen_common; +pub mod gen_instances; pub mod gen_schemas; pub mod server; diff --git a/gts-cli/src/main.rs b/gts-cli/src/main.rs index 86fc274..b30e40b 100644 --- a/gts-cli/src/main.rs +++ 
b/gts-cli/src/main.rs @@ -11,6 +11,8 @@ )] mod cli; +mod gen_common; +mod gen_instances; mod gen_schemas; mod logging; mod server; diff --git a/gts-cli/tests/cli_run_tests.rs b/gts-cli/tests/cli_run_tests.rs index dd86a09..32a0f2f 100644 --- a/gts-cli/tests/cli_run_tests.rs +++ b/gts-cli/tests/cli_run_tests.rs @@ -220,6 +220,7 @@ async fn test_run_generate_from_rust_command() -> Result<()> { source: source_path.to_str().unwrap().to_owned(), output: Some(output_path.to_str().unwrap().to_owned()), exclude: vec![], + mode: gts_cli::cli::GenerateMode::Schemas, }, }; diff --git a/gts-cli/tests/gen_instances_tests.rs b/gts-cli/tests/gen_instances_tests.rs new file mode 100644 index 0000000..a6a0851 --- /dev/null +++ b/gts-cli/tests/gen_instances_tests.rs @@ -0,0 +1,774 @@ +#![allow(clippy::unwrap_used)] +//! Integration tests for `generate_instances_from_rust`. +//! +//! These tests cover: +//! - Single instance generation (golden fixture) +//! - Multiple instances in one file +//! - Multiple files in a directory +//! - `pub` and `pub(crate)` visibility +//! - Raw string literals (r#"..."#) +//! - `--output` override path +//! - Source file adjacent output (no --output) +//! - Duplicate instance ID hard error +//! - Duplicate output path hard error +//! - Sandbox escape rejection +//! - Exclude pattern skips file +//! - Missing source path error +//! - `// gts:ignore` directive skips file +//! - JSON `"id"` field injection (never in body) +//! - `concat!()` form rejected +//! - `static` form rejected +//! 
- Wrong const type rejected + +use anyhow::Result; +use gts_cli::gen_instances::generate_instances_from_rust; +use std::fs; +use std::path::Path; +use tempfile::TempDir; + +// ───────────────────────────────────────────────────────────────────────────── +// Helpers +// ───────────────────────────────────────────────────────────────────────────── + +fn sandbox() -> (TempDir, std::path::PathBuf) { + let tmp = TempDir::new().unwrap(); + let root = tmp.path().canonicalize().unwrap(); + (tmp, root) +} + +fn write(dir: &Path, name: &str, content: &str) { + fs::write(dir.join(name), content).unwrap(); +} + +fn instance_src(instance_segment: &str, json_body: &str) -> String { + format!( + concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"{seg}\"\n", + ")]\n", + "pub const INST: &str = {body};\n" + ), + seg = instance_segment, + body = json_body + ) +} + +fn run(source: &str, output: Option<&str>, exclude: &[&str]) -> Result<()> { + let excl: Vec = exclude.iter().map(ToString::to_string).collect(); + generate_instances_from_rust(source, output, &excl, 0) +} + +fn read_json(path: &Path) -> serde_json::Value { + serde_json::from_str(&fs::read_to_string(path).unwrap()).unwrap() +} + +fn inst_path(root: &Path, id: &str) -> std::path::PathBuf { + root.join("instances").join(format!("{id}.instance.json")) +} + +// ───────────────────────────────────────────────────────────────────────────── +// Golden fixture – single instance +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn golden_single_instance() { + let (_tmp, root) = sandbox(); + let src = instance_src( + "x.commerce._.orders.v1.0", + r#""{\"name\":\"orders\",\"partitions\":16}""#, + ); + write(&root, "events.rs", &src); + + run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap(); + + let id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0"; + let out = 
inst_path(&root, id); + assert!(out.exists(), "Expected file: {}", out.display()); + + let val = read_json(&out); + assert_eq!(val["id"], id); + assert_eq!(val["name"], "orders"); + assert_eq!(val["partitions"], 16); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Golden fixture – raw string literal +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn golden_raw_string_literal() { + let (_tmp, root) = sandbox(); + let src = concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.payments.v1.0\"\n", + ")]\n", + "pub const PAYMENTS: &str = r#\"{\"name\":\"payments\",\"partitions\":8}\"#;\n" + ); + write(&root, "events.rs", src); + + run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap(); + + let id = "gts.x.core.events.topic.v1~x.commerce._.payments.v1.0"; + let out = inst_path(&root, id); + assert!(out.exists()); + + let val = read_json(&out); + assert_eq!(val["id"], id); + assert_eq!(val["name"], "payments"); + assert_eq!(val["partitions"], 8); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Multiple instances in one file +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn multiple_instances_in_one_file() { + let (_tmp, root) = sandbox(); + let src = concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.orders.v1.0\"\n", + ")]\n", + "pub const A: &str = \"{\\\"name\\\":\\\"orders\\\"}\";\n", + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.payments.v1.0\"\n", + ")]\n", + "pub const B: &str = \"{\\\"name\\\":\\\"payments\\\"}\";\n" + ); + 
write(&root, "events.rs", src); + + run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap(); + + assert!(inst_path(&root, "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0").exists()); + assert!( + inst_path( + &root, + "gts.x.core.events.topic.v1~x.commerce._.payments.v1.0" + ) + .exists() + ); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Multiple files in a directory +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn multiple_files_in_directory() { + let (_tmp, root) = sandbox(); + + write( + &root, + "a.rs", + &instance_src("x.commerce._.orders.v1.0", "\"{\\\"name\\\":\\\"a\\\"}\""), + ); + write( + &root, + "b.rs", + &instance_src("x.commerce._.payments.v1.0", "\"{\\\"name\\\":\\\"b\\\"}\""), + ); + + run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap(); + + assert!(inst_path(&root, "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0").exists()); + assert!( + inst_path( + &root, + "gts.x.core.events.topic.v1~x.commerce._.payments.v1.0" + ) + .exists() + ); +} + +// ───────────────────────────────────────────────────────────────────────────── +// pub(crate) visibility is accepted +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn pub_crate_visibility_accepted() { + let (_tmp, root) = sandbox(); + let src = concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.orders.v1.0\"\n", + ")]\n", + "pub(crate) const FOO: &str = \"{\\\"name\\\":\\\"x\\\"}\";\n" + ); + write(&root, "events.rs", src); + + run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap(); + + assert!(inst_path(&root, "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0").exists()); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Output uses source file's 
parent directory when --output is not given +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn output_adjacent_to_source_when_no_override() { + let (_tmp, root) = sandbox(); + let subdir = root.join("submodule"); + fs::create_dir_all(&subdir).unwrap(); + write( + &subdir, + "topic.rs", + &instance_src( + "x.commerce._.orders.v1.0", + "\"{\\\"name\\\":\\\"orders\\\"}\"", + ), + ); + + // Pass the source file itself (not its directory) as the source + let src_file = subdir.join("topic.rs"); + run(src_file.to_str().unwrap(), None, &[]).unwrap(); + + let expected = subdir + .join("instances") + .join("gts.x.core.events.topic.v1~x.commerce._.orders.v1.0.instance.json"); + assert!(expected.exists(), "Expected: {}", expected.display()); +} + +// ───────────────────────────────────────────────────────────────────────────── +// A JSON body that already contains an "id" field is rejected with an error +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn id_field_in_body_is_rejected() { + let (_tmp, root) = sandbox(); + write( + &root, + "events.rs", + &instance_src( + "x.commerce._.orders.v1.0", + "\"{\\\"id\\\":\\\"bad\\\",\\\"name\\\":\\\"x\\\"}\"", + ), + ); + + let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); + assert!(err.to_string().contains("\"id\" field"), "Got: {err}"); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Duplicate instance ID → hard error +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn duplicate_instance_id_hard_error() { + let (_tmp, root) = sandbox(); + let src = concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.orders.v1.0\"\n", + ")]\n", + "pub const A: &str = \"{\\\"name\\\":\\\"a\\\"}\";\n", + "#[gts_well_known_instance(\n", + "
dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.orders.v1.0\"\n", + ")]\n", + "pub const B: &str = \"{\\\"name\\\":\\\"b\\\"}\";\n" + ); + write(&root, "dup.rs", src); + + let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); + assert!( + err.to_string().contains("duplicate instance ID"), + "Got: {err}" + ); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Sandbox escape via dir_path → hard error (validate-before-mkdir) +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn sandbox_escape_rejected() { + let (_tmp, root) = sandbox(); + let src = concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"../../gts_sandbox_escape_probe\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.orders.v1.0\"\n", + ")]\n", + "pub const FOO: &str = \"{\\\"name\\\":\\\"x\\\"}\";\n" + ); + write(&root, "escape.rs", src); + + let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("Security error") || msg.contains("sandbox") || msg.contains("'..'"), + "Got: {msg}" + ); + // The probe dir cannot pre-exist (unlike /etc on Linux), so this proves nothing was created outside the sandbox + assert!(!root.join("../../gts_sandbox_escape_probe").exists()); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Exclude pattern skips a file even if it contains valid annotations +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn exclude_pattern_skips_file() { + let (_tmp, root) = sandbox(); + // Write a file with a malformed annotation that would cause a hard error if scanned + let src = concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"bad-no-tilde\",\n", + " instance_segment = \"x.a.v1.0\"\n", + ")]\n", + "pub const X: &str = \"{}\";\n" + ); +
write(&root, "excluded_file.rs", src); + + // Should succeed because the file is excluded + run( + root.to_str().unwrap(), + Some(root.to_str().unwrap()), + &["excluded_file.rs"], + ) + .unwrap(); +} + +// ───────────────────────────────────────────────────────────────────────────── +// gts:ignore directive skips the file +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn gts_ignore_directive_skips_file() { + let (_tmp, root) = sandbox(); + let src = concat!( + "// gts:ignore\n", + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"bad-no-tilde\",\n", + " instance_segment = \"x.a.v1.0\"\n", + ")]\n", + "pub const X: &str = \"{}\";\n" + ); + write(&root, "ignored.rs", src); + + run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap(); + + // No instance file should have been produced + assert!(!root.join("instances").exists()); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Missing source path → error +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn missing_source_path_errors() { + let err = run("/nonexistent/path/absolutely/not/here", None, &[]).unwrap_err(); + assert!(err.to_string().contains("does not exist"), "Got: {err}"); +} + +// ───────────────────────────────────────────────────────────────────────────── +// No annotations → succeeds with zero generated (not an error) +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn no_annotations_produces_nothing() { + let (_tmp, root) = sandbox(); + write(&root, "plain.rs", "const FOO: u32 = 42;\n"); + + run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap(); + + assert!(!root.join("instances").exists()); +} + +// ───────────────────────────────────────────────────────────────────────────── +// concat!() value is rejected with actionable message +// 
───────────────────────────────────────────────────────────────────────────── + +#[test] +fn concat_macro_value_is_rejected() { + let (_tmp, root) = sandbox(); + let src = concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.orders.v1.0\"\n", + ")]\n", + "pub const FOO: &str = concat!(\"{\", \"}\");\n" + ); + write(&root, "concat.rs", src); + + let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); + assert!(err.to_string().contains("concat!()"), "Got: {err}"); +} + +// ───────────────────────────────────────────────────────────────────────────── +// static item is rejected with actionable message +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn static_item_is_rejected() { + let (_tmp, root) = sandbox(); + let src = concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.orders.v1.0\"\n", + ")]\n", + "pub static FOO: &str = \"{\\\"name\\\":\\\"x\\\"}\";\n" + ); + write(&root, "static_item.rs", src); + + let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); + assert!(err.to_string().contains("static"), "Got: {err}"); +} + +// ───────────────────────────────────────────────────────────────────────────── +// schema_id without trailing ~ is rejected +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn schema_id_without_tilde_is_rejected() { + let (_tmp, root) = sandbox(); + let src = concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1\",\n", + " instance_segment = \"x.commerce._.orders.v1.0\"\n", + ")]\n", + "pub const FOO: &str = \"{\\\"name\\\":\\\"x\\\"}\";\n" + ); + write(&root, "notilde.rs", src); + + let err = 
run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); + assert!(err.to_string().contains("'~'"), "Got: {err}"); +} + +// ───────────────────────────────────────────────────────────────────────────── +// instance_segment with trailing ~ is rejected +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn instance_segment_with_tilde_is_rejected() { + let (_tmp, root) = sandbox(); + let src = concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.orders.v1~\"\n", + ")]\n", + "pub const FOO: &str = \"{\\\"name\\\":\\\"x\\\"}\";\n" + ); + write(&root, "segtilde.rs", src); + + let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); + assert!( + err.to_string().contains("must not end with '~'"), + "Got: {err}" + ); +} + +// ───────────────────────────────────────────────────────────────────────────── +// JSON body must be an object — array is rejected +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn json_array_body_is_rejected() { + let (_tmp, root) = sandbox(); + write( + &root, + "events.rs", + &instance_src("x.commerce._.orders.v1.0", "\"[1,2,3]\""), + ); + + let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); + assert!(err.to_string().contains("JSON object"), "Got: {err}"); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Malformed JSON body is rejected +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn malformed_json_body_is_rejected() { + let (_tmp, root) = sandbox(); + write( + &root, + "events.rs", + &instance_src("x.commerce._.orders.v1.0", "\"{not valid json}\""), + ); + + let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); + 
assert!(err.to_string().contains("Malformed JSON"), "Got: {err}"); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Golden fixture: generated file content matches exactly +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn golden_file_content_exact() { + let (_tmp, root) = sandbox(); + let src = concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.orders.v1.0\"\n", + ")]\n", + "pub const ORDERS: &str = \"{\\\"name\\\":\\\"orders\\\",\\\"partitions\\\":16}\";\n" + ); + write(&root, "events.rs", src); + + run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap(); + + let id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0"; + let out = inst_path(&root, id); + let val = read_json(&out); + + // Must have id injected + assert_eq!(val["id"], id); + // Must preserve original fields + assert_eq!(val["name"], "orders"); + assert_eq!(val["partitions"], 16); + // Must not have extra unexpected fields (only id, name, partitions) + let obj = val.as_object().unwrap(); + assert_eq!( + obj.len(), + 3, + "Expected exactly 3 fields, got: {:?}", + obj.keys().collect::>() + ); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Zero-hash raw string r"..." is accepted (Fix: regex r#* not r#+) +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn zero_hash_raw_string_is_accepted() { + let (_tmp, root) = sandbox(); + // r"..." 
with no hashes — was previously not matched by the annotation regex + let src = concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.orders.v1.0\"\n", + ")]\n", + "pub const ZERO_HASH: &str = r\"{\"name\":\"zero\"}\";\n" + ); + write(&root, "zero_hash.rs", src); + + run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap(); + + let id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0"; + let out = inst_path(&root, id); + assert!(out.exists(), "Expected file: {}", out.display()); + let val = read_json(&out); + assert_eq!(val["name"], "zero"); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Char literals near the needle don't cause preflight false-positive +// (Fix: preflight_scan now skips char literals like '#' and '[') +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn char_literal_near_needle_does_not_false_positive() { + let (_tmp, root) = sandbox(); + // File contains '#' and '[' as char literals right before a regular ident, + // but no actual annotation — preflight must return false → quiet skip. 
+ let src = concat!( + "fn check(c: char) -> bool {\n", + " c == '#' || c == '['\n", + "}\n", + "// mentions gts_well_known_instance in a comment only\n", + "const X: u32 = 1;\n" + ); + write(&root, "char_lit.rs", src); + + // Must succeed with no output — not a hard error + run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap(); + assert!(!root.join("instances").exists()); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Unsupported form mentioned only in a comment does NOT hard-error +// (Fix: check_unsupported_forms runs on comment-stripped source) +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn unsupported_form_in_comment_does_not_error() { + let (_tmp, root) = sandbox(); + // The doc comment contains a concat!() example that would have previously + // triggered a hard error from check_unsupported_forms. + let src = concat!( + "/// Example (do NOT use):\n", + "/// #[gts_well_known_instance(\n", + "/// dir_path = \"instances\",\n", + "/// schema_id = \"gts.x.core.events.topic.v1~\",\n", + "/// instance_segment = \"x.a.v1.0\"\n", + "/// )]\n", + "/// pub const BAD: &str = concat!(\"{\", \"}\");\n", + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.orders.v1.0\"\n", + ")]\n", + "pub const REAL: &str = \"{\\\"name\\\":\\\"real\\\"}\";\n" + ); + write(&root, "comment_example.rs", src); + + // Must succeed — the concat!() is only in a doc comment + run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap(); + + let id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0"; + assert!(inst_path(&root, id).exists()); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Annotation applied to a fn (not a const) is a hard error +// (Fix: preflight-positive + no match → hard error, not silent skip) +// 
───────────────────────────────────────────────────────────────────────────── + +#[test] +fn annotation_on_fn_is_hard_error() { + let (_tmp, root) = sandbox(); + let src = concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.orders.v1.0\"\n", + ")]\n", + "pub fn not_a_const() -> &'static str { \"{}\" }\n" + ); + write(&root, "on_fn.rs", src); + + let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("could not be parsed") || msg.contains("const NAME"), + "Got: {msg}" + ); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Duplicate attribute key in annotation is a hard error +// (Fix: check_duplicate_attr_keys added to parse_instance_attrs) +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn duplicate_attribute_key_is_hard_error() { + let (_tmp, root) = sandbox(); + let src = concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " dir_path = \"other\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.orders.v1.0\"\n", + ")]\n", + "pub const DUP: &str = \"{\\\"name\\\":\\\"x\\\"}\";\n" + ); + write(&root, "dup_key.rs", src); + + let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("Duplicate attribute") || msg.contains("dir_path"), + "Got: {msg}" + ); +} + +// ───────────────────────────────────────────────────────────────────────────── +// ./ prefix in dir_path with same ID → duplicate instance ID error +// (dir_path differs via ./ prefix but composed ID is identical, so +// check_duplicate_ids fires. 
The path normalisation in +// check_duplicate_output_paths is a defence-in-depth guard for the +// hypothetical future case where filenames could diverge from the ID.) +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn dot_slash_dir_path_same_id_is_duplicate() { + let (_tmp, root) = sandbox(); + let src = concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.orders.v1.0\"\n", + ")]\n", + "pub const A: &str = \"{\\\"name\\\":\\\"a\\\"}\";\n", + "#[gts_well_known_instance(\n", + " dir_path = \"./instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.orders.v1.0\"\n", + ")]\n", + "pub const B: &str = \"{\\\"name\\\":\\\"b\\\"}\";\n" + ); + write(&root, "dotslash.rs", src); + + let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("duplicate instance ID") || msg.contains("Duplicate"), + "Got: {msg}" + ); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Qualified path form #[gts_macros::gts_well_known_instance(...)] is accepted +// (Fix: NEEDLE and regex updated to match optional `gts_macros::` prefix) +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn qualified_path_form_is_accepted() { + let (_tmp, root) = sandbox(); + let src = concat!( + "#[gts_macros::gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"gts.x.core.events.topic.v1~\",\n", + " instance_segment = \"x.commerce._.orders.v1.0\"\n", + ")]\n", + "pub const QUALIFIED: &str = r#\"{\"name\":\"qualified\"}\"#;\n" + ); + write(&root, "qualified.rs", src); + + run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap(); + + let id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0"; + 
let out = inst_path(&root, id); + assert!(out.exists(), "Expected file: {}", out.display()); + let val = read_json(&out); + assert_eq!(val["name"], "qualified"); +} + +// ───────────────────────────────────────────────────────────────────────────── +// compile_fail dir is auto-skipped (auto-ignored dir) +// ───────────────────────────────────────────────────────────────────────────── + +#[test] +fn compile_fail_dir_is_auto_skipped() { + let (_tmp, root) = sandbox(); + let cf_dir = root.join("compile_fail"); + fs::create_dir_all(&cf_dir).unwrap(); + + // Place a malformed annotation in compile_fail/ — should be silently skipped + let src = concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"instances\",\n", + " schema_id = \"bad-no-tilde\",\n", + " instance_segment = \"x.a.v1.0\"\n", + ")]\n", + "pub const X: &str = \"{}\";\n" + ); + write(&cf_dir, "test.rs", src); + + run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap(); +} diff --git a/gts-macros/README.md b/gts-macros/README.md index d3bee97..f8fe72c 100644 --- a/gts-macros/README.md +++ b/gts-macros/README.md @@ -423,6 +423,96 @@ The CLI automatically maps Rust types to JSON Schema types: --- +## `#[gts_well_known_instance]` — Well-Known Instance Declaration + +The `#[gts_well_known_instance]` attribute macro declares a **well-known GTS instance** as a `const` JSON string literal. It provides: + +1. **Compile-time validation** of `schema_id`, `instance_segment`, and the composed instance ID format. +2. **CLI extraction** — the `gts generate-from-rust --mode instances` command scans for these annotations, validates the JSON payload, injects the `"id"` field, and writes the instance file. + +The macro passes the annotated `const` through **unchanged** at compile time. It is purely metadata for the CLI extraction step. 
+ +### Usage + +```rust +#[gts_macros::gts_well_known_instance( + dir_path = "instances", + schema_id = "gts.x.core.events.topic.v1~", + instance_segment = "x.commerce._.orders.v1.0" +)] +pub const ORDERS_TOPIC: &str = r#"{ + "name": "orders", + "description": "Order lifecycle events topic", + "retention": "P90D", + "partitions": 16 +}"#; +``` + +### Parameters + +| Parameter | Required | Description | +|-----------|----------|-------------| +| `dir_path` | Yes | Output directory for the generated instance file (relative to source file or `--output`) | +| `schema_id` | Yes | GTS schema ID this instance conforms to — **must end with `~`** | +| `instance_segment` | Yes | Appended to `schema_id` to form the full instance ID — **must not end with `~`**, must not be a bare `*` | + +The full instance ID is `schema_id + instance_segment`, e.g.: +``` +gts.x.core.events.topic.v1~x.commerce._.orders.v1.0 +``` + +### Generated file + +The CLI writes `{dir_path}/{schema_id}{instance_segment}.instance.json` with the `"id"` field automatically injected: + +```json +{ + "id": "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", + "name": "orders", + "description": "Order lifecycle events topic", + "retention": "P90D", + "partitions": 16 +} +``` + +### CLI command + +```bash +# Generate only instance files +gts generate-from-rust --source src/ --mode instances + +# Generate both schemas and instances +gts generate-from-rust --source src/ --mode all + +# Override output directory +gts generate-from-rust --source src/ --output out/ --mode instances +``` + +### Rules and restrictions + +- The annotated item **must be a `const`** (not `static`) of type `&str`. +- The const value **must be a string literal** — raw strings (`r#"..."#`) or regular strings (`"..."`). `concat!()` and other macro invocations are not supported. +- The JSON body **must be a JSON object** (`{ ... }`). Arrays, scalars, and `null` are not valid. 
+- The JSON body **must not contain an `"id"` field** — the CLI injects it automatically from `schema_id + instance_segment`. +- Files in `compile_fail/` directories and files with a `// gts:ignore` directive are skipped. +- Items behind `#[cfg(...)]` gates (e.g., `#[cfg(test)]`) are still extracted — extraction is lexical, not conditional. + +### Compile-time validation errors + +| Violation | Error | +|-----------|-------| +| Missing `schema_id` | `Missing required attribute: schema_id` | +| Missing `instance_segment` | `Missing required attribute: instance_segment` | +| Missing `dir_path` | `Missing required attribute: dir_path` | +| `schema_id` without trailing `~` | `schema_id must end with '~' (type marker)` | +| `instance_segment` ending with `~` | `instance_segment must not end with '~'` | +| `instance_segment = "*"` (bare wildcard) | `instance_segment must not be a bare wildcard '*'` | +| Applied to a `static` item | `Only \`const\` items are supported` | +| Applied to a `const` with type other than `&str` | `The annotated const must have type \`&str\`` | +| Const value is `concat!()` or other macro | `The const value must be a string literal` | + +--- + ## Purpose 3: Runtime API The macro generates associated constants, methods, and implements the `GtsSchema` trait for runtime use. diff --git a/gts-macros/src/lib.rs b/gts-macros/src/lib.rs index d6030cb..84ce060 100644 --- a/gts-macros/src/lib.rs +++ b/gts-macros/src/lib.rs @@ -1840,3 +1840,241 @@ pub fn struct_to_gts_schema(attr: TokenStream, item: TokenStream) -> TokenStream TokenStream::from(expanded) } + +/// Arguments for the `#[gts_well_known_instance]` macro. +/// This is a parse-only validation type; fields are validated during `Parse` and not retained. 
+struct GtsInstanceArgs; + +impl Parse for GtsInstanceArgs { + fn parse(input: ParseStream) -> syn::Result { + let mut dir_path: Option = None; + let mut schema_id: Option = None; + let mut instance_segment: Option = None; + let mut seen_keys: std::collections::HashSet = std::collections::HashSet::new(); + + while !input.is_empty() { + let key: syn::Ident = input.parse()?; + input.parse::()?; + + let key_str = key.to_string(); + if !seen_keys.insert(key_str.clone()) { + return Err(syn::Error::new_spanned( + key, + format!( + "gts_well_known_instance: Duplicate attribute '{key_str}'. Each attribute must appear exactly once." + ), + )); + } + + match key_str.as_str() { + "dir_path" => { + let value: LitStr = input.parse()?; + dir_path = Some(value.value()); + } + "schema_id" => { + let value: LitStr = input.parse()?; + let id = value.value(); + // schema_id must end with ~ (type marker) + if !id.ends_with('~') { + return Err(syn::Error::new_spanned( + value, + format!( + "gts_well_known_instance: schema_id must end with '~' (type marker), got '{id}'" + ), + )); + } + // General GTS ID validation + if let Err(e) = gts_id::validate_gts_id(&id, false) { + let msg = match &e { + gts_id::GtsIdError::Id { cause, .. } => { + format!("Invalid GTS schema ID: {cause}") + } + gts_id::GtsIdError::Segment { num, cause, .. } => { + format!("Segment #{num}: {cause}") + } + }; + return Err(syn::Error::new_spanned( + value, + format!("gts_well_known_instance: {msg}"), + )); + } + schema_id = Some(id); + } + "instance_segment" => { + let value: LitStr = input.parse()?; + let seg = value.value(); + + // Reject instance_segment ending with ~ (type/schema marker) + if seg.ends_with('~') { + return Err(syn::Error::new_spanned( + value, + "gts_well_known_instance: instance_segment must not end with '~' \u{2014} that is a schema/type marker. 
Instance segments do not end with '~'", + )); + } + + // Reject wildcard-only instance_segment + if seg == "*" { + return Err(syn::Error::new_spanned( + value, + "gts_well_known_instance: instance_segment must not be a bare wildcard '*'. Wildcards are not valid in generated instance IDs", + )); + } + + // Validate the segment using gts_id::validate_segment (no wildcards) + if let Err(cause) = gts_id::validate_segment(2, &seg, false) { + return Err(syn::Error::new_spanned( + value, + format!( + "gts_well_known_instance: Invalid instance_segment '{seg}': {cause}" + ), + )); + } + + instance_segment = Some(seg); + } + _ => { + return Err(syn::Error::new_spanned( + key, + "gts_well_known_instance: Unknown attribute. Expected: dir_path, schema_id, instance_segment", + )); + } + } + + if input.peek(Token![,]) { + input.parse::()?; + } + } + + // Validate composed instance ID after both parts are parsed + let schema_id_val = schema_id.ok_or_else(|| { + input.error("gts_well_known_instance: Missing required attribute: schema_id") + })?; + let instance_segment_val = instance_segment.ok_or_else(|| { + input.error("gts_well_known_instance: Missing required attribute: instance_segment") + })?; + let _dir_path_val = dir_path.ok_or_else(|| { + input.error("gts_well_known_instance: Missing required attribute: dir_path") + })?; + + // Validate the composed instance ID (schema_id + instance_segment forms a valid GTS instance ID) + let composed = format!("{schema_id_val}{instance_segment_val}"); + if let Err(e) = gts_id::validate_gts_id(&composed, false) { + let msg = match &e { + gts_id::GtsIdError::Id { cause, .. } => { + format!("Invalid composed instance ID '{composed}': {cause}") + } + gts_id::GtsIdError::Segment { num, cause, .. 
} => { + format!("Invalid composed instance ID '{composed}': Segment #{num}: {cause}") + } + }; + return Err(input.error(format!("gts_well_known_instance: {msg}"))); + } + + Ok(GtsInstanceArgs) + } +} + +/// Declare a well-known GTS instance as a const JSON string literal. +/// +/// This macro: +/// 1. **At compile time**: validates the `schema_id` and `instance_segment` GTS ID formats +/// and verifies the annotated item is a `const` of type `&str`. +/// 2. **At generate time**: the CLI (`gts generate-from-rust --mode instances`) scans for +/// these annotations, validates the JSON payload, injects the `"id"` field, and writes +/// `{dir_path}/{schema_id}{instance_segment}.instance.json`. +/// +/// The macro passes the annotated `const` item through unchanged -- it is purely metadata +/// for the CLI extraction step. +/// +/// # Arguments +/// +/// * `dir_path` - Output directory for the generated instance file (relative to crate root or `--output`) +/// * `schema_id` - GTS schema ID this instance conforms to (must end with `~`) +/// * `instance_segment` - Appended to `schema_id` to form the full instance ID (must not end with `~`) +/// +/// # Example +/// +/// ```ignore +/// use gts_macros::gts_well_known_instance; +/// +/// #[gts_well_known_instance( +/// dir_path = "instances", +/// schema_id = "gts.x.core.events.topic.v1~", +/// instance_segment = "x.commerce._.orders.v1.0" +/// )] +/// const ORDERS_TOPIC: &str = r#"{ +/// "name": "orders", +/// "description": "Order lifecycle events topic", +/// "retention": "P90D", +/// "partitions": 16 +/// }"#; +/// ``` +/// +/// The CLI generates: +/// `instances/gts.x.core.events.topic.v1~x.commerce._.orders.v1.0.instance.json` +/// with an injected `"id"` field: `"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0"`. 
+#[proc_macro_attribute] +pub fn gts_well_known_instance(attr: TokenStream, item: TokenStream) -> TokenStream { + let _args = parse_macro_input!(attr as GtsInstanceArgs); + + // Parse the annotated item -- must be a const item + let item_clone = item.clone(); + let parsed: syn::Result = syn::parse(item_clone); + + match parsed { + Err(_) => { + return syn::Error::new( + proc_macro2::Span::call_site(), + "gts_well_known_instance: Only `const` items are supported. \ + Usage: `const NAME: &str = r#\"{ ... }\"#;`", + ) + .to_compile_error() + .into(); + } + Ok(item_const) => { + // Validate the const has type &str (or &'lifetime str) using AST matching. + // This is more robust than stringifying tokens and avoids false rejections + // from formatting differences (spaces, lifetime names, etc.). + let ty = &item_const.ty; + let is_ref_str = match ty.as_ref() { + syn::Type::Reference(syn::TypeReference { elem, .. }) => { + matches!(elem.as_ref(), syn::Type::Path(p) if p.qself.is_none() && p.path.is_ident("str")) + } + _ => false, + }; + if !is_ref_str { + let ty_str = quote::quote!(#ty).to_string().replace(' ', ""); + return syn::Error::new_spanned( + ty, + format!( + "gts_well_known_instance: The annotated const must have type `&str`, got `{ty_str}`. \ + Usage: `const NAME: &str = r#\"{{ ... }}\"#;`" + ), + ) + .to_compile_error() + .into(); + } + + // Validate the const value is a string literal (not a macro invocation) + match item_const.expr.as_ref() { + syn::Expr::Lit(syn::ExprLit { + lit: syn::Lit::Str(_), + .. + }) => {} + _ => { + return syn::Error::new_spanned( + &item_const.expr, + "gts_well_known_instance: The const value must be a string literal \ + (raw string `r#\"...\"#` or regular string `\"...\"`). 
\ + Macro invocations like `concat!()` are not supported.", + ) + .to_compile_error() + .into(); + } + } + } + } + + // Pass the item through unchanged -- this macro is purely metadata for the CLI + item +} diff --git a/gts-macros/tests/compile_fail/instance_const_wrong_type.rs b/gts-macros/tests/compile_fail/instance_const_wrong_type.rs new file mode 100644 index 0000000..61b6975 --- /dev/null +++ b/gts-macros/tests/compile_fail/instance_const_wrong_type.rs @@ -0,0 +1,12 @@ +//! Test: gts_well_known_instance applied to a const with wrong type (not &str) + +use gts_macros::gts_well_known_instance; + +#[gts_well_known_instance( + dir_path = "instances", + schema_id = "gts.x.core.events.topic.v1~", + instance_segment = "x.commerce._.orders.v1.0" +)] +const ORDERS_TOPIC: u32 = 42; + +fn main() {} diff --git a/gts-macros/tests/compile_fail/instance_const_wrong_type.stderr b/gts-macros/tests/compile_fail/instance_const_wrong_type.stderr new file mode 100644 index 0000000..11a83ef --- /dev/null +++ b/gts-macros/tests/compile_fail/instance_const_wrong_type.stderr @@ -0,0 +1,5 @@ +error: gts_well_known_instance: The annotated const must have type `&str`, got `u32`. Usage: `const NAME: &str = r#"{ ... }"#;` + --> tests/compile_fail/instance_const_wrong_type.rs:10:21 + | +10 | const ORDERS_TOPIC: u32 = 42; + | ^^^ diff --git a/gts-macros/tests/compile_fail/instance_missing_dir_path.rs b/gts-macros/tests/compile_fail/instance_missing_dir_path.rs new file mode 100644 index 0000000..28842a1 --- /dev/null +++ b/gts-macros/tests/compile_fail/instance_missing_dir_path.rs @@ -0,0 +1,11 @@ +//! 
Test: Missing required attribute dir_path in gts_well_known_instance + +use gts_macros::gts_well_known_instance; + +#[gts_well_known_instance( + schema_id = "gts.x.core.events.topic.v1~", + instance_segment = "x.commerce._.orders.v1.0" +)] +const ORDERS_TOPIC: &str = r#"{"name": "orders"}"#; + +fn main() {} diff --git a/gts-macros/tests/compile_fail/instance_missing_dir_path.stderr b/gts-macros/tests/compile_fail/instance_missing_dir_path.stderr new file mode 100644 index 0000000..9abf8a3 --- /dev/null +++ b/gts-macros/tests/compile_fail/instance_missing_dir_path.stderr @@ -0,0 +1,10 @@ +error: unexpected end of input, gts_well_known_instance: Missing required attribute: dir_path + --> tests/compile_fail/instance_missing_dir_path.rs:5:1 + | +5 | / #[gts_well_known_instance( +6 | | schema_id = "gts.x.core.events.topic.v1~", +7 | | instance_segment = "x.commerce._.orders.v1.0" +8 | | )] + | |__^ + | + = note: this error originates in the attribute macro `gts_well_known_instance` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/gts-macros/tests/compile_fail/instance_missing_instance_segment.rs b/gts-macros/tests/compile_fail/instance_missing_instance_segment.rs new file mode 100644 index 0000000..66eb196 --- /dev/null +++ b/gts-macros/tests/compile_fail/instance_missing_instance_segment.rs @@ -0,0 +1,11 @@ +//! 
Test: Missing required attribute instance_segment in gts_well_known_instance + +use gts_macros::gts_well_known_instance; + +#[gts_well_known_instance( + dir_path = "instances", + schema_id = "gts.x.core.events.topic.v1~" +)] +const ORDERS_TOPIC: &str = r#"{"name": "orders"}"#; + +fn main() {} diff --git a/gts-macros/tests/compile_fail/instance_missing_instance_segment.stderr b/gts-macros/tests/compile_fail/instance_missing_instance_segment.stderr new file mode 100644 index 0000000..fb14308 --- /dev/null +++ b/gts-macros/tests/compile_fail/instance_missing_instance_segment.stderr @@ -0,0 +1,10 @@ +error: unexpected end of input, gts_well_known_instance: Missing required attribute: instance_segment + --> tests/compile_fail/instance_missing_instance_segment.rs:5:1 + | +5 | / #[gts_well_known_instance( +6 | | dir_path = "instances", +7 | | schema_id = "gts.x.core.events.topic.v1~" +8 | | )] + | |__^ + | + = note: this error originates in the attribute macro `gts_well_known_instance` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/gts-macros/tests/compile_fail/instance_missing_schema_id.rs b/gts-macros/tests/compile_fail/instance_missing_schema_id.rs new file mode 100644 index 0000000..77baaef --- /dev/null +++ b/gts-macros/tests/compile_fail/instance_missing_schema_id.rs @@ -0,0 +1,11 @@ +//! 
Test: Missing required attribute schema_id in gts_well_known_instance + +use gts_macros::gts_well_known_instance; + +#[gts_well_known_instance( + dir_path = "instances", + instance_segment = "x.commerce._.orders.v1.0" +)] +const ORDERS_TOPIC: &str = r#"{"name": "orders"}"#; + +fn main() {} diff --git a/gts-macros/tests/compile_fail/instance_missing_schema_id.stderr b/gts-macros/tests/compile_fail/instance_missing_schema_id.stderr new file mode 100644 index 0000000..bfbe1c5 --- /dev/null +++ b/gts-macros/tests/compile_fail/instance_missing_schema_id.stderr @@ -0,0 +1,10 @@ +error: unexpected end of input, gts_well_known_instance: Missing required attribute: schema_id + --> tests/compile_fail/instance_missing_schema_id.rs:5:1 + | +5 | / #[gts_well_known_instance( +6 | | dir_path = "instances", +7 | | instance_segment = "x.commerce._.orders.v1.0" +8 | | )] + | |__^ + | + = note: this error originates in the attribute macro `gts_well_known_instance` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/gts-macros/tests/compile_fail/instance_on_non_const.rs b/gts-macros/tests/compile_fail/instance_on_non_const.rs new file mode 100644 index 0000000..99ada20 --- /dev/null +++ b/gts-macros/tests/compile_fail/instance_on_non_const.rs @@ -0,0 +1,12 @@ +//! Test: gts_well_known_instance applied to a non-const item (static) + +use gts_macros::gts_well_known_instance; + +#[gts_well_known_instance( + dir_path = "instances", + schema_id = "gts.x.core.events.topic.v1~", + instance_segment = "x.commerce._.orders.v1.0" +)] +static ORDERS_TOPIC: &str = r#"{"name": "orders"}"#; + +fn main() {} diff --git a/gts-macros/tests/compile_fail/instance_on_non_const.stderr b/gts-macros/tests/compile_fail/instance_on_non_const.stderr new file mode 100644 index 0000000..cbd27c2 --- /dev/null +++ b/gts-macros/tests/compile_fail/instance_on_non_const.stderr @@ -0,0 +1,11 @@ +error: gts_well_known_instance: Only `const` items are supported. Usage: `const NAME: &str = r#"{ ... 
}"#;` + --> tests/compile_fail/instance_on_non_const.rs:5:1 + | +5 | / #[gts_well_known_instance( +6 | | dir_path = "instances", +7 | | schema_id = "gts.x.core.events.topic.v1~", +8 | | instance_segment = "x.commerce._.orders.v1.0" +9 | | )] + | |__^ + | + = note: this error originates in the attribute macro `gts_well_known_instance` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/gts-macros/tests/compile_fail/instance_schema_id_no_tilde.rs b/gts-macros/tests/compile_fail/instance_schema_id_no_tilde.rs new file mode 100644 index 0000000..0a9f6de --- /dev/null +++ b/gts-macros/tests/compile_fail/instance_schema_id_no_tilde.rs @@ -0,0 +1,12 @@ +//! Test: schema_id does not end with ~ in gts_well_known_instance + +use gts_macros::gts_well_known_instance; + +#[gts_well_known_instance( + dir_path = "instances", + schema_id = "gts.x.core.events.topic.v1", + instance_segment = "x.commerce._.orders.v1.0" +)] +const ORDERS_TOPIC: &str = r#"{"name": "orders"}"#; + +fn main() {} diff --git a/gts-macros/tests/compile_fail/instance_schema_id_no_tilde.stderr b/gts-macros/tests/compile_fail/instance_schema_id_no_tilde.stderr new file mode 100644 index 0000000..cd8f55c --- /dev/null +++ b/gts-macros/tests/compile_fail/instance_schema_id_no_tilde.stderr @@ -0,0 +1,5 @@ +error: gts_well_known_instance: schema_id must end with '~' (type marker), got 'gts.x.core.events.topic.v1' + --> tests/compile_fail/instance_schema_id_no_tilde.rs:7:17 + | +7 | schema_id = "gts.x.core.events.topic.v1", + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/gts-macros/tests/compile_fail/instance_segment_bare_wildcard.rs b/gts-macros/tests/compile_fail/instance_segment_bare_wildcard.rs new file mode 100644 index 0000000..81cce7a --- /dev/null +++ b/gts-macros/tests/compile_fail/instance_segment_bare_wildcard.rs @@ -0,0 +1,12 @@ +//! 
Test: instance_segment is a bare wildcard * in gts_well_known_instance + +use gts_macros::gts_well_known_instance; + +#[gts_well_known_instance( + dir_path = "instances", + schema_id = "gts.x.core.events.topic.v1~", + instance_segment = "*" +)] +const ORDERS_TOPIC: &str = r#"{"name": "orders"}"#; + +fn main() {} diff --git a/gts-macros/tests/compile_fail/instance_segment_bare_wildcard.stderr b/gts-macros/tests/compile_fail/instance_segment_bare_wildcard.stderr new file mode 100644 index 0000000..accbe3b --- /dev/null +++ b/gts-macros/tests/compile_fail/instance_segment_bare_wildcard.stderr @@ -0,0 +1,5 @@ +error: gts_well_known_instance: instance_segment must not be a bare wildcard '*'. Wildcards are not valid in generated instance IDs + --> tests/compile_fail/instance_segment_bare_wildcard.rs:8:24 + | +8 | instance_segment = "*" + | ^^^ diff --git a/gts-macros/tests/compile_fail/instance_segment_ends_with_tilde.rs b/gts-macros/tests/compile_fail/instance_segment_ends_with_tilde.rs new file mode 100644 index 0000000..f2728a9 --- /dev/null +++ b/gts-macros/tests/compile_fail/instance_segment_ends_with_tilde.rs @@ -0,0 +1,12 @@ +//! Test: instance_segment ends with ~ in gts_well_known_instance + +use gts_macros::gts_well_known_instance; + +#[gts_well_known_instance( + dir_path = "instances", + schema_id = "gts.x.core.events.topic.v1~", + instance_segment = "x.commerce._.orders.v1~" +)] +const ORDERS_TOPIC: &str = r#"{"name": "orders"}"#; + +fn main() {} diff --git a/gts-macros/tests/compile_fail/instance_segment_ends_with_tilde.stderr b/gts-macros/tests/compile_fail/instance_segment_ends_with_tilde.stderr new file mode 100644 index 0000000..c772728 --- /dev/null +++ b/gts-macros/tests/compile_fail/instance_segment_ends_with_tilde.stderr @@ -0,0 +1,5 @@ +error: gts_well_known_instance: instance_segment must not end with '~' — that is a schema/type marker. 
Instance segments do not end with '~' + --> tests/compile_fail/instance_segment_ends_with_tilde.rs:8:24 + | +8 | instance_segment = "x.commerce._.orders.v1~" + | ^^^^^^^^^^^^^^^^^^^^^^^^^ From 6978365e844e19006242c41684b882203d166e15 Mon Sep 17 00:00:00 2001 From: devjow Date: Sat, 28 Feb 2026 12:00:13 +0000 Subject: [PATCH 02/10] feat: gts builder file paths Signed-off-by: devjow --- gts-cli/src/gen_instances/mod.rs | 3 ++- gts-cli/src/gen_instances/writer.rs | 3 ++- gts-cli/tests/gen_instances_tests.rs | 9 ++++++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/gts-cli/src/gen_instances/mod.rs b/gts-cli/src/gen_instances/mod.rs index ee15453..8da16cb 100644 --- a/gts-cli/src/gen_instances/mod.rs +++ b/gts-cli/src/gen_instances/mod.rs @@ -121,7 +121,8 @@ fn check_duplicate_output_paths( for inst in instances { let composed = format!("{}{}", inst.attrs.schema_id, inst.attrs.instance_segment); - let file_rel = format!("{}/{}.instance.json", inst.attrs.dir_path, composed); + let file_rel = + std::path::Path::new(&inst.attrs.dir_path).join(format!("{composed}.instance.json")); let raw_path = if let Some(od) = output { Path::new(od).join(&file_rel) } else { diff --git a/gts-cli/src/gen_instances/writer.rs b/gts-cli/src/gen_instances/writer.rs index 6cbd3ac..0b00ba6 100644 --- a/gts-cli/src/gen_instances/writer.rs +++ b/gts-cli/src/gen_instances/writer.rs @@ -24,7 +24,8 @@ pub fn generate_single_instance( sandbox_root: &Path, ) -> Result { let composed = format!("{}{}", inst.attrs.schema_id, inst.attrs.instance_segment); - let file_rel = format!("{}/{}.instance.json", inst.attrs.dir_path, composed); + let file_rel = + std::path::Path::new(&inst.attrs.dir_path).join(format!("{composed}.instance.json")); let raw_output_path = if let Some(od) = output { Path::new(od).join(&file_rel) diff --git a/gts-cli/tests/gen_instances_tests.rs b/gts-cli/tests/gen_instances_tests.rs index a6a0851..f72b836 100644 --- a/gts-cli/tests/gen_instances_tests.rs +++ 
b/gts-cli/tests/gen_instances_tests.rs @@ -372,7 +372,14 @@ fn gts_ignore_directive_skips_file() { #[test] fn missing_source_path_errors() { - let err = run("/nonexistent/path/absolutely/not/here", None, &[]).unwrap_err(); + // Use a path guaranteed not to exist on any platform by constructing it + // inside a TempDir that is immediately dropped (and thus deleted). + let nonexistent = { + let tmp = TempDir::new().unwrap(); + tmp.path().join("no_such_subdir_xyz") + // tmp is dropped here — the parent dir is deleted + }; + let err = run(nonexistent.to_str().unwrap(), None, &[]).unwrap_err(); assert!(err.to_string().contains("does not exist"), "Got: {err}"); } From a68f48f3d803ccb4a5ec274bc1a87be7bd7abe90 Mon Sep 17 00:00:00 2001 From: devjow Date: Sat, 28 Feb 2026 12:03:38 +0000 Subject: [PATCH 03/10] fix: remove bogus etc existence check in sandbox_escape_rejected test Signed-off-by: devjow --- gts-cli/src/gen_common.rs | 17 +++++++++-------- gts-cli/tests/gen_instances_tests.rs | 2 -- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/gts-cli/src/gen_common.rs b/gts-cli/src/gen_common.rs index bb5e7fb..594d2ef 100644 --- a/gts-cli/src/gen_common.rs +++ b/gts-cli/src/gen_common.rs @@ -41,6 +41,7 @@ pub fn should_exclude_path(path: &Path, patterns: &[String]) -> bool { /// Supports: * (any characters), ** (any path segments) #[must_use] pub fn matches_glob_pattern(path: &str, pattern: &str) -> bool { + let normalized = path.replace('\\', "/"); let regex_pattern = pattern .replace('.', r"\.") .replace("**", "<>") @@ -48,9 +49,9 @@ pub fn matches_glob_pattern(path: &str, pattern: &str) -> bool { .replace("<>", ".*"); if let Ok(re) = Regex::new(&format!("(^|/){regex_pattern}($|/)")) { - re.is_match(path) + re.is_match(&normalized) } else { - path.contains(pattern) + normalized.contains(pattern) } } @@ -351,12 +352,12 @@ mod tests { #[test] fn test_safe_canonicalize_nonexistent_traversal_rejected() { - let _path = Path::new("/tmp/../etc/passwd"); - // This 
path has .. in it and /tmp exists, but the .. is in the existing ancestor chain - // Safe canonicalize should resolve it via the existing /tmp parent - // The important test is a suffix with .. - let nonexistent = Path::new("/tmp/gts_test_nonexistent_12345/../escape"); - let result = safe_canonicalize_nonexistent(nonexistent); + // Build a path with .. in the non-existent suffix via a real temp dir. + // TempDir exists on all platforms so canonicalize of the parent succeeds, + // but the sub-path `nonexistent/../escape` contains `..` and must be rejected. + let tmp = tempfile::TempDir::new().unwrap(); + let nonexistent = tmp.path().join("nonexistent").join("..").join("escape"); + let result = safe_canonicalize_nonexistent(&nonexistent); assert!(result.is_err(), "Should reject '..' in non-existent suffix"); } } diff --git a/gts-cli/tests/gen_instances_tests.rs b/gts-cli/tests/gen_instances_tests.rs index f72b836..3ce621b 100644 --- a/gts-cli/tests/gen_instances_tests.rs +++ b/gts-cli/tests/gen_instances_tests.rs @@ -311,8 +311,6 @@ fn sandbox_escape_rejected() { msg.contains("Security error") || msg.contains("sandbox") || msg.contains("'..'"), "Got: {msg}" ); - // No directory should have been created outside sandbox - assert!(!root.join("../../etc").exists()); } // ───────────────────────────────────────────────────────────────────────────── From 934a0c59b64a4711ac764587c5f335716efcc677 Mon Sep 17 00:00:00 2001 From: devjow Date: Sat, 28 Feb 2026 14:50:23 +0000 Subject: [PATCH 04/10] fix: escape regex metacharacters in glob patterns, guard against infinite loop in raw-string blanking, and fix zero-hash raw-string test fixture Signed-off-by: devjow --- gts-cli/src/gen_common.rs | 17 +++++++++++++---- gts-cli/src/gen_instances/attrs.rs | 20 +++++++++++++------- gts-cli/tests/gen_instances_tests.rs | 2 +- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/gts-cli/src/gen_common.rs b/gts-cli/src/gen_common.rs index 594d2ef..7e901bb 100644 --- 
a/gts-cli/src/gen_common.rs +++ b/gts-cli/src/gen_common.rs @@ -42,10 +42,9 @@ pub fn should_exclude_path(path: &Path, patterns: &[String]) -> bool { #[must_use] pub fn matches_glob_pattern(path: &str, pattern: &str) -> bool { let normalized = path.replace('\\', "/"); - let regex_pattern = pattern - .replace('.', r"\.") - .replace("**", "<>") - .replace('*', "[^/]*") + let regex_pattern = regex::escape(pattern) + .replace(r"\*\*", "<>") + .replace(r"\*", "[^/]*") .replace("<>", ".*"); if let Ok(re) = Regex::new(&format!("(^|/){regex_pattern}($|/)")) { @@ -315,6 +314,16 @@ mod tests { assert!(matches_glob_pattern("a/b/c/d/test.rs", "**/test.rs")); } + #[test] + fn test_matches_glob_pattern_literal_metacharacters() { + // '+' is a regex quantifier but must be treated as a literal in glob patterns. + assert!(matches_glob_pattern("src/foo+bar.rs", "foo+bar.rs")); + assert!(!matches_glob_pattern("src/fooXbar.rs", "foo+bar.rs")); + // '[' and ']' are regex character-class delimiters but must be literal. 
+ assert!(matches_glob_pattern("src/foo[0].rs", "foo[0].rs")); + assert!(!matches_glob_pattern("src/foo0.rs", "foo[0].rs")); + } + #[test] fn test_is_in_auto_ignored_dir() { assert!(is_in_auto_ignored_dir(Path::new( diff --git a/gts-cli/src/gen_instances/attrs.rs b/gts-cli/src/gen_instances/attrs.rs index 7b19b87..5d9acc2 100644 --- a/gts-cli/src/gen_instances/attrs.rs +++ b/gts-cli/src/gen_instances/attrs.rs @@ -127,6 +127,7 @@ fn blank_string_literals(s: &str) -> String { // Found r", now scan for closing "# let content_start = hash_end + 1; let mut scan = content_start; + let mut close_span: Option<(usize, usize)> = None; 'raw: while scan < len { if bytes[scan] == b'"' { // Check for the required number of closing hashes @@ -137,18 +138,23 @@ fn blank_string_literals(s: &str) -> String { close += 1; } if count == hashes { - // Blank the content between opening and closing delimiters - for byte in &mut out[content_start..scan] { - if byte.is_ascii() { - *byte = b' '; - } - } - pos = close; + close_span = Some((scan, close)); break 'raw; } } scan += 1; } + if let Some((content_end, close)) = close_span { + for byte in &mut out[content_start..content_end] { + if byte.is_ascii() { + *byte = b' '; + } + } + pos = close; + } else { + // Unterminated raw string: advance to avoid infinite loop. 
+ pos += 1; + } continue; } // Not a raw string — fall through to normal char handling diff --git a/gts-cli/tests/gen_instances_tests.rs b/gts-cli/tests/gen_instances_tests.rs index 3ce621b..77f3b99 100644 --- a/gts-cli/tests/gen_instances_tests.rs +++ b/gts-cli/tests/gen_instances_tests.rs @@ -568,7 +568,7 @@ fn zero_hash_raw_string_is_accepted() { " schema_id = \"gts.x.core.events.topic.v1~\",\n", " instance_segment = \"x.commerce._.orders.v1.0\"\n", ")]\n", - "pub const ZERO_HASH: &str = r\"{\"name\":\"zero\"}\";\n" + "pub const ZERO_HASH: &str = r#\"{\"name\":\"zero\"}\"#;\n" ); write(&root, "zero_hash.rs", src); From 72ba9c634173b66579be34ca178e2ca84d90191c Mon Sep 17 00:00:00 2001 From: devjow Date: Tue, 3 Mar 2026 08:10:21 +0000 Subject: [PATCH 05/10] fix: normalize absolute path Signed-off-by: devjow --- gts-cli/src/gen_common.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/gts-cli/src/gen_common.rs b/gts-cli/src/gen_common.rs index 7e901bb..c157d99 100644 --- a/gts-cli/src/gen_common.rs +++ b/gts-cli/src/gen_common.rs @@ -221,6 +221,16 @@ pub fn safe_canonicalize_nonexistent(path: &Path) -> Result { } } + // Normalize to an absolute path so that canonical_ancestor is always absolute. + // This ensures starts_with(sandbox_root) comparisons work when the caller + // passes a relative output directory. 
+ let path = if path.is_absolute() { + path.to_path_buf() + } else { + std::env::current_dir()?.join(path) + }; + let path = path.as_path(); + if path.exists() { return Ok(path.canonicalize()?); } From 253c80d8b77ea04a6a25a28688ef1c5a9168d8ab Mon Sep 17 00:00:00 2001 From: devjow Date: Tue, 3 Mar 2026 08:28:08 +0000 Subject: [PATCH 06/10] docs: well-known instance generation Signed-off-by: devjow --- gts-macros/README.md | 50 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/gts-macros/README.md b/gts-macros/README.md index f8fe72c..280ad18 100644 --- a/gts-macros/README.md +++ b/gts-macros/README.md @@ -448,6 +448,56 @@ pub const ORDERS_TOPIC: &str = r#"{ }"#; ``` +### Quick Start Guide + +**Step 1 — Declare the instance in a Rust source file:** + +```rust +// src/gts/mod.rs +#[gts_macros::gts_well_known_instance( + dir_path = "instances", + schema_id = "gts.x.core.events.topic.v1~", + instance_segment = "x.commerce._.orders.v1.0" +)] +pub const ORDERS_TOPIC: &str = r#"{ + "name": "orders", + "description": "Order lifecycle events topic", + "retention": "P90D", + "partitions": 16 +}"#; +``` + +**Step 2 — Run the CLI to generate the `.instance.json` file:** + +```bash +gts generate-from-rust --source src/ --mode instances +``` + +This produces `instances/gts.x.core.events.topic.v1~x.commerce._.orders.v1.0.instance.json` with the `"id"` field injected automatically. + +**Step 3 — Use the instance:** + +```rust +// Reference the const directly (it's just a &str containing JSON) +let topic: serde_json::Value = serde_json::from_str(ORDERS_TOPIC)?; + +// The full instance ID is schema_id + instance_segment +let instance_id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0"; + +// Register or look up via the types-registry +let entity = registry.get(instance_id).await?; +``` + +The generated `.instance.json` file can also be loaded by the types-registry at bootstrap as seed data, or validated against its parent schema. 
+ +### When to use this vs runtime registration + +| Use `#[gts_well_known_instance]` | Use `gts_make_instance_id()` + `register()` | +|---|---| +| Instance payload is **fully known at compile time** | Payload depends on **runtime config** (vendor, priority, etc.) | +| Seed data, built-in defaults, test fixtures | Deployment-specific plugin registration | +| Produces a static `.instance.json` file | Registers in-memory at module `init()` | + ### Parameters | Parameter | Required | Description | From 0cfa147c7c64255f11d1d092857db3c9d8c3b76e Mon Sep 17 00:00:00 2001 From: devjow Date: Mon, 9 Mar 2026 15:05:09 +0000 Subject: [PATCH 07/10] refactor(gts-macros): consolidate schema_id + instance_segment into single id attribute Signed-off-by: devjow --- gts-cli/src/gen_instances/attrs.rs | 117 ++++++++---------- gts-cli/src/gen_instances/mod.rs | 38 +++--- gts-cli/src/gen_instances/parser.rs | 29 ++--- gts-cli/src/gen_instances/writer.rs | 34 +++-- gts-cli/tests/gen_instances_tests.rs | 110 ++++++++-------- gts-macros/README.md | 30 +++-- gts-macros/src/lib.rs | 112 ++++++++--------- .../compile_fail/instance_const_wrong_type.rs | 3 +- .../instance_const_wrong_type.stderr | 8 +- .../compile_fail/instance_missing_dir_path.rs | 3 +- .../instance_missing_dir_path.stderr | 5 +- .../instance_missing_instance_segment.rs | 4 +- .../instance_missing_instance_segment.stderr | 13 +- .../instance_missing_schema_id.rs | 5 +- .../instance_missing_schema_id.stderr | 7 +- .../compile_fail/instance_on_non_const.rs | 3 +- .../compile_fail/instance_on_non_const.stderr | 5 +- .../instance_schema_id_no_tilde.rs | 5 +- .../instance_schema_id_no_tilde.stderr | 8 +- .../instance_segment_bare_wildcard.rs | 5 +- .../instance_segment_bare_wildcard.stderr | 8 +- .../instance_segment_ends_with_tilde.rs | 5 +- .../instance_segment_ends_with_tilde.stderr | 8 +- 23 files changed, 253 insertions(+), 312 deletions(-) diff --git a/gts-cli/src/gen_instances/attrs.rs b/gts-cli/src/gen_instances/attrs.rs 
index 5d9acc2..4a7d1fa 100644 --- a/gts-cli/src/gen_instances/attrs.rs +++ b/gts-cli/src/gen_instances/attrs.rs @@ -6,17 +6,22 @@ use std::collections::HashSet; #[derive(Debug, Clone)] pub struct InstanceAttrs { pub dir_path: String, + pub id: String, + /// Schema portion of `id` (up to and including `~`). Derived, used in tests/debug. + #[allow(dead_code)] pub schema_id: String, + /// Instance segment of `id` (after `~`). Derived, used in tests/debug. + #[allow(dead_code)] pub instance_segment: String, } /// Parse and validate instance annotation attribute body. /// /// # Errors -/// - Any required attribute (`dir_path`, `schema_id`, `instance_segment`) is missing -/// - `schema_id` does not end with `~` -/// - `instance_segment` ends with `~` or is a bare wildcard `*` -/// - The composed `schema_id + instance_segment` fails GTS ID validation +/// - Any required attribute (`dir_path`, `id`) is missing +/// - `id` does not contain `~` +/// - `id` ends with `~` (that is a schema/type, not an instance) +/// - The `id` fails GTS ID validation pub fn parse_instance_attrs( attr_body: &str, source_file: &str, @@ -28,48 +33,45 @@ pub fn parse_instance_attrs( anyhow::anyhow!("{source_file}:{line}: Missing required attribute 'dir_path' in #[gts_well_known_instance]") })?; - let schema_id = extract_str_attr(attr_body, "schema_id").ok_or_else(|| { - anyhow::anyhow!("{source_file}:{line}: Missing required attribute 'schema_id' in #[gts_well_known_instance]") + let id = extract_str_attr(attr_body, "id").ok_or_else(|| { + anyhow::anyhow!( + "{source_file}:{line}: Missing required attribute 'id' in #[gts_well_known_instance]" + ) })?; - let instance_segment = extract_str_attr(attr_body, "instance_segment").ok_or_else(|| { - anyhow::anyhow!("{source_file}:{line}: Missing required attribute 'instance_segment' in #[gts_well_known_instance]") + // Instance ID must contain ~ (separating schema from instance segment) + let tilde_pos = id.find('~').ok_or_else(|| { + anyhow::anyhow!( + 
"{source_file}:{line}: id '{id}' must contain '~' separating schema from instance segment" + ) })?; - if !schema_id.ends_with('~') { + // Instance ID must NOT end with ~ (that would be a schema/type, not an instance) + if id.ends_with('~') { bail!( - "{source_file}:{line}: schema_id '{schema_id}' must end with '~' (type marker). \ - Instance IDs are composed as schema_id + instance_segment." + "{source_file}:{line}: id '{id}' must not end with '~' \ + (that is a schema/type ID, not an instance ID)" ); } - if instance_segment.ends_with('~') { - bail!( - "{source_file}:{line}: instance_segment '{instance_segment}' must not end with '~' -- \ - that is a schema/type marker, not valid in an instance segment." - ); - } - - if instance_segment == "*" { - bail!( - "{source_file}:{line}: instance_segment must not be a bare wildcard '*'. \ - Wildcards are not valid in generated instance IDs." - ); - } + // Split into schema portion and instance segment + let schema_id = id[..=tilde_pos].to_string(); + let instance_segment = id[tilde_pos + 1..].to_string(); - let composed = format!("{schema_id}{instance_segment}"); - if let Err(e) = gts_id::validate_gts_id(&composed, false) { + // Validate the full ID + if let Err(e) = gts_id::validate_gts_id(&id, false) { let msg = match &e { gts_id::GtsIdError::Id { cause, .. } => cause.clone(), gts_id::GtsIdError::Segment { num, cause, .. 
} => { format!("segment #{num}: {cause}") } }; - bail!("{source_file}:{line}: Invalid composed instance ID '{composed}': {msg}"); + bail!("{source_file}:{line}: Invalid instance ID '{id}': {msg}"); } Ok(InstanceAttrs { dir_path, + id, schema_id, instance_segment, }) @@ -85,10 +87,7 @@ fn check_duplicate_attr_keys(attr_body: &str, source_file: &str, line: usize) -> let Some(re) = key_re else { return Ok(()); }; - let known: HashSet<&str> = ["dir_path", "schema_id", "instance_segment"] - .iter() - .copied() - .collect(); + let known: HashSet<&str> = ["dir_path", "id"].iter().copied().collect(); // Blank out string literal content so `key =` inside a value can't match. let stripped = blank_string_literals(attr_body); let mut seen: HashSet = HashSet::new(); @@ -205,58 +204,49 @@ mod tests { #[test] fn test_parse_valid_attrs() { - let body = r#"dir_path = "instances", schema_id = "gts.x.core.events.topic.v1~", instance_segment = "x.commerce._.orders.v1.0""#; + let body = + r#"dir_path = "instances", id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0""#; let attrs = parse_instance_attrs(body, "test.rs", 1).unwrap(); assert_eq!(attrs.dir_path, "instances"); + assert_eq!( + attrs.id, + "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0" + ); assert_eq!(attrs.schema_id, "gts.x.core.events.topic.v1~"); assert_eq!(attrs.instance_segment, "x.commerce._.orders.v1.0"); } #[test] fn test_missing_dir_path() { - let body = r#"schema_id = "gts.x.foo.v1~", instance_segment = "x.bar.v1.0""#; + let body = r#"id = "gts.x.foo.v1~x.bar.v1.0""#; let err = parse_instance_attrs(body, "test.rs", 5).unwrap_err(); assert!(err.to_string().contains("dir_path")); } #[test] - fn test_missing_schema_id() { - let body = r#"dir_path = "instances", instance_segment = "x.bar.v1.0""#; + fn test_missing_id() { + let body = r#"dir_path = "instances""#; let err = parse_instance_attrs(body, "test.rs", 5).unwrap_err(); - assert!(err.to_string().contains("schema_id")); + 
assert!(err.to_string().contains("id")); } #[test] - fn test_missing_instance_segment() { - let body = r#"dir_path = "instances", schema_id = "gts.x.foo.v1~""#; - let err = parse_instance_attrs(body, "test.rs", 5).unwrap_err(); - assert!(err.to_string().contains("instance_segment")); - } - - #[test] - fn test_schema_id_missing_tilde() { - let body = r#"dir_path = "instances", schema_id = "gts.x.foo.v1", instance_segment = "x.bar.v1.0""#; + fn test_id_missing_tilde() { + let body = r#"dir_path = "instances", id = "gts.x.foo.v1.x.bar.v1.0""#; let err = parse_instance_attrs(body, "test.rs", 1).unwrap_err(); - assert!(err.to_string().contains("must end with '~'")); + assert!(err.to_string().contains("'~'")); } #[test] - fn test_instance_segment_with_tilde() { - let body = r#"dir_path = "instances", schema_id = "gts.x.foo.v1~", instance_segment = "x.bar.v1~""#; + fn test_id_ends_with_tilde() { + let body = r#"dir_path = "instances", id = "gts.x.foo.v1~""#; let err = parse_instance_attrs(body, "test.rs", 1).unwrap_err(); assert!(err.to_string().contains("must not end with '~'")); } - #[test] - fn test_instance_segment_bare_wildcard() { - let body = r#"dir_path = "instances", schema_id = "gts.x.foo.v1~", instance_segment = "*""#; - let err = parse_instance_attrs(body, "test.rs", 1).unwrap_err(); - assert!(err.to_string().contains("wildcard")); - } - #[test] fn test_error_contains_file_and_line() { - let body = r#"schema_id = "gts.x.foo.v1~", instance_segment = "x.bar.v1.0""#; + let body = r#"id = "gts.x.foo.v1~x.bar.v1.0""#; let err = parse_instance_attrs(body, "src/events.rs", 42).unwrap_err(); let msg = err.to_string(); assert!(msg.contains("src/events.rs")); @@ -265,10 +255,11 @@ mod tests { #[test] fn test_key_in_string_value_not_false_duplicate() { - // dir_path value contains "schema_id = x" — must not trigger a false duplicate. 
- let body = r#"dir_path = "schema_id = x", schema_id = "gts.x.core.events.topic.v1~", instance_segment = "x.commerce._.orders.v1.0""#; + // dir_path value contains "id = x" — must not trigger a false duplicate. + let body = + r#"dir_path = "id = x", id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0""#; let attrs = parse_instance_attrs(body, "test.rs", 1).unwrap(); - assert_eq!(attrs.dir_path, "schema_id = x"); + assert_eq!(attrs.dir_path, "id = x"); } #[test] @@ -276,19 +267,19 @@ mod tests { // Raw string content containing key= must be blanked so duplicate detection // can't see it. Attribute values always use regular "..." in practice, but // blank_string_literals is defensive. - // Input: r#"schema_id = x"# rest - let s = "r#\"schema_id = x\"# rest"; + // Input: r#"id = x"# rest + let s = "r#\"id = x\"# rest"; let blanked = blank_string_literals(s); // The content between r#" and "# must be spaces; the surrounding tokens intact. assert!( - !blanked.contains("schema_id"), + !blanked.contains("id = x"), "raw string content should be blanked, got: {blanked:?}" ); } #[test] fn test_real_duplicate_key_is_rejected() { - let body = r#"dir_path = "instances", dir_path = "other", schema_id = "gts.x.core.events.topic.v1~", instance_segment = "x.commerce._.orders.v1.0""#; + let body = r#"dir_path = "instances", dir_path = "other", id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0""#; let err = parse_instance_attrs(body, "test.rs", 1).unwrap_err(); assert!(err.to_string().contains("Duplicate attribute")); } diff --git a/gts-cli/src/gen_instances/mod.rs b/gts-cli/src/gen_instances/mod.rs index 8da16cb..2ed60d1 100644 --- a/gts-cli/src/gen_instances/mod.rs +++ b/gts-cli/src/gen_instances/mod.rs @@ -86,14 +86,16 @@ fn check_duplicate_ids(instances: &[ParsedInstance]) -> Result<()> { let mut errors: Vec = Vec::new(); for inst in instances { - let composed = format!("{}{}", inst.attrs.schema_id, inst.attrs.instance_segment); - if let Some(prev) = 
id_seen.get(composed.as_str()) { + if let Some(prev) = id_seen.get(inst.attrs.id.as_str()) { errors.push(format!( - "Duplicate instance ID '{composed}':\n first: {}\n second: {}:{}", - prev, inst.source_file, inst.line + "Duplicate instance ID '{}':\n first: {}\n second: {}:{}", + inst.attrs.id, prev, inst.source_file, inst.line )); } else { - id_seen.insert(composed, format!("{}:{}", inst.source_file, inst.line)); + id_seen.insert( + inst.attrs.id.clone(), + format!("{}:{}", inst.source_file, inst.line), + ); } } @@ -120,9 +122,8 @@ fn check_duplicate_output_paths( let mut errors: Vec = Vec::new(); for inst in instances { - let composed = format!("{}{}", inst.attrs.schema_id, inst.attrs.instance_segment); - let file_rel = - std::path::Path::new(&inst.attrs.dir_path).join(format!("{composed}.instance.json")); + let file_rel = std::path::Path::new(&inst.attrs.dir_path) + .join(format!("{}.instance.json", inst.attrs.id)); let raw_path = if let Some(od) = output { Path::new(od).join(&file_rel) } else { @@ -172,8 +173,7 @@ fn emit_instances( for inst in instances { let file_path = generate_single_instance(inst, output, sandbox_root) .map_err(|e| anyhow::anyhow!("{}: {}", inst.source_file, e))?; - let composed = format!("{}{}", inst.attrs.schema_id, inst.attrs.instance_segment); - println!(" Generated instance: {composed} @ {file_path}"); + println!(" Generated instance: {} @ {file_path}", inst.attrs.id); count += 1; } Ok(count) @@ -199,17 +199,16 @@ mod tests { fs::write(dir.join(name), content).unwrap(); } - fn valid_src(instance_segment: &str, json_body: &str) -> String { + fn valid_src(id: &str, json_body: &str) -> String { format!( concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"{}\"\n", + " id = \"{}\"\n", ")]\n", "pub const FOO: &str = {};\n" ), - instance_segment, json_body + id, json_body ) } @@ -222,7 +221,7 @@ mod tests { &root, "module.rs", &valid_src( - 
"x.commerce._.orders.v1.0", + "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", r#""{\"name\": \"orders\", \"partitions\": 16}""#, ), ); @@ -261,14 +260,12 @@ mod tests { let dup_src = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.orders.v1.0\"\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", "const A: &str = \"{\\\"name\\\": \\\"a\\\"}\";\n", "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.orders.v1.0\"\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", "const B: &str = \"{\\\"name\\\": \\\"b\\\"}\";\n" ); @@ -302,8 +299,7 @@ mod tests { concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"bad-no-tilde\",\n", - " instance_segment = \"x.a.v1.0\"\n", + " id = \"bad-no-tilde\"\n", ")]\n", "const X: &str = \"{}\";\n" ), diff --git a/gts-cli/src/gen_instances/parser.rs b/gts-cli/src/gen_instances/parser.rs index 0351679..b571fbf 100644 --- a/gts-cli/src/gen_instances/parser.rs +++ b/gts-cli/src/gen_instances/parser.rs @@ -112,7 +112,7 @@ fn validate_json_body(json_body: &str, source_file: &str, line: usize) -> Result if json_val.get("id").is_some() { bail!( "{source_file}:{line}: Instance JSON body must not contain an \"id\" field. \ - The id is automatically injected from schema_id + instance_segment. \ + The id is automatically injected from the `id` attribute. \ Remove the \"id\" field from the JSON body." 
); } @@ -451,12 +451,11 @@ mod tests { concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"{}\"\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", "const FOO: &str = {};\n" ), - "x.commerce._.orders.v1.0", body + body ) } @@ -525,6 +524,10 @@ mod tests { let content = src(r#""{\"name\": \"orders\"}""#); let result = extract_instances_from_source(&content, Path::new("t.rs")).unwrap(); assert_eq!(result.len(), 1); + assert_eq!( + result[0].attrs.id, + "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0" + ); assert_eq!(result[0].attrs.schema_id, "gts.x.core.events.topic.v1~"); assert_eq!(result[0].attrs.instance_segment, "x.commerce._.orders.v1.0"); } @@ -562,8 +565,7 @@ mod tests { let content = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.foo.v1~\",\n", - " instance_segment = \"x.bar.v1.0\"\n", + " id = \"gts.x.foo.v1~x.bar.v1.0\"\n", ")]\n", "static FOO: &str = \"{}\";\n" ); @@ -576,8 +578,7 @@ mod tests { let content = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.foo.v1~\",\n", - " instance_segment = \"x.bar.v1.0\"\n", + " id = \"gts.x.foo.v1~x.bar.v1.0\"\n", ")]\n", "const FOO: &str = concat!(\"{\", \"}\");\n" ); @@ -590,14 +591,12 @@ mod tests { let content = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.orders.v1.0\"\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", "const A: &str = \"{\\\"name\\\": \\\"orders\\\"}\";\n", "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.payments.v1.0\"\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.payments.v1.0\"\n", ")]\n", "const B: 
&str = \"{\\\"name\\\": \\\"payments\\\"}\";\n" ); @@ -610,8 +609,7 @@ mod tests { let content = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.orders.v1.0\"\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", "pub const FOO: &str = \"{\\\"name\\\": \\\"orders\\\"}\";\n" ); @@ -626,8 +624,7 @@ mod tests { "// line 2\n", "#[gts_well_known_instance(\n", // line 3 " dir_path = \"instances\",\n", - " schema_id = \"gts.x.foo.v1~\",\n", - " instance_segment = \"x.bar.v1.0\"\n", + " id = \"gts.x.foo.v1~x.bar.v1.0\"\n", ")]\n", "const FOO: &str = \"{\\\"id\\\": \\\"bad\\\"}\";\n" ); diff --git a/gts-cli/src/gen_instances/writer.rs b/gts-cli/src/gen_instances/writer.rs index 0b00ba6..476a8ae 100644 --- a/gts-cli/src/gen_instances/writer.rs +++ b/gts-cli/src/gen_instances/writer.rs @@ -11,8 +11,7 @@ use super::parser::ParsedInstance; /// Validates the output path against the sandbox boundary **before** any /// filesystem mutations (validate-before-mkdir policy). /// -/// Injects the `"id"` field (composed as `schema_id + instance_segment`) into -/// the JSON object before writing. +/// Injects the `"id"` field into the JSON object before writing. /// /// Returns the written file path as a `String` on success. 
/// @@ -23,9 +22,8 @@ pub fn generate_single_instance( output: Option<&str>, sandbox_root: &Path, ) -> Result { - let composed = format!("{}{}", inst.attrs.schema_id, inst.attrs.instance_segment); let file_rel = - std::path::Path::new(&inst.attrs.dir_path).join(format!("{composed}.instance.json")); + std::path::Path::new(&inst.attrs.dir_path).join(format!("{}.instance.json", inst.attrs.id)); let raw_output_path = if let Some(od) = output { Path::new(od).join(&file_rel) @@ -60,7 +58,10 @@ pub fn generate_single_instance( // Build JSON with injected "id" field let mut obj: serde_json::Map = serde_json::from_str(&inst.json_body)?; - obj.insert("id".to_owned(), serde_json::Value::String(composed)); + obj.insert( + "id".to_owned(), + serde_json::Value::String(inst.attrs.id.clone()), + ); let output_value = serde_json::Value::Object(obj); // Create parent directories only after sandbox validation passes @@ -81,18 +82,14 @@ mod tests { use crate::gen_instances::attrs::InstanceAttrs; use tempfile::TempDir; - fn make_inst( - dir_path: &str, - schema_id: &str, - instance_segment: &str, - json_body: &str, - source_file: &str, - ) -> ParsedInstance { + fn make_inst(dir_path: &str, id: &str, json_body: &str, source_file: &str) -> ParsedInstance { + let tilde_pos = id.find('~').unwrap(); ParsedInstance { attrs: InstanceAttrs { dir_path: dir_path.to_owned(), - schema_id: schema_id.to_owned(), - instance_segment: instance_segment.to_owned(), + id: id.to_owned(), + schema_id: id[..=tilde_pos].to_owned(), + instance_segment: id[tilde_pos + 1..].to_owned(), }, json_body: json_body.to_owned(), source_file: source_file.to_owned(), @@ -108,8 +105,7 @@ mod tests { let inst = make_inst( "instances", - "gts.x.core.events.topic.v1~", - "x.commerce._.orders.v1.0", + "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", r#"{"name": "orders", "partitions": 16}"#, src.to_str().unwrap(), ); @@ -140,8 +136,7 @@ mod tests { let inst = make_inst( "../../etc", - "gts.x.core.events.topic.v1~", - 
"x.commerce._.orders.v1.0", + "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", r#"{"name": "x"}"#, src.to_str().unwrap(), ); @@ -164,8 +159,7 @@ mod tests { let inst = make_inst( "instances", - "gts.x.core.events.topic.v1~", - "x.commerce._.orders.v1.0", + "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", r#"{"name": "x"}"#, src.to_str().unwrap(), ); diff --git a/gts-cli/tests/gen_instances_tests.rs b/gts-cli/tests/gen_instances_tests.rs index 77f3b99..eafb549 100644 --- a/gts-cli/tests/gen_instances_tests.rs +++ b/gts-cli/tests/gen_instances_tests.rs @@ -40,17 +40,16 @@ fn write(dir: &Path, name: &str, content: &str) { fs::write(dir.join(name), content).unwrap(); } -fn instance_src(instance_segment: &str, json_body: &str) -> String { +fn instance_src(id: &str, json_body: &str) -> String { format!( concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"{seg}\"\n", + " id = \"{id}\"\n", ")]\n", "pub const INST: &str = {body};\n" ), - seg = instance_segment, + id = id, body = json_body ) } @@ -76,7 +75,7 @@ fn inst_path(root: &Path, id: &str) -> std::path::PathBuf { fn golden_single_instance() { let (_tmp, root) = sandbox(); let src = instance_src( - "x.commerce._.orders.v1.0", + "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", r#""{\"name\":\"orders\",\"partitions\":16}""#, ); write(&root, "events.rs", &src); @@ -103,8 +102,7 @@ fn golden_raw_string_literal() { let src = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.payments.v1.0\"\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.payments.v1.0\"\n", ")]\n", "pub const PAYMENTS: &str = r#\"{\"name\":\"payments\",\"partitions\":8}\"#;\n" ); @@ -132,14 +130,12 @@ fn multiple_instances_in_one_file() { let src = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " 
schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.orders.v1.0\"\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", "pub const A: &str = \"{\\\"name\\\":\\\"orders\\\"}\";\n", "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.payments.v1.0\"\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.payments.v1.0\"\n", ")]\n", "pub const B: &str = \"{\\\"name\\\":\\\"payments\\\"}\";\n" ); @@ -168,12 +164,18 @@ fn multiple_files_in_directory() { write( &root, "a.rs", - &instance_src("x.commerce._.orders.v1.0", "\"{\\\"name\\\":\\\"a\\\"}\""), + &instance_src( + "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", + "\"{\\\"name\\\":\\\"a\\\"}\"", + ), ); write( &root, "b.rs", - &instance_src("x.commerce._.payments.v1.0", "\"{\\\"name\\\":\\\"b\\\"}\""), + &instance_src( + "gts.x.core.events.topic.v1~x.commerce._.payments.v1.0", + "\"{\\\"name\\\":\\\"b\\\"}\"", + ), ); run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap(); @@ -198,8 +200,7 @@ fn pub_crate_visibility_accepted() { let src = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.orders.v1.0\"\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", "pub(crate) const FOO: &str = \"{\\\"name\\\":\\\"x\\\"}\";\n" ); @@ -223,7 +224,7 @@ fn output_adjacent_to_source_when_no_override() { &subdir, "topic.rs", &instance_src( - "x.commerce._.orders.v1.0", + "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", "\"{\\\"name\\\":\\\"orders\\\"}\"", ), ); @@ -249,7 +250,7 @@ fn id_field_in_body_is_rejected() { &root, "events.rs", &instance_src( - "x.commerce._.orders.v1.0", + "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", "\"{\\\"id\\\":\\\"bad\\\",\\\"name\\\":\\\"x\\\"}\"", ), ); @@ 
-268,14 +269,12 @@ fn duplicate_instance_id_hard_error() { let src = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.orders.v1.0\"\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", "pub const A: &str = \"{\\\"name\\\":\\\"a\\\"}\";\n", "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.orders.v1.0\"\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", "pub const B: &str = \"{\\\"name\\\":\\\"b\\\"}\";\n" ); @@ -298,8 +297,7 @@ fn sandbox_escape_rejected() { let src = concat!( "#[gts_well_known_instance(\n", " dir_path = \"../../etc\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.orders.v1.0\"\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", "pub const FOO: &str = \"{\\\"name\\\":\\\"x\\\"}\";\n" ); @@ -324,8 +322,7 @@ fn exclude_pattern_skips_file() { let src = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"bad-no-tilde\",\n", - " instance_segment = \"x.a.v1.0\"\n", + " id = \"bad-no-tilde\"\n", ")]\n", "pub const X: &str = \"{}\";\n" ); @@ -351,8 +348,7 @@ fn gts_ignore_directive_skips_file() { "// gts:ignore\n", "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"bad-no-tilde\",\n", - " instance_segment = \"x.a.v1.0\"\n", + " id = \"bad-no-tilde\"\n", ")]\n", "pub const X: &str = \"{}\";\n" ); @@ -405,8 +401,7 @@ fn concat_macro_value_is_rejected() { let src = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.orders.v1.0\"\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", "pub const FOO: &str = 
concat!(\"{\", \"}\");\n" ); @@ -426,8 +421,7 @@ fn static_item_is_rejected() { let src = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.orders.v1.0\"\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", "pub static FOO: &str = \"{\\\"name\\\":\\\"x\\\"}\";\n" ); @@ -438,17 +432,16 @@ fn static_item_is_rejected() { } // ───────────────────────────────────────────────────────────────────────────── -// schema_id without trailing ~ is rejected +// id without ~ separator is rejected // ───────────────────────────────────────────────────────────────────────────── #[test] -fn schema_id_without_tilde_is_rejected() { +fn id_without_tilde_is_rejected() { let (_tmp, root) = sandbox(); let src = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1\",\n", - " instance_segment = \"x.commerce._.orders.v1.0\"\n", + " id = \"gts.x.core.events.topic.v1.x.commerce._.orders.v1.0\"\n", ")]\n", "pub const FOO: &str = \"{\\\"name\\\":\\\"x\\\"}\";\n" ); @@ -459,17 +452,16 @@ fn schema_id_without_tilde_is_rejected() { } // ───────────────────────────────────────────────────────────────────────────── -// instance_segment with trailing ~ is rejected +// id ending with ~ (schema/type, not instance) is rejected // ───────────────────────────────────────────────────────────────────────────── #[test] -fn instance_segment_with_tilde_is_rejected() { +fn id_ending_with_tilde_is_rejected() { let (_tmp, root) = sandbox(); let src = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.orders.v1~\"\n", + " id = \"gts.x.core.events.topic.v1~\"\n", ")]\n", "pub const FOO: &str = \"{\\\"name\\\":\\\"x\\\"}\";\n" ); @@ -492,7 +484,10 @@ fn json_array_body_is_rejected() { write( &root, 
"events.rs", - &instance_src("x.commerce._.orders.v1.0", "\"[1,2,3]\""), + &instance_src( + "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", + "\"[1,2,3]\"", + ), ); let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); @@ -509,7 +504,10 @@ fn malformed_json_body_is_rejected() { write( &root, "events.rs", - &instance_src("x.commerce._.orders.v1.0", "\"{not valid json}\""), + &instance_src( + "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", + "\"{not valid json}\"", + ), ); let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); @@ -526,8 +524,7 @@ fn golden_file_content_exact() { let src = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.orders.v1.0\"\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", "pub const ORDERS: &str = \"{\\\"name\\\":\\\"orders\\\",\\\"partitions\\\":16}\";\n" ); @@ -565,8 +562,7 @@ fn zero_hash_raw_string_is_accepted() { let src = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.orders.v1.0\"\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", "pub const ZERO_HASH: &str = r#\"{\"name\":\"zero\"}\"#;\n" ); @@ -619,14 +615,12 @@ fn unsupported_form_in_comment_does_not_error() { "/// Example (do NOT use):\n", "/// #[gts_well_known_instance(\n", "/// dir_path = \"instances\",\n", - "/// schema_id = \"gts.x.core.events.topic.v1~\",\n", - "/// instance_segment = \"x.a.v1.0\"\n", + "/// id = \"gts.x.core.events.topic.v1~x.a.v1.0\"\n", "/// )]\n", "/// pub const BAD: &str = concat!(\"{\", \"}\");\n", "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.orders.v1.0\"\n", + " id = 
\"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", "pub const REAL: &str = \"{\\\"name\\\":\\\"real\\\"}\";\n" ); @@ -650,8 +644,7 @@ fn annotation_on_fn_is_hard_error() { let src = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.orders.v1.0\"\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", "pub fn not_a_const() -> &'static str { \"{}\" }\n" ); @@ -677,8 +670,7 @@ fn duplicate_attribute_key_is_hard_error() { "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", " dir_path = \"other\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.orders.v1.0\"\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", "pub const DUP: &str = \"{\\\"name\\\":\\\"x\\\"}\";\n" ); @@ -706,14 +698,12 @@ fn dot_slash_dir_path_same_id_is_duplicate() { let src = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.orders.v1.0\"\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", "pub const A: &str = \"{\\\"name\\\":\\\"a\\\"}\";\n", "#[gts_well_known_instance(\n", " dir_path = \"./instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.orders.v1.0\"\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", "pub const B: &str = \"{\\\"name\\\":\\\"b\\\"}\";\n" ); @@ -738,8 +728,7 @@ fn qualified_path_form_is_accepted() { let src = concat!( "#[gts_macros::gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"gts.x.core.events.topic.v1~\",\n", - " instance_segment = \"x.commerce._.orders.v1.0\"\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", "pub const QUALIFIED: &str = 
r#\"{\"name\":\"qualified\"}\"#;\n" ); @@ -768,8 +757,7 @@ fn compile_fail_dir_is_auto_skipped() { let src = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", - " schema_id = \"bad-no-tilde\",\n", - " instance_segment = \"x.a.v1.0\"\n", + " id = \"bad-no-tilde\"\n", ")]\n", "pub const X: &str = \"{}\";\n" ); diff --git a/gts-macros/README.md b/gts-macros/README.md index 280ad18..40c99b3 100644 --- a/gts-macros/README.md +++ b/gts-macros/README.md @@ -427,7 +427,7 @@ The CLI automatically maps Rust types to JSON Schema types: The `#[gts_well_known_instance]` attribute macro declares a **well-known GTS instance** as a `const` JSON string literal. It provides: -1. **Compile-time validation** of `schema_id`, `instance_segment`, and the composed instance ID format. +1. **Compile-time validation** of the `id` (full instance ID) format. 2. **CLI extraction** — the `gts generate-from-rust --mode instances` command scans for these annotations, validates the JSON payload, injects the `"id"` field, and writes the instance file. The macro passes the annotated `const` through **unchanged** at compile time. It is purely metadata for the CLI extraction step. @@ -437,8 +437,7 @@ The macro passes the annotated `const` through **unchanged** at compile time. 
It ```rust #[gts_macros::gts_well_known_instance( dir_path = "instances", - schema_id = "gts.x.core.events.topic.v1~", - instance_segment = "x.commerce._.orders.v1.0" + id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0" )] pub const ORDERS_TOPIC: &str = r#"{ "name": "orders", @@ -456,8 +455,7 @@ pub const ORDERS_TOPIC: &str = r#"{ // src/gts/mod.rs #[gts_macros::gts_well_known_instance( dir_path = "instances", - schema_id = "gts.x.core.events.topic.v1~", - instance_segment = "x.commerce._.orders.v1.0" + id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0" )] pub const ORDERS_TOPIC: &str = r#"{ "name": "orders", @@ -481,7 +479,7 @@ This produces `instances/gts.x.core.events.topic.v1~x.commerce._.orders.v1.0.ins // Reference the const directly (it's just a &str containing JSON) let topic: serde_json::Value = serde_json::from_str(ORDERS_TOPIC)?; -// The full instance ID is schema_id + instance_segment +// The full instance ID is the `id` attribute value let instance_id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0"; // Register or look up via the types-registry @@ -503,17 +501,19 @@ The generated `.instance.json` file can also be loaded by the types-registry at | Parameter | Required | Description | |-----------|----------|-------------| | `dir_path` | Yes | Output directory for the generated instance file (relative to source file or `--output`) | -| `schema_id` | Yes | GTS schema ID this instance conforms to — **must end with `~`** | -| `instance_segment` | Yes | Appended to `schema_id` to form the full instance ID — **must not end with `~`**, must not be a bare `*` | +| `id` | Yes | Full GTS instance ID — **must contain `~`** separating the schema from the instance segment, **must not end with `~`** | -The full instance ID is `schema_id + instance_segment`, e.g.: +The `id` is the full instance ID, e.g.: ``` gts.x.core.events.topic.v1~x.commerce._.orders.v1.0 +│ │ │ │ +│ schema portion (→ ~) │ │ instance segment │ +└─────────────────────────┘ 
└───────────────────────┘ ``` ### Generated file -The CLI writes `{dir_path}/{schema_id}{instance_segment}.instance.json` with the `"id"` field automatically injected: +The CLI writes `{dir_path}/{id}.instance.json` with the `"id"` field automatically injected: ```json { @@ -543,7 +543,7 @@ gts generate-from-rust --source src/ --output out/ --mode instances - The annotated item **must be a `const`** (not `static`) of type `&str`. - The const value **must be a string literal** — raw strings (`r#"..."#`) or regular strings (`"..."`). `concat!()` and other macro invocations are not supported. - The JSON body **must be a JSON object** (`{ ... }`). Arrays, scalars, and `null` are not valid. -- The JSON body **must not contain an `"id"` field** — the CLI injects it automatically from `schema_id + instance_segment`. +- The JSON body **must not contain an `"id"` field** — the CLI injects it automatically from the `id` attribute. - Files in `compile_fail/` directories and files with a `// gts:ignore` directive are skipped. - Items behind `#[cfg(...)]` gates (e.g., `#[cfg(test)]`) are still extracted — extraction is lexical, not conditional. 
@@ -551,12 +551,10 @@ gts generate-from-rust --source src/ --output out/ --mode instances | Violation | Error | |-----------|-------| -| Missing `schema_id` | `Missing required attribute: schema_id` | -| Missing `instance_segment` | `Missing required attribute: instance_segment` | +| Missing `id` | `Missing required attribute: id` | | Missing `dir_path` | `Missing required attribute: dir_path` | -| `schema_id` without trailing `~` | `schema_id must end with '~' (type marker)` | -| `instance_segment` ending with `~` | `instance_segment must not end with '~'` | -| `instance_segment = "*"` (bare wildcard) | `instance_segment must not be a bare wildcard '*'` | +| `id` without `~` | `id must contain '~' separating schema from instance segment` | +| `id` ending with `~` | `id must not end with '~' (that is a schema/type ID)` | | Applied to a `static` item | `Only \`const\` items are supported` | | Applied to a `const` with type other than `&str` | `The annotated const must have type \`&str\`` | | Const value is `concat!()` or other macro | `The const value must be a string literal` | diff --git a/gts-macros/src/lib.rs b/gts-macros/src/lib.rs index 84ce060..7d849a5 100644 --- a/gts-macros/src/lib.rs +++ b/gts-macros/src/lib.rs @@ -1848,8 +1848,7 @@ struct GtsInstanceArgs; impl Parse for GtsInstanceArgs { fn parse(input: ParseStream) -> syn::Result { let mut dir_path: Option = None; - let mut schema_id: Option = None; - let mut instance_segment: Option = None; + let mut id: Option<(String, proc_macro2::Span)> = None; let mut seen_keys: std::collections::HashSet = std::collections::HashSet::new(); while !input.is_empty() { @@ -1871,26 +1870,44 @@ impl Parse for GtsInstanceArgs { let value: LitStr = input.parse()?; dir_path = Some(value.value()); } - "schema_id" => { + "id" => { let value: LitStr = input.parse()?; - let id = value.value(); - // schema_id must end with ~ (type marker) - if !id.ends_with('~') { + let full_id = value.value(); + + // Instance ID must contain ~ 
(separating schema from instance segment) + let Some(tilde_pos) = full_id.find('~') else { + return Err(syn::Error::new_spanned( + value, + format!( + "gts_well_known_instance: id must contain '~' separating schema from instance segment, got '{full_id}'" + ), + )); + }; + + // Instance ID must NOT end with ~ (that would be a schema/type, not an instance) + if full_id.ends_with('~') { return Err(syn::Error::new_spanned( value, format!( - "gts_well_known_instance: schema_id must end with '~' (type marker), got '{id}'" + "gts_well_known_instance: id must not end with '~' (that is a schema/type ID, not an instance ID), got '{full_id}'" ), )); } - // General GTS ID validation - if let Err(e) = gts_id::validate_gts_id(&id, false) { + + // Split into schema portion (up to and including ~) and instance segment + let schema_part = &full_id[..=tilde_pos]; + let instance_part = &full_id[tilde_pos + 1..]; + + // Validate schema portion + if let Err(e) = gts_id::validate_gts_id(schema_part, false) { let msg = match &e { gts_id::GtsIdError::Id { cause, .. } => { - format!("Invalid GTS schema ID: {cause}") + format!("Invalid schema portion '{schema_part}': {cause}") } gts_id::GtsIdError::Segment { num, cause, .. } => { - format!("Segment #{num}: {cause}") + format!( + "Invalid schema portion '{schema_part}': Segment #{num}: {cause}" + ) } }; return Err(syn::Error::new_spanned( @@ -1898,44 +1915,39 @@ impl Parse for GtsInstanceArgs { format!("gts_well_known_instance: {msg}"), )); } - schema_id = Some(id); - } - "instance_segment" => { - let value: LitStr = input.parse()?; - let seg = value.value(); - // Reject instance_segment ending with ~ (type/schema marker) - if seg.ends_with('~') { + // Validate instance segment (no wildcards) + if let Err(cause) = gts_id::validate_segment(2, instance_part, false) { return Err(syn::Error::new_spanned( value, - "gts_well_known_instance: instance_segment must not end with '~' \u{2014} that is a schema/type marker. 
Instance segments do not end with '~'", - )); - } - - // Reject wildcard-only instance_segment - if seg == "*" { - return Err(syn::Error::new_spanned( - value, - "gts_well_known_instance: instance_segment must not be a bare wildcard '*'. Wildcards are not valid in generated instance IDs", + format!( + "gts_well_known_instance: Invalid instance segment '{instance_part}': {cause}" + ), )); } - // Validate the segment using gts_id::validate_segment (no wildcards) - if let Err(cause) = gts_id::validate_segment(2, &seg, false) { + // Validate the full composed ID + if let Err(e) = gts_id::validate_gts_id(&full_id, false) { + let msg = match &e { + gts_id::GtsIdError::Id { cause, .. } => { + format!("Invalid instance ID '{full_id}': {cause}") + } + gts_id::GtsIdError::Segment { num, cause, .. } => { + format!("Invalid instance ID '{full_id}': Segment #{num}: {cause}") + } + }; return Err(syn::Error::new_spanned( value, - format!( - "gts_well_known_instance: Invalid instance_segment '{seg}': {cause}" - ), + format!("gts_well_known_instance: {msg}"), )); } - instance_segment = Some(seg); + id = Some((full_id, value.span())); } _ => { return Err(syn::Error::new_spanned( key, - "gts_well_known_instance: Unknown attribute. Expected: dir_path, schema_id, instance_segment", + "gts_well_known_instance: Unknown attribute. 
Expected: dir_path, id", )); } } @@ -1945,31 +1957,13 @@ impl Parse for GtsInstanceArgs { } } - // Validate composed instance ID after both parts are parsed - let schema_id_val = schema_id.ok_or_else(|| { - input.error("gts_well_known_instance: Missing required attribute: schema_id") - })?; - let instance_segment_val = instance_segment.ok_or_else(|| { - input.error("gts_well_known_instance: Missing required attribute: instance_segment") + let _id_val = id.ok_or_else(|| { + input.error("gts_well_known_instance: Missing required attribute: id") })?; let _dir_path_val = dir_path.ok_or_else(|| { input.error("gts_well_known_instance: Missing required attribute: dir_path") })?; - // Validate the composed instance ID (schema_id + instance_segment forms a valid GTS instance ID) - let composed = format!("{schema_id_val}{instance_segment_val}"); - if let Err(e) = gts_id::validate_gts_id(&composed, false) { - let msg = match &e { - gts_id::GtsIdError::Id { cause, .. } => { - format!("Invalid composed instance ID '{composed}': {cause}") - } - gts_id::GtsIdError::Segment { num, cause, .. } => { - format!("Invalid composed instance ID '{composed}': Segment #{num}: {cause}") - } - }; - return Err(input.error(format!("gts_well_known_instance: {msg}"))); - } - Ok(GtsInstanceArgs) } } @@ -1977,11 +1971,11 @@ impl Parse for GtsInstanceArgs { /// Declare a well-known GTS instance as a const JSON string literal. /// /// This macro: -/// 1. **At compile time**: validates the `schema_id` and `instance_segment` GTS ID formats +/// 1. **At compile time**: validates the `id` GTS instance ID format /// and verifies the annotated item is a `const` of type `&str`. /// 2. **At generate time**: the CLI (`gts generate-from-rust --mode instances`) scans for /// these annotations, validates the JSON payload, injects the `"id"` field, and writes -/// `{dir_path}/{schema_id}{instance_segment}.instance.json`. +/// `{dir_path}/{id}.instance.json`. 
/// /// The macro passes the annotated `const` item through unchanged -- it is purely metadata /// for the CLI extraction step. @@ -1989,8 +1983,7 @@ impl Parse for GtsInstanceArgs { /// # Arguments /// /// * `dir_path` - Output directory for the generated instance file (relative to crate root or `--output`) -/// * `schema_id` - GTS schema ID this instance conforms to (must end with `~`) -/// * `instance_segment` - Appended to `schema_id` to form the full instance ID (must not end with `~`) +/// * `id` - Full GTS instance ID (must contain `~` separating schema from instance segment, must not end with `~`) /// /// # Example /// @@ -1999,8 +1992,7 @@ impl Parse for GtsInstanceArgs { /// /// #[gts_well_known_instance( /// dir_path = "instances", -/// schema_id = "gts.x.core.events.topic.v1~", -/// instance_segment = "x.commerce._.orders.v1.0" +/// id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0" /// )] /// const ORDERS_TOPIC: &str = r#"{ /// "name": "orders", diff --git a/gts-macros/tests/compile_fail/instance_const_wrong_type.rs b/gts-macros/tests/compile_fail/instance_const_wrong_type.rs index 61b6975..f49828d 100644 --- a/gts-macros/tests/compile_fail/instance_const_wrong_type.rs +++ b/gts-macros/tests/compile_fail/instance_const_wrong_type.rs @@ -4,8 +4,7 @@ use gts_macros::gts_well_known_instance; #[gts_well_known_instance( dir_path = "instances", - schema_id = "gts.x.core.events.topic.v1~", - instance_segment = "x.commerce._.orders.v1.0" + id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0" )] const ORDERS_TOPIC: u32 = 42; diff --git a/gts-macros/tests/compile_fail/instance_const_wrong_type.stderr b/gts-macros/tests/compile_fail/instance_const_wrong_type.stderr index 11a83ef..db4defe 100644 --- a/gts-macros/tests/compile_fail/instance_const_wrong_type.stderr +++ b/gts-macros/tests/compile_fail/instance_const_wrong_type.stderr @@ -1,5 +1,5 @@ error: gts_well_known_instance: The annotated const must have type `&str`, got `u32`. 
Usage: `const NAME: &str = r#"{ ... }"#;` - --> tests/compile_fail/instance_const_wrong_type.rs:10:21 - | -10 | const ORDERS_TOPIC: u32 = 42; - | ^^^ + --> tests/compile_fail/instance_const_wrong_type.rs:9:21 + | +9 | const ORDERS_TOPIC: u32 = 42; + | ^^^ diff --git a/gts-macros/tests/compile_fail/instance_missing_dir_path.rs b/gts-macros/tests/compile_fail/instance_missing_dir_path.rs index 28842a1..d1a359b 100644 --- a/gts-macros/tests/compile_fail/instance_missing_dir_path.rs +++ b/gts-macros/tests/compile_fail/instance_missing_dir_path.rs @@ -3,8 +3,7 @@ use gts_macros::gts_well_known_instance; #[gts_well_known_instance( - schema_id = "gts.x.core.events.topic.v1~", - instance_segment = "x.commerce._.orders.v1.0" + id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0" )] const ORDERS_TOPIC: &str = r#"{"name": "orders"}"#; diff --git a/gts-macros/tests/compile_fail/instance_missing_dir_path.stderr b/gts-macros/tests/compile_fail/instance_missing_dir_path.stderr index 9abf8a3..59d2b39 100644 --- a/gts-macros/tests/compile_fail/instance_missing_dir_path.stderr +++ b/gts-macros/tests/compile_fail/instance_missing_dir_path.stderr @@ -2,9 +2,8 @@ error: unexpected end of input, gts_well_known_instance: Missing required attrib --> tests/compile_fail/instance_missing_dir_path.rs:5:1 | 5 | / #[gts_well_known_instance( -6 | | schema_id = "gts.x.core.events.topic.v1~", -7 | | instance_segment = "x.commerce._.orders.v1.0" -8 | | )] +6 | | id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0" +7 | | )] | |__^ | = note: this error originates in the attribute macro `gts_well_known_instance` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/gts-macros/tests/compile_fail/instance_missing_instance_segment.rs b/gts-macros/tests/compile_fail/instance_missing_instance_segment.rs index 66eb196..e78c0bd 100644 --- a/gts-macros/tests/compile_fail/instance_missing_instance_segment.rs +++ 
b/gts-macros/tests/compile_fail/instance_missing_instance_segment.rs @@ -1,10 +1,10 @@ -//! Test: Missing required attribute instance_segment in gts_well_known_instance +//! Test: Unknown attribute 'instance_segment' in gts_well_known_instance (no longer valid) use gts_macros::gts_well_known_instance; #[gts_well_known_instance( dir_path = "instances", - schema_id = "gts.x.core.events.topic.v1~" + instance_segment = "x.commerce._.orders.v1.0" )] const ORDERS_TOPIC: &str = r#"{"name": "orders"}"#; diff --git a/gts-macros/tests/compile_fail/instance_missing_instance_segment.stderr b/gts-macros/tests/compile_fail/instance_missing_instance_segment.stderr index fb14308..840728d 100644 --- a/gts-macros/tests/compile_fail/instance_missing_instance_segment.stderr +++ b/gts-macros/tests/compile_fail/instance_missing_instance_segment.stderr @@ -1,10 +1,5 @@ -error: unexpected end of input, gts_well_known_instance: Missing required attribute: instance_segment - --> tests/compile_fail/instance_missing_instance_segment.rs:5:1 +error: gts_well_known_instance: Unknown attribute. Expected: dir_path, id + --> tests/compile_fail/instance_missing_instance_segment.rs:7:5 | -5 | / #[gts_well_known_instance( -6 | | dir_path = "instances", -7 | | schema_id = "gts.x.core.events.topic.v1~" -8 | | )] - | |__^ - | - = note: this error originates in the attribute macro `gts_well_known_instance` (in Nightly builds, run with -Z macro-backtrace for more info) +7 | instance_segment = "x.commerce._.orders.v1.0" + | ^^^^^^^^^^^^^^^^ diff --git a/gts-macros/tests/compile_fail/instance_missing_schema_id.rs b/gts-macros/tests/compile_fail/instance_missing_schema_id.rs index 77baaef..cce979f 100644 --- a/gts-macros/tests/compile_fail/instance_missing_schema_id.rs +++ b/gts-macros/tests/compile_fail/instance_missing_schema_id.rs @@ -1,10 +1,9 @@ -//! Test: Missing required attribute schema_id in gts_well_known_instance +//! 
Test: Missing required attribute id in gts_well_known_instance use gts_macros::gts_well_known_instance; #[gts_well_known_instance( - dir_path = "instances", - instance_segment = "x.commerce._.orders.v1.0" + dir_path = "instances" )] const ORDERS_TOPIC: &str = r#"{"name": "orders"}"#; diff --git a/gts-macros/tests/compile_fail/instance_missing_schema_id.stderr b/gts-macros/tests/compile_fail/instance_missing_schema_id.stderr index bfbe1c5..53d2a00 100644 --- a/gts-macros/tests/compile_fail/instance_missing_schema_id.stderr +++ b/gts-macros/tests/compile_fail/instance_missing_schema_id.stderr @@ -1,10 +1,9 @@ -error: unexpected end of input, gts_well_known_instance: Missing required attribute: schema_id +error: unexpected end of input, gts_well_known_instance: Missing required attribute: id --> tests/compile_fail/instance_missing_schema_id.rs:5:1 | 5 | / #[gts_well_known_instance( -6 | | dir_path = "instances", -7 | | instance_segment = "x.commerce._.orders.v1.0" -8 | | )] +6 | | dir_path = "instances" +7 | | )] | |__^ | = note: this error originates in the attribute macro `gts_well_known_instance` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/gts-macros/tests/compile_fail/instance_on_non_const.rs b/gts-macros/tests/compile_fail/instance_on_non_const.rs index 99ada20..205e904 100644 --- a/gts-macros/tests/compile_fail/instance_on_non_const.rs +++ b/gts-macros/tests/compile_fail/instance_on_non_const.rs @@ -4,8 +4,7 @@ use gts_macros::gts_well_known_instance; #[gts_well_known_instance( dir_path = "instances", - schema_id = "gts.x.core.events.topic.v1~", - instance_segment = "x.commerce._.orders.v1.0" + id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0" )] static ORDERS_TOPIC: &str = r#"{"name": "orders"}"#; diff --git a/gts-macros/tests/compile_fail/instance_on_non_const.stderr b/gts-macros/tests/compile_fail/instance_on_non_const.stderr index cbd27c2..0f5e533 100644 --- a/gts-macros/tests/compile_fail/instance_on_non_const.stderr 
+++ b/gts-macros/tests/compile_fail/instance_on_non_const.stderr @@ -3,9 +3,8 @@ error: gts_well_known_instance: Only `const` items are supported. Usage: `const | 5 | / #[gts_well_known_instance( 6 | | dir_path = "instances", -7 | | schema_id = "gts.x.core.events.topic.v1~", -8 | | instance_segment = "x.commerce._.orders.v1.0" -9 | | )] +7 | | id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0" +8 | | )] | |__^ | = note: this error originates in the attribute macro `gts_well_known_instance` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/gts-macros/tests/compile_fail/instance_schema_id_no_tilde.rs b/gts-macros/tests/compile_fail/instance_schema_id_no_tilde.rs index 0a9f6de..3250b84 100644 --- a/gts-macros/tests/compile_fail/instance_schema_id_no_tilde.rs +++ b/gts-macros/tests/compile_fail/instance_schema_id_no_tilde.rs @@ -1,11 +1,10 @@ -//! Test: schema_id does not end with ~ in gts_well_known_instance +//! Test: id without ~ separator in gts_well_known_instance use gts_macros::gts_well_known_instance; #[gts_well_known_instance( dir_path = "instances", - schema_id = "gts.x.core.events.topic.v1", - instance_segment = "x.commerce._.orders.v1.0" + id = "gts.x.core.events.topic.v1.x.commerce._.orders.v1.0" )] const ORDERS_TOPIC: &str = r#"{"name": "orders"}"#; diff --git a/gts-macros/tests/compile_fail/instance_schema_id_no_tilde.stderr b/gts-macros/tests/compile_fail/instance_schema_id_no_tilde.stderr index cd8f55c..8b1e5a3 100644 --- a/gts-macros/tests/compile_fail/instance_schema_id_no_tilde.stderr +++ b/gts-macros/tests/compile_fail/instance_schema_id_no_tilde.stderr @@ -1,5 +1,5 @@ -error: gts_well_known_instance: schema_id must end with '~' (type marker), got 'gts.x.core.events.topic.v1' - --> tests/compile_fail/instance_schema_id_no_tilde.rs:7:17 +error: gts_well_known_instance: id must contain '~' separating schema from instance segment, got 'gts.x.core.events.topic.v1.x.commerce._.orders.v1.0' + --> 
tests/compile_fail/instance_schema_id_no_tilde.rs:7:10 | -7 | schema_id = "gts.x.core.events.topic.v1", - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +7 | id = "gts.x.core.events.topic.v1.x.commerce._.orders.v1.0" + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/gts-macros/tests/compile_fail/instance_segment_bare_wildcard.rs b/gts-macros/tests/compile_fail/instance_segment_bare_wildcard.rs index 81cce7a..3dcb3bd 100644 --- a/gts-macros/tests/compile_fail/instance_segment_bare_wildcard.rs +++ b/gts-macros/tests/compile_fail/instance_segment_bare_wildcard.rs @@ -1,11 +1,10 @@ -//! Test: instance_segment is a bare wildcard * in gts_well_known_instance +//! Test: id with bare wildcard '*' as instance segment in gts_well_known_instance use gts_macros::gts_well_known_instance; #[gts_well_known_instance( dir_path = "instances", - schema_id = "gts.x.core.events.topic.v1~", - instance_segment = "*" + id = "gts.x.core.events.topic.v1~*" )] const ORDERS_TOPIC: &str = r#"{"name": "orders"}"#; diff --git a/gts-macros/tests/compile_fail/instance_segment_bare_wildcard.stderr b/gts-macros/tests/compile_fail/instance_segment_bare_wildcard.stderr index accbe3b..bfe5aec 100644 --- a/gts-macros/tests/compile_fail/instance_segment_bare_wildcard.stderr +++ b/gts-macros/tests/compile_fail/instance_segment_bare_wildcard.stderr @@ -1,5 +1,5 @@ -error: gts_well_known_instance: instance_segment must not be a bare wildcard '*'. Wildcards are not valid in generated instance IDs - --> tests/compile_fail/instance_segment_bare_wildcard.rs:8:24 +error: gts_well_known_instance: Invalid instance segment '*': Too few tokens (got 1, min 5). 
Expected format: vendor.package.namespace.type.vMAJOR[.MINOR] + --> tests/compile_fail/instance_segment_bare_wildcard.rs:7:10 | -8 | instance_segment = "*" - | ^^^ +7 | id = "gts.x.core.events.topic.v1~*" + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/gts-macros/tests/compile_fail/instance_segment_ends_with_tilde.rs b/gts-macros/tests/compile_fail/instance_segment_ends_with_tilde.rs index f2728a9..5709197 100644 --- a/gts-macros/tests/compile_fail/instance_segment_ends_with_tilde.rs +++ b/gts-macros/tests/compile_fail/instance_segment_ends_with_tilde.rs @@ -1,11 +1,10 @@ -//! Test: instance_segment ends with ~ in gts_well_known_instance +//! Test: id ends with ~ in gts_well_known_instance (schema/type, not instance) use gts_macros::gts_well_known_instance; #[gts_well_known_instance( dir_path = "instances", - schema_id = "gts.x.core.events.topic.v1~", - instance_segment = "x.commerce._.orders.v1~" + id = "gts.x.core.events.topic.v1~" )] const ORDERS_TOPIC: &str = r#"{"name": "orders"}"#; diff --git a/gts-macros/tests/compile_fail/instance_segment_ends_with_tilde.stderr b/gts-macros/tests/compile_fail/instance_segment_ends_with_tilde.stderr index c772728..aa40807 100644 --- a/gts-macros/tests/compile_fail/instance_segment_ends_with_tilde.stderr +++ b/gts-macros/tests/compile_fail/instance_segment_ends_with_tilde.stderr @@ -1,5 +1,5 @@ -error: gts_well_known_instance: instance_segment must not end with '~' — that is a schema/type marker. 
Instance segments do not end with '~' - --> tests/compile_fail/instance_segment_ends_with_tilde.rs:8:24 +error: gts_well_known_instance: id must not end with '~' (that is a schema/type ID, not an instance ID), got 'gts.x.core.events.topic.v1~' + --> tests/compile_fail/instance_segment_ends_with_tilde.rs:7:10 | -8 | instance_segment = "x.commerce._.orders.v1~" - | ^^^^^^^^^^^^^^^^^^^^^^^^^ +7 | id = "gts.x.core.events.topic.v1~" + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From 8276d66b38530399f15663218029cc4924e753f5 Mon Sep 17 00:00:00 2001 From: devjow Date: Mon, 9 Mar 2026 16:48:16 +0000 Subject: [PATCH 08/10] fix(gts-cli): deduplicate path logic, harden sandbox test, add JSON error context Signed-off-by: devjow --- Makefile | 2 +- gts-cli/src/gen_common.rs | 31 ------------------ gts-cli/src/gen_instances/mod.rs | 13 ++------ gts-cli/src/gen_instances/writer.rs | 48 ++++++++++++++++++++-------- gts-cli/tests/gen_instances_tests.rs | 29 +++++++++++++---- gts-macros/README.md | 1 + gts-macros/src/lib.rs | 4 +++ 7 files changed, 64 insertions(+), 64 deletions(-) diff --git a/Makefile b/Makefile index b6a3e4c..93c4b27 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ CI := 1 -.PHONY: help build dev-fmt dev-clippy all check fmt clippy test deny security update-spec e2e generate-schemas +.PHONY: help build dev-fmt dev-clippy all check fmt clippy test deny security update-spec e2e generate-schemas coverage # Default target - show help .DEFAULT_GOAL := help diff --git a/gts-cli/src/gen_common.rs b/gts-cli/src/gen_common.rs index c157d99..c318d0e 100644 --- a/gts-cli/src/gen_common.rs +++ b/gts-cli/src/gen_common.rs @@ -274,37 +274,6 @@ pub fn safe_canonicalize_nonexistent(path: &Path) -> Result { Ok(result) } -/// Validate that the output path is within the sandbox boundary. -/// Returns the safe canonical path on success. -/// -/// # Errors -/// Returns an error if the resolved path escapes the sandbox root. 
-#[allow(dead_code)] -pub fn validate_output_path_in_sandbox( - output_path: &Path, - sandbox_root: &Path, - annotation_name: &str, - source_file: &Path, - dir_path: &str, -) -> Result { - let canonical = safe_canonicalize_nonexistent(output_path)?; - - if !canonical.starts_with(sandbox_root) { - bail!( - "Security error in {} - dir_path '{}' attempts to write outside sandbox boundary. \ - Resolved to: {}, but must be within: {}", - source_file.display(), - dir_path, - canonical.display(), - sandbox_root.display(), - // annotation_name for diagnostics - ); - } - let _ = annotation_name; - - Ok(canonical) -} - #[cfg(test)] mod tests { use super::*; diff --git a/gts-cli/src/gen_instances/mod.rs b/gts-cli/src/gen_instances/mod.rs index 2ed60d1..c84d5cc 100644 --- a/gts-cli/src/gen_instances/mod.rs +++ b/gts-cli/src/gen_instances/mod.rs @@ -9,7 +9,7 @@ use std::path::Path; use crate::gen_common::{compute_sandbox_root, walk_rust_files}; use parser::ParsedInstance; -use writer::generate_single_instance; +use writer::{generate_single_instance, instance_output_path}; /// Generate GTS well-known instance files from Rust source code annotated with /// `#[gts_well_known_instance]`. 
@@ -122,16 +122,7 @@ fn check_duplicate_output_paths( let mut errors: Vec = Vec::new(); for inst in instances { - let file_rel = std::path::Path::new(&inst.attrs.dir_path) - .join(format!("{}.instance.json", inst.attrs.id)); - let raw_path = if let Some(od) = output { - Path::new(od).join(&file_rel) - } else { - let src_dir = Path::new(inst.source_file.as_str()) - .parent() - .unwrap_or(sandbox_root); - src_dir.join(&file_rel) - }; + let raw_path = instance_output_path(inst, output, sandbox_root); let key = raw_path .components() .collect::() diff --git a/gts-cli/src/gen_instances/writer.rs b/gts-cli/src/gen_instances/writer.rs index 476a8ae..5100fa8 100644 --- a/gts-cli/src/gen_instances/writer.rs +++ b/gts-cli/src/gen_instances/writer.rs @@ -1,11 +1,33 @@ use anyhow::{Result, bail}; use std::fs; -use std::path::Path; +use std::path::{Path, PathBuf}; use crate::gen_common::safe_canonicalize_nonexistent; use super::parser::ParsedInstance; +/// Compute the raw output path for an instance file. +/// +/// If `output` is provided, the file is placed under that directory. +/// Otherwise, the file is placed adjacent to the source file. +#[must_use] +pub fn instance_output_path( + inst: &ParsedInstance, + output: Option<&str>, + sandbox_root: &Path, +) -> PathBuf { + let file_rel = Path::new(&inst.attrs.dir_path).join(format!("{}.instance.json", inst.attrs.id)); + + if let Some(od) = output { + Path::new(od).join(&file_rel) + } else { + let src_dir = Path::new(&inst.source_file) + .parent() + .unwrap_or(sandbox_root); + src_dir.join(&file_rel) + } +} + /// Generate the instance JSON file for a single parsed annotation. 
/// /// Validates the output path against the sandbox boundary **before** any @@ -22,17 +44,7 @@ pub fn generate_single_instance( output: Option<&str>, sandbox_root: &Path, ) -> Result { - let file_rel = - std::path::Path::new(&inst.attrs.dir_path).join(format!("{}.instance.json", inst.attrs.id)); - - let raw_output_path = if let Some(od) = output { - Path::new(od).join(&file_rel) - } else { - let src_dir = Path::new(&inst.source_file) - .parent() - .unwrap_or(sandbox_root); - src_dir.join(&file_rel) - }; + let raw_output_path = instance_output_path(inst, output, sandbox_root); // Validate sandbox boundary BEFORE any filesystem writes let output_canonical = safe_canonicalize_nonexistent(&raw_output_path).map_err(|e| { @@ -56,8 +68,16 @@ pub fn generate_single_instance( } // Build JSON with injected "id" field - let mut obj: serde_json::Map = - serde_json::from_str(&inst.json_body)?; + let mut obj: serde_json::Map = serde_json::from_str(&inst.json_body) + .map_err(|e| { + anyhow::anyhow!( + "{}:{}: Failed to parse JSON body for instance '{}': {}", + inst.source_file, + inst.line, + inst.attrs.id, + e + ) + })?; obj.insert( "id".to_owned(), serde_json::Value::String(inst.attrs.id.clone()), diff --git a/gts-cli/tests/gen_instances_tests.rs b/gts-cli/tests/gen_instances_tests.rs index eafb549..577491e 100644 --- a/gts-cli/tests/gen_instances_tests.rs +++ b/gts-cli/tests/gen_instances_tests.rs @@ -294,14 +294,21 @@ fn duplicate_instance_id_hard_error() { #[test] fn sandbox_escape_rejected() { let (_tmp, root) = sandbox(); - let src = concat!( - "#[gts_well_known_instance(\n", - " dir_path = \"../../etc\",\n", - " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", - ")]\n", - "pub const FOO: &str = \"{\\\"name\\\":\\\"x\\\"}\";\n" + + // Use a unique escape target so we can assert it was NOT created on disk. 
+ let escape_component = format!("gts_escape_{}", root.file_name().unwrap().to_string_lossy()); + let escape_dir = format!("../{escape_component}"); + let src = format!( + concat!( + "#[gts_well_known_instance(\n", + " dir_path = \"{dir}\",\n", + " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", + ")]\n", + "pub const FOO: &str = \"{{\\\"name\\\":\\\"x\\\"}}\";\n" + ), + dir = escape_dir ); - write(&root, "escape.rs", src); + write(&root, "escape.rs", &src); let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); let msg = err.to_string(); @@ -309,6 +316,14 @@ fn sandbox_escape_rejected() { msg.contains("Security error") || msg.contains("sandbox") || msg.contains("'..'"), "Got: {msg}" ); + + // Verify no out-of-sandbox directory was created as a side effect. + let outside = root.parent().unwrap().join(&escape_component); + assert!( + !outside.exists(), + "Sandbox escape created directory: {}", + outside.display() + ); } // ───────────────────────────────────────────────────────────────────────────── diff --git a/gts-macros/README.md b/gts-macros/README.md index 40c99b3..4977cc9 100644 --- a/gts-macros/README.md +++ b/gts-macros/README.md @@ -546,6 +546,7 @@ gts generate-from-rust --source src/ --output out/ --mode instances - The JSON body **must not contain an `"id"` field** — the CLI injects it automatically from the `id` attribute. - Files in `compile_fail/` directories and files with a `// gts:ignore` directive are skipped. - Items behind `#[cfg(...)]` gates (e.g., `#[cfg(test)]`) are still extracted — extraction is lexical, not conditional. +- **Schema conformance is not validated** — the macro validates the `id` format only. It does not validate the instance JSON body against its parent schema at compile time. This is a future enhancement. 
### Compile-time validation errors diff --git a/gts-macros/src/lib.rs b/gts-macros/src/lib.rs index 7d849a5..f53a3e4 100644 --- a/gts-macros/src/lib.rs +++ b/gts-macros/src/lib.rs @@ -1980,6 +1980,10 @@ impl Parse for GtsInstanceArgs { /// The macro passes the annotated `const` item through unchanged -- it is purely metadata /// for the CLI extraction step. /// +/// **Note:** This macro validates the `id` *format* only (GTS ID structure). It does **not** +/// validate the instance JSON body against its parent schema at compile time. Schema conformance +/// validation is a future enhancement. +/// /// # Arguments /// /// * `dir_path` - Output directory for the generated instance file (relative to crate root or `--output`) From 92ac8bc3931d4d3f0dacdf22bbf2c40e5c61cf73 Mon Sep 17 00:00:00 2001 From: devjow Date: Tue, 10 Mar 2026 12:43:22 +0000 Subject: [PATCH 09/10] feat: instance schema validation Signed-off-by: devjow --- Cargo.lock | 1 + gts-cli/Cargo.toml | 1 + gts-cli/src/gen_instances/mod.rs | 3 + gts-cli/src/gen_instances/schema_check.rs | 542 ++++++++++++++++++++++ gts-cli/src/gen_instances/writer.rs | 41 +- gts-cli/tests/gen_instances_tests.rs | 252 ++++++++++ gts-macros/README.md | 2 +- gts-macros/src/lib.rs | 3 +- 8 files changed, 828 insertions(+), 17 deletions(-) create mode 100644 gts-cli/src/gen_instances/schema_check.rs diff --git a/Cargo.lock b/Cargo.lock index d515a73..0302f46 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -548,6 +548,7 @@ dependencies = [ "clap", "gts", "gts-id", + "jsonschema", "regex", "serde", "serde_json", diff --git a/gts-cli/Cargo.toml b/gts-cli/Cargo.toml index 1751dc5..01911ba 100644 --- a/gts-cli/Cargo.toml +++ b/gts-cli/Cargo.toml @@ -33,6 +33,7 @@ tracing-subscriber.workspace = true chrono.workspace = true regex.workspace = true walkdir.workspace = true +jsonschema.workspace = true [dev-dependencies] tempfile = "3.8" diff --git a/gts-cli/src/gen_instances/mod.rs b/gts-cli/src/gen_instances/mod.rs index c84d5cc..3e24f1f 
100644 --- a/gts-cli/src/gen_instances/mod.rs +++ b/gts-cli/src/gen_instances/mod.rs @@ -1,5 +1,6 @@ pub mod attrs; pub mod parser; +pub mod schema_check; pub mod string_lit; pub mod writer; @@ -27,6 +28,7 @@ use writer::{generate_single_instance, instance_output_path}; /// - Duplicate instance IDs are detected (hard error, both locations reported) /// - Duplicate output paths are detected /// - Any output path escapes the sandbox boundary +/// - Any instance fails schema validation (when schemas are present on disk) /// - File I/O fails pub fn generate_instances_from_rust( source: &str, @@ -73,6 +75,7 @@ pub fn generate_instances_from_rust( check_duplicate_ids(&all_instances)?; check_duplicate_output_paths(&all_instances, output, &sandbox_root)?; + schema_check::validate_instances_against_schemas(&all_instances, &sandbox_root)?; let instances_generated = emit_instances(&all_instances, output, &sandbox_root)?; diff --git a/gts-cli/src/gen_instances/schema_check.rs b/gts-cli/src/gen_instances/schema_check.rs new file mode 100644 index 0000000..052bb77 --- /dev/null +++ b/gts-cli/src/gen_instances/schema_check.rs @@ -0,0 +1,542 @@ +use anyhow::{Result, bail}; +use serde_json::Value; +use std::collections::HashMap; +use std::fs; +use std::path::Path; +use std::sync::Arc; +use walkdir::WalkDir; + +use super::parser::ParsedInstance; +use super::writer::build_instance_value; + +/// GTS URI prefix used in schema `$id` fields. +const GTS_URI_PREFIX: &str = "gts://"; + +/// Registry of discovered GTS schemas, keyed by `$id` URI (e.g. `gts://gts.x.core.events.topic.v1~`). +/// +/// Implements `jsonschema::Retrieve` so the `jsonschema` crate can resolve `$ref` URIs +/// pointing to other GTS schemas during validation of inherited (allOf) schemas. +#[derive(Clone)] +struct SchemaRegistry { + schemas: Arc>, +} + +impl SchemaRegistry { + /// Walk `root` recursively for `*.schema.json` files and index by their `$id` field. 
+ fn discover(root: &Path) -> Self { + let mut schemas = HashMap::new(); + + for entry in WalkDir::new(root).follow_links(true).max_depth(64) { + let entry = match entry { + Ok(e) => e, + Err(e) => { + eprintln!("warning: skipping unreadable path during schema discovery: {e}"); + continue; + } + }; + let path = entry.path(); + if path.extension().and_then(|s| s.to_str()) != Some("json") { + continue; + } + let name = path + .file_name() + .and_then(|s| s.to_str()) + .unwrap_or_default(); + if !name.ends_with(".schema.json") { + continue; + } + + let content = match fs::read_to_string(path) { + Ok(c) => c, + Err(e) => { + eprintln!( + "warning: skipping unreadable schema file {}: {e}", + path.display() + ); + continue; + } + }; + let value = match serde_json::from_str::(&content) { + Ok(v) => v, + Err(e) => { + eprintln!( + "warning: skipping malformed schema file {}: {e}", + path.display() + ); + continue; + } + }; + if let Some(id) = value.get("$id").and_then(Value::as_str) { + schemas.insert(id.to_owned(), value); + } else { + eprintln!( + "warning: schema file {} has no '$id' field -- skipping", + path.display() + ); + } + } + + Self { + schemas: Arc::new(schemas), + } + } + + /// Look up a schema by its full `$id` URI. + fn get(&self, id_uri: &str) -> Option<&Value> { + self.schemas.get(id_uri) + } + + fn is_empty(&self) -> bool { + self.schemas.is_empty() + } +} + +impl jsonschema::Retrieve for SchemaRegistry { + fn retrieve( + &self, + uri: &jsonschema::Uri, + ) -> std::result::Result> { + let uri_str = uri.as_str(); + + if !uri_str.starts_with(GTS_URI_PREFIX) { + return Err(format!("Unknown URI scheme: {uri_str}").into()); + } + + self.schemas + .get(uri_str) + .cloned() + .ok_or_else(|| format!("Schema not found: {uri_str}").into()) + } +} + +/// Cached result of compiling a JSON Schema validator for a given schema URI. +enum ValidatorCacheEntry { + /// Successfully compiled validator, ready for reuse. 
+ Valid(jsonschema::Validator), + /// Schema URI was not found in the registry. + NotFound, + /// Schema was found but failed to compile. + CompileError(String), +} + +/// Validate all parsed instances against their parent GTS schemas. +/// +/// Schema discovery walks `sandbox_root` for `*.schema.json` files. +/// +/// - If no schemas are found on disk the check is silently skipped (supports `--mode instances`). +/// - If the schema for a specific instance is not found a warning is printed and that instance is skipped. +/// - If validation fails for any instance, **all** errors are collected and returned as a single hard error. +/// +/// # Errors +/// Returns an error if one or more instances fail schema validation. +pub fn validate_instances_against_schemas( + instances: &[ParsedInstance], + sandbox_root: &Path, +) -> Result<()> { + let registry = SchemaRegistry::discover(sandbox_root); + + if registry.is_empty() { + if !instances.is_empty() { + println!( + " Schema validation: skipped (no *.schema.json files found in {})", + sandbox_root.display() + ); + } + return Ok(()); + } + + // Cache compiled validators per schema URI to avoid re-compiling (and avoid + // cloning the registry) for every instance that shares the same schema. + let mut cache: HashMap = HashMap::new(); + let mut errors: Vec = Vec::new(); + + for inst in instances { + let schema_uri = format!("{GTS_URI_PREFIX}{}", inst.attrs.schema_id); + + // Track whether this is the first time we see this schema URI so we + // can emit warnings only once per missing/broken schema. + let is_new = !cache.contains_key(&schema_uri); + + // Build (or retrieve cached) validator for this schema. 
+ let entry = cache.entry(schema_uri.clone()).or_insert_with(|| { + let Some(schema) = registry.get(&schema_uri) else { + return ValidatorCacheEntry::NotFound; + }; + match jsonschema::options() + .with_retriever(registry.clone()) + .build(schema) + { + Ok(v) => ValidatorCacheEntry::Valid(v), + Err(e) => ValidatorCacheEntry::CompileError(e.to_string()), + } + }); + + let validator = match entry { + ValidatorCacheEntry::Valid(v) => v, + ValidatorCacheEntry::NotFound => { + if is_new { + eprintln!( + "warning: schema '{}' not found on disk -- skipping validation", + inst.attrs.schema_id + ); + } + continue; + } + ValidatorCacheEntry::CompileError(reason) => { + errors.push(format!( + "{}:{}: Failed to compile schema '{}' for instance '{}': {}", + inst.source_file, inst.line, inst.attrs.schema_id, inst.attrs.id, reason + )); + continue; + } + }; + + // Build the complete instance JSON with injected "id". + let complete_instance = match build_instance_value(inst) { + Ok(v) => v, + Err(e) => { + errors.push(format!( + "{}:{}: Failed to build instance JSON for '{}': {}", + inst.source_file, inst.line, inst.attrs.id, e + )); + continue; + } + }; + + // Collect all validation errors for this instance. 
+ let validation_errors: Vec = validator + .iter_errors(&complete_instance) + .map(|e| { + let path = e.instance_path().to_string(); + if path.is_empty() { + format!(" - {e}") + } else { + format!(" - {path}: {e}") + } + }) + .collect(); + + if !validation_errors.is_empty() { + errors.push(format!( + "{}:{}: Instance '{}' does not conform to schema '{}':\n{}", + inst.source_file, + inst.line, + inst.attrs.id, + inst.attrs.schema_id, + validation_errors.join("\n") + )); + } + } + + if !errors.is_empty() { + errors.sort(); + for err in &errors { + eprintln!("error: {err}"); + } + bail!( + "Instance generation failed: {} schema validation error(s)", + errors.len() + ); + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::gen_instances::attrs::InstanceAttrs; + use tempfile::TempDir; + + fn make_inst(dir_path: &str, id: &str, json_body: &str, source_file: &str) -> ParsedInstance { + let tilde_pos = id.find('~').unwrap(); + ParsedInstance { + attrs: InstanceAttrs { + dir_path: dir_path.to_owned(), + id: id.to_owned(), + schema_id: id[..=tilde_pos].to_owned(), + instance_segment: id[tilde_pos + 1..].to_owned(), + }, + json_body: json_body.to_owned(), + source_file: source_file.to_owned(), + line: 1, + } + } + + fn write_schema(dir: &Path, schema_id: &str, schema: &Value) { + let name = format!("{schema_id}.schema.json"); + fs::create_dir_all(dir).unwrap(); + fs::write( + dir.join(name), + serde_json::to_string_pretty(schema).unwrap(), + ) + .unwrap(); + } + + fn base_schema(schema_id: &str, properties: &[&str]) -> Value { + let mut props = serde_json::Map::new(); + // Always include "id" property (GtsInstanceId) + props.insert( + "id".to_owned(), + serde_json::json!({ "type": "string", "format": "gts-instance-id" }), + ); + for p in properties { + props.insert((*p).to_owned(), serde_json::json!({ "type": "string" })); + } + let mut required: Vec<&str> = vec!["id"]; + required.extend_from_slice(properties); + required.sort_unstable(); + 
serde_json::json!({ + "$id": format!("{GTS_URI_PREFIX}{schema_id}"), + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": props, + "required": required + }) + } + + #[test] + fn valid_instance_passes() { + let tmp = TempDir::new().unwrap(); + let root = tmp.path().canonicalize().unwrap(); + + let schema_dir = root.join("schemas"); + let schema = base_schema("gts.x.test.v1~", &["name"]); + write_schema(&schema_dir, "gts.x.test.v1~", &schema); + + let inst = make_inst( + "instances", + "gts.x.test.v1~x.foo.v1", + r#"{"name": "foo"}"#, + root.join("src.rs").to_str().unwrap(), + ); + + let result = validate_instances_against_schemas(&[inst], &root); + assert!(result.is_ok(), "{result:?}"); + } + + #[test] + fn missing_required_field_fails() { + let tmp = TempDir::new().unwrap(); + let root = tmp.path().canonicalize().unwrap(); + + let schema_dir = root.join("schemas"); + let schema = base_schema("gts.x.test.v1~", &["name", "vendor"]); + write_schema(&schema_dir, "gts.x.test.v1~", &schema); + + // Instance missing "vendor" + let inst = make_inst( + "instances", + "gts.x.test.v1~x.foo.v1", + r#"{"name": "foo"}"#, + root.join("src.rs").to_str().unwrap(), + ); + + let result = validate_instances_against_schemas(&[inst], &root); + assert!(result.is_err()); + let msg = result.unwrap_err().to_string(); + assert!(msg.contains("schema validation error"), "Got: {msg}"); + } + + #[test] + fn extra_field_with_additional_properties_false_fails() { + let tmp = TempDir::new().unwrap(); + let root = tmp.path().canonicalize().unwrap(); + + let schema_dir = root.join("schemas"); + let schema = base_schema("gts.x.test.v1~", &["name"]); + write_schema(&schema_dir, "gts.x.test.v1~", &schema); + + // Instance has extra field "extra" + let inst = make_inst( + "instances", + "gts.x.test.v1~x.foo.v1", + r#"{"name": "foo", "extra": "bar"}"#, + root.join("src.rs").to_str().unwrap(), + ); + + let result = 
validate_instances_against_schemas(&[inst], &root); + assert!(result.is_err()); + let msg = result.unwrap_err().to_string(); + assert!(msg.contains("schema validation error"), "Got: {msg}"); + } + + #[test] + fn missing_schema_warns_not_errors() { + let tmp = TempDir::new().unwrap(); + let root = tmp.path().canonicalize().unwrap(); + + // Write a schema for a DIFFERENT schema_id so registry is non-empty + let schema_dir = root.join("schemas"); + let schema = base_schema("gts.x.other.v1~", &["name"]); + write_schema(&schema_dir, "gts.x.other.v1~", &schema); + + let inst = make_inst( + "instances", + "gts.x.test.v1~x.foo.v1", + r#"{"name": "foo"}"#, + root.join("src.rs").to_str().unwrap(), + ); + + // Should warn but NOT error + let result = validate_instances_against_schemas(&[inst], &root); + assert!(result.is_ok(), "{result:?}"); + } + + #[test] + fn no_schemas_on_disk_skips_silently() { + let tmp = TempDir::new().unwrap(); + let root = tmp.path().canonicalize().unwrap(); + + let inst = make_inst( + "instances", + "gts.x.test.v1~x.foo.v1", + r#"{"name": "foo"}"#, + root.join("src.rs").to_str().unwrap(), + ); + + let result = validate_instances_against_schemas(&[inst], &root); + assert!(result.is_ok(), "{result:?}"); + } + + /// Build a child schema that uses `allOf` + `$ref` to inherit from a parent. 
+ fn child_schema( + schema_id: &str, + parent_schema_id: &str, + own_properties: &[(&str, &str)], + ) -> Value { + let mut own_props = serde_json::Map::new(); + let mut required = Vec::new(); + for (name, ty) in own_properties { + own_props.insert((*name).to_owned(), serde_json::json!({ "type": *ty })); + required.push(*name); + } + required.sort_unstable(); + serde_json::json!({ + "$id": format!("{GTS_URI_PREFIX}{schema_id}"), + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "allOf": [ + { "$ref": format!("{GTS_URI_PREFIX}{parent_schema_id}") }, + { + "type": "object", + "properties": own_props, + "required": required + } + ] + }) + } + + /// Build a base schema **without** `additionalProperties: false`. + /// Parent schemas used for `allOf` inheritance must be open so that child + /// properties are not rejected by the parent's constraint. + fn base_schema_open(schema_id: &str, properties: &[&str]) -> Value { + let mut props = serde_json::Map::new(); + props.insert( + "id".to_owned(), + serde_json::json!({ "type": "string", "format": "gts-instance-id" }), + ); + for p in properties { + props.insert((*p).to_owned(), serde_json::json!({ "type": "string" })); + } + let mut required: Vec<&str> = vec!["id"]; + required.extend_from_slice(properties); + required.sort_unstable(); + serde_json::json!({ + "$id": format!("{GTS_URI_PREFIX}{schema_id}"), + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": props, + "required": required + }) + } + + #[test] + fn allof_ref_inheritance_valid_passes() { + let tmp = TempDir::new().unwrap(); + let root = tmp.path().canonicalize().unwrap(); + let schema_dir = root.join("schemas"); + + // Parent schema (open — no additionalProperties: false) + let parent = base_schema_open("gts.x.test.v1~", &["name"]); + write_schema(&schema_dir, "gts.x.test.v1~", &parent); + + // Child schema: inherits parent via allOf + $ref, adds own "vendor" + let child = child_schema( + 
"gts.x.test.v1~x.child.v1~", + "gts.x.test.v1~", + &[("vendor", "string")], + ); + write_schema(&schema_dir, "gts.x.test.v1~x.child.v1~", &child); + + // Instance satisfies both parent ("id", "name") and child ("vendor") + let inst = make_inst( + "instances", + "gts.x.test.v1~x.child.v1~x.foo.v1", + r#"{"name": "foo", "vendor": "acme"}"#, + root.join("src.rs").to_str().unwrap(), + ); + + let result = validate_instances_against_schemas(&[inst], &root); + assert!(result.is_ok(), "{result:?}"); + } + + #[test] + fn allof_ref_inheritance_missing_parent_field_fails() { + let tmp = TempDir::new().unwrap(); + let root = tmp.path().canonicalize().unwrap(); + let schema_dir = root.join("schemas"); + + // Parent schema (open) + let parent = base_schema_open("gts.x.test.v1~", &["name"]); + write_schema(&schema_dir, "gts.x.test.v1~", &parent); + + // Child schema: inherits parent, adds own "vendor" + let child = child_schema( + "gts.x.test.v1~x.child.v1~", + "gts.x.test.v1~", + &[("vendor", "string")], + ); + write_schema(&schema_dir, "gts.x.test.v1~x.child.v1~", &child); + + // Instance provides "vendor" but missing parent-required "name" + let inst = make_inst( + "instances", + "gts.x.test.v1~x.child.v1~x.foo.v1", + r#"{"vendor": "acme"}"#, + root.join("src.rs").to_str().unwrap(), + ); + + let result = validate_instances_against_schemas(&[inst], &root); + assert!(result.is_err()); + let msg = result.unwrap_err().to_string(); + assert!(msg.contains("schema validation error"), "Got: {msg}"); + } + + #[test] + fn wrong_type_fails() { + let tmp = TempDir::new().unwrap(); + let root = tmp.path().canonicalize().unwrap(); + + let schema_dir = root.join("schemas"); + // Schema requires "count" to be integer + let mut schema = base_schema("gts.x.test.v1~", &[]); + schema["properties"]["count"] = serde_json::json!({ "type": "integer" }); + schema["required"] = serde_json::json!(["count", "id"]); + write_schema(&schema_dir, "gts.x.test.v1~", &schema); + + // Instance provides "count" 
as a string + let inst = make_inst( + "instances", + "gts.x.test.v1~x.foo.v1", + r#"{"count": "not-a-number"}"#, + root.join("src.rs").to_str().unwrap(), + ); + + let result = validate_instances_against_schemas(&[inst], &root); + assert!(result.is_err()); + let msg = result.unwrap_err().to_string(); + assert!(msg.contains("schema validation error"), "Got: {msg}"); + } +} diff --git a/gts-cli/src/gen_instances/writer.rs b/gts-cli/src/gen_instances/writer.rs index 5100fa8..86e4800 100644 --- a/gts-cli/src/gen_instances/writer.rs +++ b/gts-cli/src/gen_instances/writer.rs @@ -68,21 +68,15 @@ pub fn generate_single_instance( } // Build JSON with injected "id" field - let mut obj: serde_json::Map = serde_json::from_str(&inst.json_body) - .map_err(|e| { - anyhow::anyhow!( - "{}:{}: Failed to parse JSON body for instance '{}': {}", - inst.source_file, - inst.line, - inst.attrs.id, - e - ) - })?; - obj.insert( - "id".to_owned(), - serde_json::Value::String(inst.attrs.id.clone()), - ); - let output_value = serde_json::Value::Object(obj); + let output_value = build_instance_value(inst).map_err(|e| { + anyhow::anyhow!( + "{}:{}: Failed to parse JSON body for instance '{}': {}", + inst.source_file, + inst.line, + inst.attrs.id, + e + ) + })?; // Create parent directories only after sandbox validation passes if let Some(parent) = raw_output_path.parent() { @@ -96,6 +90,23 @@ pub fn generate_single_instance( Ok(raw_output_path.display().to_string()) } +/// Parse the instance JSON body and inject the `"id"` field. +/// +/// This is the single source of truth for building the complete instance JSON value. +/// Used by both `generate_single_instance` (file emission) and `schema_check` (validation). +/// +/// # Errors +/// Returns an error if the JSON body cannot be parsed as an object. 
+pub fn build_instance_value(inst: &ParsedInstance) -> serde_json::Result { + let mut obj: serde_json::Map = + serde_json::from_str(&inst.json_body)?; + obj.insert( + "id".to_owned(), + serde_json::Value::String(inst.attrs.id.clone()), + ); + Ok(serde_json::Value::Object(obj)) +} + #[cfg(test)] mod tests { use super::*; diff --git a/gts-cli/tests/gen_instances_tests.rs b/gts-cli/tests/gen_instances_tests.rs index 577491e..b0883b7 100644 --- a/gts-cli/tests/gen_instances_tests.rs +++ b/gts-cli/tests/gen_instances_tests.rs @@ -780,3 +780,255 @@ fn compile_fail_dir_is_auto_skipped() { run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap(); } + +// ───────────────────────────────────────────────────────────────────────────── +// Schema validation – instance conforms to schema +// ───────────────────────────────────────────────────────────────────────────── + +/// Helper: write a base GTS schema into `{root}/schemas/{schema_id}.schema.json`. +fn write_schema(root: &Path, schema_id: &str, extra_props: &[(&str, &str)]) { + let mut props = serde_json::Map::new(); + props.insert( + "id".to_owned(), + serde_json::json!({ "type": "string", "format": "gts-instance-id" }), + ); + let mut required = vec!["id".to_owned()]; + for (name, ty) in extra_props { + props.insert((*name).to_owned(), serde_json::json!({ "type": *ty })); + required.push((*name).to_owned()); + } + required.sort(); + let schema = serde_json::json!({ + "$id": format!("gts://{schema_id}"), + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": props, + "required": required + }); + let dir = root.join("schemas"); + fs::create_dir_all(&dir).unwrap(); + fs::write( + dir.join(format!("{schema_id}.schema.json")), + serde_json::to_string_pretty(&schema).unwrap(), + ) + .unwrap(); +} + +#[test] +fn schema_validation_valid_instance_passes() { + let (_tmp, root) = sandbox(); + + write_schema( + &root, + "gts.x.core.events.topic.v1~", 
+ &[("name", "string"), ("partitions", "integer")], + ); + + let src = instance_src( + "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", + r#""{\"name\":\"orders\",\"partitions\":16}""#, + ); + write(&root, "inst.rs", &src); + + run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap(); +} + +#[test] +fn schema_validation_missing_required_field_fails() { + let (_tmp, root) = sandbox(); + + // Schema requires "name" and "vendor" + write_schema( + &root, + "gts.x.core.events.topic.v1~", + &[("name", "string"), ("vendor", "string")], + ); + + // Instance provides "name" but NOT "vendor" + let src = instance_src( + "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", + r#""{\"name\":\"orders\"}""#, + ); + write(&root, "inst.rs", &src); + + let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("schema validation error"), + "Expected schema validation error, got: {msg}" + ); +} + +#[test] +fn schema_validation_extra_field_fails() { + let (_tmp, root) = sandbox(); + + // Schema only allows "name" (plus "id") + write_schema(&root, "gts.x.core.events.topic.v1~", &[("name", "string")]); + + // Instance has "name" + "extra" — violates additionalProperties: false + let src = instance_src( + "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", + r#""{\"name\":\"orders\",\"extra\":\"bad\"}""#, + ); + write(&root, "inst.rs", &src); + + let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("schema validation error"), + "Expected schema validation error, got: {msg}" + ); +} + +#[test] +fn schema_validation_wrong_type_fails() { + let (_tmp, root) = sandbox(); + + // Schema requires "count" as integer + write_schema( + &root, + "gts.x.core.events.topic.v1~", + &[("count", "integer")], + ); + + // Instance provides "count" as a string + let src = instance_src( + 
"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", + r#""{\"count\":\"not-a-number\"}""#, + ); + write(&root, "inst.rs", &src); + + let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("schema validation error"), + "Expected schema validation error, got: {msg}" + ); +} + +#[test] +fn schema_validation_allof_ref_inheritance_passes() { + let (_tmp, root) = sandbox(); + let dir = root.join("schemas"); + fs::create_dir_all(&dir).unwrap(); + + // Parent schema (open — no additionalProperties: false, required for allOf inheritance) + let parent = serde_json::json!({ + "$id": "gts://gts.x.core.events.topic.v1~", + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { "type": "string", "format": "gts-instance-id" }, + "name": { "type": "string" } + }, + "required": ["id", "name"] + }); + fs::write( + dir.join("gts.x.core.events.topic.v1~.schema.json"), + serde_json::to_string_pretty(&parent).unwrap(), + ) + .unwrap(); + + // Child schema: inherits parent via allOf + $ref, adds "vendor" + let child = serde_json::json!({ + "$id": "gts://gts.x.core.events.topic.v1~x.core.audit.event.v1~", + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "allOf": [ + { "$ref": "gts://gts.x.core.events.topic.v1~" }, + { + "type": "object", + "properties": { "vendor": { "type": "string" } }, + "required": ["vendor"] + } + ] + }); + fs::write( + dir.join("gts.x.core.events.topic.v1~x.core.audit.event.v1~.schema.json"), + serde_json::to_string_pretty(&child).unwrap(), + ) + .unwrap(); + + // Instance satisfies both parent ("name") and child ("vendor") + let src = instance_src( + "gts.x.core.events.topic.v1~x.core.audit.event.v1~x.commerce._.orders.v1.0", + r#""{\"name\":\"orders\",\"vendor\":\"acme\"}""#, + ); + write(&root, "inst.rs", &src); + + run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap(); +} + 
+#[test] +fn schema_validation_allof_ref_missing_parent_field_fails() { + let (_tmp, root) = sandbox(); + let dir = root.join("schemas"); + fs::create_dir_all(&dir).unwrap(); + + // Parent schema (open — no additionalProperties: false) + let parent = serde_json::json!({ + "$id": "gts://gts.x.core.events.topic.v1~", + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { "type": "string", "format": "gts-instance-id" }, + "name": { "type": "string" } + }, + "required": ["id", "name"] + }); + fs::write( + dir.join("gts.x.core.events.topic.v1~.schema.json"), + serde_json::to_string_pretty(&parent).unwrap(), + ) + .unwrap(); + + // Child schema: inherits parent via allOf + $ref, adds "vendor" + let child = serde_json::json!({ + "$id": "gts://gts.x.core.events.topic.v1~x.core.audit.event.v1~", + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "allOf": [ + { "$ref": "gts://gts.x.core.events.topic.v1~" }, + { + "type": "object", + "properties": { "vendor": { "type": "string" } }, + "required": ["vendor"] + } + ] + }); + fs::write( + dir.join("gts.x.core.events.topic.v1~x.core.audit.event.v1~.schema.json"), + serde_json::to_string_pretty(&child).unwrap(), + ) + .unwrap(); + + // Instance has "vendor" but missing parent-required "name" + let src = instance_src( + "gts.x.core.events.topic.v1~x.core.audit.event.v1~x.commerce._.orders.v1.0", + r#""{\"vendor\":\"acme\"}""#, + ); + write(&root, "inst.rs", &src); + + let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("schema validation error"), + "Expected schema validation error, got: {msg}" + ); +} + +#[test] +fn schema_validation_no_schema_on_disk_passes() { + let (_tmp, root) = sandbox(); + + // No schema written — validation should be skipped silently + let src = instance_src( + "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", + 
r#""{\"name\":\"orders\"}""#, + ); + write(&root, "inst.rs", &src); + + run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap(); +} diff --git a/gts-macros/README.md b/gts-macros/README.md index 4977cc9..42e132f 100644 --- a/gts-macros/README.md +++ b/gts-macros/README.md @@ -546,7 +546,7 @@ gts generate-from-rust --source src/ --output out/ --mode instances - The JSON body **must not contain an `"id"` field** — the CLI injects it automatically from the `id` attribute. - Files in `compile_fail/` directories and files with a `// gts:ignore` directive are skipped. - Items behind `#[cfg(...)]` gates (e.g., `#[cfg(test)]`) are still extracted — extraction is lexical, not conditional. -- **Schema conformance is not validated** — the macro validates the `id` format only. It does not validate the instance JSON body against its parent schema at compile time. This is a future enhancement. +- **Schema conformance is validated at CLI time, not compile time** — the macro validates `id` format only. The CLI (`gts generate-from-rust`) validates instance JSON bodies against their parent schemas when schema files are present on disk (e.g. after `--mode all` or a prior `--mode schemas` run). If the schema file is not found, validation is skipped with a warning. ### Compile-time validation errors diff --git a/gts-macros/src/lib.rs b/gts-macros/src/lib.rs index f53a3e4..5d0dcd7 100644 --- a/gts-macros/src/lib.rs +++ b/gts-macros/src/lib.rs @@ -1982,7 +1982,8 @@ impl Parse for GtsInstanceArgs { /// /// **Note:** This macro validates the `id` *format* only (GTS ID structure). It does **not** /// validate the instance JSON body against its parent schema at compile time. Schema conformance -/// validation is a future enhancement. +/// validation is performed by the CLI (`gts generate-from-rust`) when schema files are present +/// on disk (e.g. after `--mode all` or a prior `--mode schemas` run). 
/// /// # Arguments /// From 516d58f3cf49ea5dbb34752ef25f3ab652860812 Mon Sep 17 00:00:00 2001 From: devjow Date: Thu, 12 Mar 2026 15:08:40 +0000 Subject: [PATCH 10/10] refactor: instance schema validation compile time Signed-off-by: devjow --- Cargo.lock | 3 + Cargo.toml | 5 + gts-cli/Cargo.toml | 3 + gts-cli/src/gen_instances/mod.rs | 28 +- gts-cli/src/gen_instances/parser.rs | 378 ++++++++++------- gts-cli/src/gen_instances/string_lit.rs | 132 ------ gts-cli/src/gen_instances/struct_expr.rs | 398 ++++++++++++++++++ gts-cli/tests/gen_instances_tests.rs | 266 ++++-------- gts-macros/README.md | 103 +++-- gts-macros/src/lib.rs | 318 ++++++++++---- .../compile_fail/instance_const_wrong_type.rs | 2 +- .../instance_const_wrong_type.stderr | 13 +- .../compile_fail/instance_missing_dir_path.rs | 2 +- .../instance_missing_instance_segment.rs | 2 +- .../instance_missing_schema_id.rs | 2 +- .../compile_fail/instance_on_non_const.rs | 2 +- .../compile_fail/instance_on_non_const.stderr | 2 +- .../instance_schema_id_no_tilde.rs | 2 +- .../instance_segment_bare_wildcard.rs | 2 +- .../instance_segment_ends_with_tilde.rs | 2 +- gts/src/gts.rs | 23 + 21 files changed, 1093 insertions(+), 595 deletions(-) delete mode 100644 gts-cli/src/gen_instances/string_lit.rs create mode 100644 gts-cli/src/gen_instances/struct_expr.rs diff --git a/Cargo.lock b/Cargo.lock index 0302f46..352020f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -549,9 +549,12 @@ dependencies = [ "gts", "gts-id", "jsonschema", + "proc-macro2", + "quote", "regex", "serde", "serde_json", + "syn", "tempfile", "tokio", "tower", diff --git a/Cargo.toml b/Cargo.toml index 66d8fbc..ac17c6a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -188,5 +188,10 @@ glob = "0.3" # CLI and terminal output colored = "3.0" +# Rust syntax parsing (used by gts-cli for struct expression extraction) +syn = { version = "2.0", features = ["full", "extra-traits"] } +quote = "1.0" +proc-macro2 = "1.0" + # Format parsing serde-saphyr = "0.0.10" diff 
--git a/gts-cli/Cargo.toml b/gts-cli/Cargo.toml index 01911ba..1d9be3e 100644 --- a/gts-cli/Cargo.toml +++ b/gts-cli/Cargo.toml @@ -34,6 +34,9 @@ chrono.workspace = true regex.workspace = true walkdir.workspace = true jsonschema.workspace = true +syn.workspace = true +quote.workspace = true +proc-macro2.workspace = true [dev-dependencies] tempfile = "3.8" diff --git a/gts-cli/src/gen_instances/mod.rs b/gts-cli/src/gen_instances/mod.rs index 3e24f1f..54c2cf5 100644 --- a/gts-cli/src/gen_instances/mod.rs +++ b/gts-cli/src/gen_instances/mod.rs @@ -1,7 +1,7 @@ pub mod attrs; pub mod parser; pub mod schema_check; -pub mod string_lit; +pub mod struct_expr; pub mod writer; use anyhow::{Result, bail}; @@ -179,7 +179,9 @@ fn print_summary(files_scanned: usize, files_skipped: usize, instances_generated println!(" Files skipped: {files_skipped}"); println!(" Instances generated: {instances_generated}"); if instances_generated == 0 { - println!("\n No instances found. Annotate consts with `#[gts_well_known_instance(...)]`."); + println!( + "\n No instances found. Annotate fn items with `#[gts_well_known_instance(...)]`." 
+ ); } } @@ -193,16 +195,18 @@ mod tests { fs::write(dir.join(name), content).unwrap(); } - fn valid_src(id: &str, json_body: &str) -> String { + fn valid_src(id: &str, struct_body: &str) -> String { format!( concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", " id = \"{}\"\n", ")]\n", - "pub const FOO: &str = {};\n" + "fn get_instance_item_v1() -> MyStruct {{\n", + " {}\n", + "}}\n" ), - id, json_body + id, struct_body ) } @@ -216,7 +220,7 @@ mod tests { "module.rs", &valid_src( "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", - r#""{\"name\": \"orders\", \"partitions\": 16}""#, + r#"MyStruct { name: String::from("orders"), partitions: 16 }"#, ), ); @@ -256,12 +260,16 @@ mod tests { " dir_path = \"instances\",\n", " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", - "const A: &str = \"{\\\"name\\\": \\\"a\\\"}\";\n", + "fn get_instance_orders_v1() -> MyStruct {\n", + " MyStruct { name: String::from(\"a\") }\n", + "}\n", "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", - "const B: &str = \"{\\\"name\\\": \\\"b\\\"}\";\n" + "fn get_instance_orders2_v1() -> MyStruct {\n", + " MyStruct { name: String::from(\"b\") }\n", + "}\n" ); write_src(&root, "dup.rs", dup_src); @@ -295,7 +303,9 @@ mod tests { " dir_path = \"instances\",\n", " id = \"bad-no-tilde\"\n", ")]\n", - "const X: &str = \"{}\";\n" + "fn get_instance_bad_v1() -> MyStruct {\n", + " MyStruct { name: String::from(\"x\") }\n", + "}\n" ), ); diff --git a/gts-cli/src/gen_instances/parser.rs b/gts-cli/src/gen_instances/parser.rs index b571fbf..4e6d6a2 100644 --- a/gts-cli/src/gen_instances/parser.rs +++ b/gts-cli/src/gen_instances/parser.rs @@ -3,14 +3,14 @@ use regex::Regex; use std::path::Path; use super::attrs::{InstanceAttrs, parse_instance_attrs}; -use super::string_lit::decode_string_literal; +use super::struct_expr::struct_expr_to_json; /// A parsed and validated 
instance annotation, ready for file generation. #[derive(Debug)] #[allow(dead_code)] pub struct ParsedInstance { pub attrs: InstanceAttrs, - /// Raw JSON body string (as written in the const value, decoded from the literal). + /// JSON body extracted from the function's struct expression (without "id" field). pub json_body: String, /// Absolute path of the source file containing this annotation. pub source_file: String, @@ -18,7 +18,7 @@ pub struct ParsedInstance { pub line: usize, } -/// Extract all `#[gts_well_known_instance]`-annotated consts from a source text. +/// Extract all `#[gts_well_known_instance]`-annotated functions from a source text. /// /// Three outcomes per the extraction contract: /// 1. No annotation token found (preflight negative) → `Ok(vec![])` (fast path, no errors) @@ -27,6 +27,9 @@ pub struct ParsedInstance { /// /// # Errors /// Returns an error if an annotation is found but cannot be parsed or validated. +/// +/// # Panics +/// Panics if the annotation regex produces a match with no capture group (should never happen). pub fn extract_instances_from_source( content: &str, source_file: &Path, @@ -46,18 +49,26 @@ pub fn extract_instances_from_source( let mut instances = Vec::new(); for cap in annotation_re.captures_iter(&stripped) { - let full_start = cap.get(0).map_or(0, |m| m.start()); + let Some(full_match) = cap.get(0) else { + continue; + }; + let full_start = full_match.start(); + let match_end = full_match.end(); let line = byte_offset_to_line(full_start, &line_offsets); let attr_body = &cap[1]; let attrs = parse_instance_attrs(attr_body, &source_file_str, line)?; - let raw_literal = &cap[2]; - let json_body = decode_string_literal(raw_literal).map_err(|e| { - anyhow::anyhow!("{source_file_str}:{line}: Failed to decode string literal: {e}") + // The regex matches up to (but not including) the opening `{`. + // Use brace-depth counting to extract the function body. 
+ let (_body_end, fn_body) = extract_fn_body_at(&stripped, match_end).ok_or_else(|| { + anyhow::anyhow!( + "{source_file_str}:{line}: Could not find function body for \ + #[gts_well_known_instance] annotation. Expected `{{ ... }}` after function signature." + ) })?; - validate_json_body(&json_body, &source_file_str, line)?; + let json_body = extract_json_from_fn_body(fn_body, &source_file_str, line)?; instances.push(ParsedInstance { attrs, @@ -72,59 +83,86 @@ pub fn extract_instances_from_source( // Preflight was positive but neither the main regex nor unsupported-form // checks matched anything — the annotation is in a form we don't recognise - // (e.g. applied to a fn, enum, or a completely garbled attribute body). + // (e.g. applied to a const, enum, or a completely garbled attribute body). // This is a hard error per the extraction contract. if instances.is_empty() { let needle_line = find_needle_line(content, &line_offsets); bail!( "{source_file_str}:{needle_line}: `#[gts_well_known_instance]` annotation found \ - but could not be parsed. The annotation must be on a `const NAME: &str = ;` \ - item. Check for typos, unsupported item kinds, or missing required attributes." + but could not be parsed. The annotation must be on a \ + `fn get_instance_name_v1() -> SchemaType {{ ... }}` item. \ + Check for typos, unsupported item kinds, or missing required attributes." ); } Ok(instances) } -/// Validate that the decoded JSON body is a non-empty object without an `"id"` field. -fn validate_json_body(json_body: &str, source_file: &str, line: usize) -> Result<()> { - let json_val: serde_json::Value = serde_json::from_str(json_body).map_err(|e| { +/// Extract JSON from a function body by parsing the struct expression. +/// +/// The function body should contain a single struct expression (the last/only +/// expression in the block). This is parsed with `syn` and converted to JSON +/// via the `struct_expr` module. 
+fn extract_json_from_fn_body(fn_body: &str, source_file: &str, line: usize) -> Result<String> { + // Re-wrap the body in braces so it parses as a block (`fn_body` is the content inside the outermost {}) + let block_src = format!("{{ {fn_body} }}"); + let block: syn::Block = syn::parse_str(&block_src).map_err(|e| { + anyhow::anyhow!("{source_file}:{line}: Failed to parse function body as Rust code: {e}") + })?; + + // Find the struct expression — it must be the trailing expression of the block + // (only a final expression without a terminating semicolon is accepted) + let struct_expr = extract_struct_expr_from_block(&block).ok_or_else(|| { anyhow::anyhow!( - "{}:{}: Malformed JSON in instance body: {} (at JSON line {}, col {})", - source_file, - line, - e, - e.line(), - e.column() + "{source_file}:{line}: Function body must contain a struct expression \ + (e.g., `MyStruct {{ field: value }}`). Could not find a struct expression." ) })?; - if !json_val.is_object() { - bail!( - "{}:{}: Instance JSON body must be a JSON object {{...}}, got {}. \ - Arrays, strings, numbers, booleans, and null are not valid instance bodies.", - source_file, - line, - json_type_name(&json_val) - ); + let json_value = struct_expr_to_json(struct_expr).map_err(|e| { + anyhow::anyhow!("{source_file}:{line}: Failed to convert struct expression to JSON: {e}") + })?; + + // The JSON should be an object + if !json_value.is_object() { + bail!("{source_file}:{line}: Struct expression did not produce a JSON object"); } - if json_val.get("id").is_some() { - bail!( - "{source_file}:{line}: Instance JSON body must not contain an \"id\" field. \ - The id is automatically injected from the `id` attribute. \ - Remove the \"id\" field from the JSON body." - ); + serde_json::to_string(&json_value).map_err(|e| { + anyhow::anyhow!("{source_file}:{line}: Failed to serialize struct expression to JSON: {e}") + }) +} + +/// Extract the struct expression from a parsed block.
+/// +/// Looks for a `syn::ExprStruct` as the trailing expression of the block. +fn extract_struct_expr_from_block(block: &syn::Block) -> Option<&syn::ExprStruct> { + // Check the trailing expression first (block without semicolon on last line) + if let Some(syn::Stmt::Expr(expr, None)) = block.stmts.last() { + return find_struct_expr(expr); } + None +} - Ok(()) +/// Recursively find a struct expression, unwrapping parentheses and other wrappers. +fn find_struct_expr(expr: &syn::Expr) -> Option<&syn::ExprStruct> { + match expr { + syn::Expr::Struct(s) => Some(s), + syn::Expr::Paren(p) => find_struct_expr(&p.expr), + syn::Expr::Group(g) => find_struct_expr(&g.expr), + _ => None, + } } -/// Build the regex matching `#[gts_well_known_instance(...)] const NAME: &str = ;` +/// Build the regex matching `#[gts_well_known_instance(...)] fn name() -> Type { ... }` /// /// Capture groups: /// 1. Attribute body (everything inside the outer parentheses) -/// 2. The string literal token (raw or regular) +/// 2. The function body content (everything inside the outermost `{ }`, extracted +/// by `extract_fn_body_at` after the regex provides the match end position) +/// +/// Note: Because function bodies can contain nested braces, the regex only matches +/// up to the opening `{`. The body is then extracted by brace-depth counting. fn build_annotation_regex() -> Result<Regex> { let pattern = concat!( // (1) Macro attribute body @@ -134,15 +172,91 @@ fn build_annotation_regex() -> Result<Regex> { r"\s*", // Optional visibility: pub / pub(crate) / pub(super) / pub(in path) r"(?:pub\s*(?:\([^)]*\)\s*)?)?", - // const NAME: &str = (optional 'static lifetime) - r"const\s+\w+\s*:\s*&\s*(?:'static\s+)?str\s*=\s*", - // (2) String literal: raw r"..." / r#"..."# / r##"..."## (0+ hashes) or regular "..." - "(r#*\"[\\s\\S]*?\"#*|\"(?:[^\"\\\\]|\\\\.)*\")", - r"\s*;" + // fn name() -> ReturnType (with optional generics in return type) + r"fn\s+\w+\s*\(\s*\)\s*->\s*[^{]+", ); Ok(Regex::new(pattern)?)
} +/// Extract the function body starting from the opening `{` at or after `start_pos`. +/// +/// Uses brace-depth counting to correctly handle nested braces. +/// Returns the byte offset just past the closing brace and the content between the outermost braces (exclusive). +fn extract_fn_body_at(content: &str, start_pos: usize) -> Option<(usize, &str)> { + let bytes = content.as_bytes(); + let len = bytes.len(); + + // Find the opening brace + let mut i = start_pos; + while i < len && bytes[i] != b'{' { + i += 1; + } + if i >= len { + return None; + } + + let body_start = i + 1; // after the opening { + let mut depth = 1; + i += 1; + + while i < len && depth > 0 { + match bytes[i] { + b'{' => depth += 1, + b'}' => depth -= 1, + b'/' if i + 1 < len && bytes[i + 1] == b'/' => { + // Skip line comments + while i < len && bytes[i] != b'\n' { + i += 1; + } + continue; + } + b'/' if i + 1 < len && bytes[i + 1] == b'*' => { + // Skip block comments + i += 2; + while i + 1 < len && !(bytes[i] == b'*' && bytes[i + 1] == b'/') { + i += 1; + } + if i + 1 < len { + i += 2; + } + continue; + } + b'"' => { + // Skip string literals + i += 1; + while i < len { + if bytes[i] == b'\\' { + i += 2; + continue; + } + if bytes[i] == b'"' { + i += 1; + break; + } + i += 1; + } + continue; + } + b'r' if i + 1 < len && (bytes[i + 1] == b'"' || bytes[i + 1] == b'#') => { + // Skip raw string literals + if let Some(after) = try_skip_raw_string(bytes, i) { + i = after; + continue; + } + } + _ => {} + } + i += 1; + } + + if depth == 0 { + let body_end = i - 1; // before the closing } + Some((i, &content[body_start..body_end])) + } else { + None + } +} + /// Token-aware scan: finds `#[gts_well_known_instance` or /// `#[gts_macros::gts_well_known_instance` outside comments and string literals. /// Returns `true` if at least one candidate attribute token is found. @@ -290,47 +404,19 @@ fn try_skip_raw_string(bytes: &[u8], start: usize) -> Option<usize> { /// /// NOTE: uses `(?s)` (dotall) flag so the attr body may span multiple lines.
fn check_unsupported_forms(content: &str, source_file: &str, line_offsets: &[usize]) -> Result<()> { - // static instead of const - let static_re = Regex::new( - r"(?s)#\[(?:gts_macros::)?gts_well_known_instance\(.*?\)\]\s*(?:#\[[^\]]*\]\s*)*(?:pub\s*(?:\([^)]*\)\s*)?)?static\s", + // Old const &str form (including static) + let const_re = Regex::new( + r"(?s)#\[(?:gts_macros::)?gts_well_known_instance\(.*?\)\]\s*(?:#\[[^\]]*\]\s*)*(?:pub\s*(?:\([^)]*\)\s*)?)?(?:const|static)\s", )?; - if let Some(m) = static_re.find(content) { + if let Some(m) = const_re.find(content) { let line = byte_offset_to_line(m.start(), line_offsets); bail!( - "{source_file}:{line}: `#[gts_well_known_instance]` cannot be applied to `static` items. \ - Use `const NAME: &str = ...` instead." + "{source_file}:{line}: `#[gts_well_known_instance]` no longer supports `const` or `static` items. \ + Use a function returning a typed struct instead:\n\ + \n fn get_instance_name_v1() -> SchemaType {{\n SchemaType {{ id: GtsInstanceId::ID, ... }}\n }}" ); } - // concat!() as value - let concat_re = Regex::new( - r"(?s)#\[(?:gts_macros::)?gts_well_known_instance\(.*?\)\]\s*(?:#\[[^\]]*\]\s*)*(?:pub\s*(?:\([^)]*\)\s*)?)?const\s+\w+\s*:\s*&\s*(?:'static\s+)?str\s*=\s*concat\s*!", - )?; - if let Some(m) = concat_re.find(content) { - let line = byte_offset_to_line(m.start(), line_offsets); - bail!( - "{source_file}:{line}: `concat!()` is not supported as the const value for \ - `#[gts_well_known_instance]`. Use a raw string literal `r#\"...\"#` instead." 
- ); - } - - // const with wrong type (not &str) — checked last as it's broader - // Note: we use a positive match for the non-&str case to avoid lookahead - let wrong_type_re = Regex::new( - r"(?s)#\[(?:gts_macros::)?gts_well_known_instance\(.*?\)\]\s*(?:#\[[^\]]*\]\s*)*(?:pub\s*(?:\([^)]*\)\s*)?)?const\s+\w+\s*:\s*&\s*(?:'static\s+)?([A-Za-z][A-Za-z0-9_]*)\b", - )?; - if let Some(cap) = wrong_type_re.captures(content) { - let ty = cap.get(1).map_or("", |m| m.as_str()); - if ty != "str" { - let start = cap.get(0).map_or(0, |m| m.start()); - let line = byte_offset_to_line(start, line_offsets); - bail!( - "{source_file}:{line}: `#[gts_well_known_instance]` requires `const NAME: &str`. \ - The annotated const must have type `&str`." - ); - } - } - Ok(()) } @@ -431,31 +517,23 @@ fn find_needle_line(content: &str, line_offsets: &[usize]) -> usize { pos.map_or(1, |p| byte_offset_to_line(p, line_offsets)) } -fn json_type_name(val: &serde_json::Value) -> &'static str { - match val { - serde_json::Value::Null => "null", - serde_json::Value::Bool(_) => "boolean", - serde_json::Value::Number(_) => "number", - serde_json::Value::String(_) => "string", - serde_json::Value::Array(_) => "array", - serde_json::Value::Object(_) => "object", - } -} - #[cfg(test)] mod tests { use super::*; - fn src(body: &str) -> String { + /// Build a source string with a fn-based instance annotation. + fn src(fn_body: &str) -> String { format!( concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", - "const FOO: &str = {};\n" + "fn get_instance_orders_v1() -> MyStruct {{\n", + " {}\n", + "}}\n" ), - body + fn_body ) } @@ -481,8 +559,6 @@ mod tests { #[test] fn test_preflight_negative_bare_use_statement() { - // `use gts_macros::gts_well_known_instance;` must NOT be a positive — - // it lacks the required `#[` attribute-open prefix. 
assert!(!preflight_scan( "use gts_macros::gts_well_known_instance;\nconst X: u32 = 1;\n" )); @@ -490,7 +566,6 @@ mod tests { #[test] fn test_preflight_positive_after_static_lifetime() { - // `'static` before the annotation must NOT suppress it (false-negative fix). let src = concat!( "fn foo(x: &'static str) -> u32 { 0 }\n", "#[gts_well_known_instance(x)]\n" @@ -500,7 +575,6 @@ mod tests { #[test] fn test_preflight_positive_after_named_lifetime() { - // `'a` lifetime before the annotation must NOT suppress it. let src = concat!( "fn bar<'a>(x: &'a str) -> &'a str { x }\n", "#[gts_well_known_instance(x)]\n" @@ -510,8 +584,6 @@ mod tests { #[test] fn test_preflight_positive_char_literal_hash() { - // A char literal containing '#' must not be the needle itself. - // But the real annotation after it must still be found. let src = concat!( "fn check(c: char) -> bool { c == '#' }\n", "#[gts_well_known_instance(x)]\n" @@ -520,8 +592,8 @@ mod tests { } #[test] - fn test_extract_regular_string() { - let content = src(r#""{\"name\": \"orders\"}""#); + fn test_extract_simple_struct() { + let content = src("MyStruct { name: String::from(\"orders\") }"); let result = extract_instances_from_source(&content, Path::new("t.rs")).unwrap(); assert_eq!(result.len(), 1); assert_eq!( @@ -530,60 +602,42 @@ mod tests { ); assert_eq!(result[0].attrs.schema_id, "gts.x.core.events.topic.v1~"); assert_eq!(result[0].attrs.instance_segment, "x.commerce._.orders.v1.0"); + // JSON body should contain "name" + let json: serde_json::Value = serde_json::from_str(&result[0].json_body).unwrap(); + assert_eq!(json["name"], "orders"); } #[test] fn test_no_annotation_returns_empty() { - let content = "const FOO: &str = \"hello\";"; + let content = "fn foo() -> u32 { 42 }"; let result = extract_instances_from_source(content, Path::new("t.rs")).unwrap(); assert!(result.is_empty()); } #[test] - fn test_rejects_id_in_body() { - let content = src(r#""{\"id\": \"bad\", \"name\": \"x\"}""#); - let err = 
extract_instances_from_source(&content, Path::new("t.rs")).unwrap_err(); - assert!(err.to_string().contains("\"id\" field")); - } - - #[test] - fn test_rejects_non_object_json() { - let content = src("\"[1, 2, 3]\""); - let err = extract_instances_from_source(&content, Path::new("t.rs")).unwrap_err(); - assert!(err.to_string().contains("JSON object")); - } - - #[test] - fn test_rejects_malformed_json() { - let content = src(r#""{not valid json}""#); - let err = extract_instances_from_source(&content, Path::new("t.rs")).unwrap_err(); - assert!(err.to_string().contains("Malformed JSON")); - } - - #[test] - fn test_rejects_static_item() { + fn test_rejects_const_form() { let content = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", " id = \"gts.x.foo.v1~x.bar.v1.0\"\n", ")]\n", - "static FOO: &str = \"{}\";\n" + "const FOO: &str = \"{}\";\n" ); let err = extract_instances_from_source(content, Path::new("t.rs")).unwrap_err(); - assert!(err.to_string().contains("static")); + assert!(err.to_string().contains("no longer supports")); } #[test] - fn test_rejects_concat_macro() { + fn test_rejects_static_item() { let content = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", " id = \"gts.x.foo.v1~x.bar.v1.0\"\n", ")]\n", - "const FOO: &str = concat!(\"{\", \"}\");\n" + "static FOO: &str = \"{}\";\n" ); let err = extract_instances_from_source(content, Path::new("t.rs")).unwrap_err(); - assert!(err.to_string().contains("concat!()")); + assert!(err.to_string().contains("no longer supports")); } #[test] @@ -593,12 +647,16 @@ mod tests { " dir_path = \"instances\",\n", " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", - "const A: &str = \"{\\\"name\\\": \\\"orders\\\"}\";\n", + "fn get_instance_orders_v1() -> MyStruct {\n", + " MyStruct { name: String::from(\"orders\") }\n", + "}\n", "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", " id = 
\"gts.x.core.events.topic.v1~x.commerce._.payments.v1.0\"\n", ")]\n", - "const B: &str = \"{\\\"name\\\": \\\"payments\\\"}\";\n" + "fn get_instance_payments_v1() -> MyStruct {\n", + " MyStruct { name: String::from(\"payments\") }\n", + "}\n" ); let result = extract_instances_from_source(content, Path::new("t.rs")).unwrap(); assert_eq!(result.len(), 2); @@ -611,27 +669,59 @@ mod tests { " dir_path = \"instances\",\n", " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", - "pub const FOO: &str = \"{\\\"name\\\": \\\"orders\\\"}\";\n" + "pub fn get_instance_orders_v1() -> MyStruct {\n", + " MyStruct { name: String::from(\"orders\") }\n", + "}\n" ); let result = extract_instances_from_source(content, Path::new("t.rs")).unwrap(); assert_eq!(result.len(), 1); } #[test] - fn test_line_number_reported() { - let content = concat!( - "// line 1\n", - "// line 2\n", - "#[gts_well_known_instance(\n", // line 3 - " dir_path = \"instances\",\n", - " id = \"gts.x.foo.v1~x.bar.v1.0\"\n", - ")]\n", - "const FOO: &str = \"{\\\"id\\\": \\\"bad\\\"}\";\n" + fn test_gts_instance_id_sentinel_skipped() { + let content = src("MyStruct { id: GtsInstanceId::ID, name: String::from(\"test\") }"); + let result = extract_instances_from_source(&content, Path::new("t.rs")).unwrap(); + assert_eq!(result.len(), 1); + let json: serde_json::Value = serde_json::from_str(&result[0].json_body).unwrap(); + assert!( + json.get("id").is_none(), + "GtsInstanceId::ID should be skipped" + ); + assert_eq!(json["name"], "test"); + } + + #[test] + fn test_unit_placeholder_skipped() { + let content = src("MyStruct { name: String::from(\"test\"), properties: () }"); + let result = extract_instances_from_source(&content, Path::new("t.rs")).unwrap(); + assert_eq!(result.len(), 1); + let json: serde_json::Value = serde_json::from_str(&result[0].json_body).unwrap(); + assert_eq!( + json["properties"], + serde_json::json!({}), + "() should produce empty object" ); - let err = 
extract_instances_from_source(content, Path::new("events.rs")).unwrap_err(); - let msg = err.to_string(); - assert!(msg.contains("events.rs")); - // line 3 is where the annotation starts - assert!(msg.contains(":3:"), "Expected line 3 in: {msg}"); + } + + #[test] + fn test_nested_struct() { + let content = src("Outer { inner: Inner { value: 99 }, name: String::from(\"test\") }"); + let result = extract_instances_from_source(&content, Path::new("t.rs")).unwrap(); + assert_eq!(result.len(), 1); + let json: serde_json::Value = serde_json::from_str(&result[0].json_body).unwrap(); + assert_eq!(json["inner"]["value"], 99); + assert_eq!(json["name"], "test"); + } + + #[test] + fn test_vec_macro_in_struct() { + let content = src("MyStruct { tags: vec![String::from(\"a\"), String::from(\"b\")] }"); + let result = extract_instances_from_source(&content, Path::new("t.rs")).unwrap(); + assert_eq!(result.len(), 1); + let json: serde_json::Value = serde_json::from_str(&result[0].json_body).unwrap(); + let tags = json["tags"].as_array().unwrap(); + assert_eq!(tags.len(), 2); + assert_eq!(tags[0], "a"); + assert_eq!(tags[1], "b"); } } diff --git a/gts-cli/src/gen_instances/string_lit.rs b/gts-cli/src/gen_instances/string_lit.rs deleted file mode 100644 index b517bd8..0000000 --- a/gts-cli/src/gen_instances/string_lit.rs +++ /dev/null @@ -1,132 +0,0 @@ -use anyhow::{Result, bail}; - -/// Decode a Rust string literal token to its actual string content. -/// -/// Supports: -/// - Raw strings: `r#"..."#`, `r##"..."##`, etc. (content is verbatim) -/// - Regular strings: `"..."` with standard Rust escape sequences -/// -/// # Errors -/// Returns an error if the token is not a recognized string literal form or contains invalid escapes. 
-pub fn decode_string_literal(token: &str) -> Result<String> { - if token.starts_with('r') { - decode_raw_string(token) - } else if token.starts_with('"') && token.ends_with('"') && token.len() >= 2 { - let inner = &token[1..token.len() - 1]; - decode_string_escapes(inner) - } else { - bail!( - "Unrecognized string literal form: {}", - &token[..token.len().min(40)] - ) - } -} - -fn decode_raw_string(token: &str) -> Result<String> { - let after_r = &token[1..]; - let hash_count = after_r.chars().take_while(|&c| c == '#').count(); - let inner = &after_r[hash_count..]; - let inner = inner - .strip_prefix('"') - .ok_or_else(|| anyhow::anyhow!("Invalid raw string literal: missing opening quote"))?; - let closing = format!("\"{}", "#".repeat(hash_count)); - let inner = inner.strip_suffix(closing.as_str()).ok_or_else(|| { - anyhow::anyhow!("Invalid raw string literal: missing closing quote+hashes") - })?; - Ok(inner.to_owned()) -} - -fn decode_string_escapes(s: &str) -> Result<String> { - let mut result = String::with_capacity(s.len()); - let mut chars = s.chars().peekable(); - while let Some(c) = chars.next() { - if c != '\\' { - result.push(c); - continue; - } - match chars.next() { - Some('n') => result.push('\n'), - Some('r') => result.push('\r'), - Some('t') => result.push('\t'), - Some('\\') => result.push('\\'), - Some('"') => result.push('"'), - Some('\'') => result.push('\''), - Some('0') => result.push('\0'), - Some('u') => { - if chars.next() != Some('{') { - bail!("Invalid unicode escape: expected {{"); - } - let hex: String = chars.by_ref().take_while(|&c| c != '}').collect(); - let code = u32::from_str_radix(&hex, 16) - .map_err(|_| anyhow::anyhow!("Invalid unicode escape \\u{{{hex}}}"))?; - let ch = char::from_u32(code) - .ok_or_else(|| anyhow::anyhow!("Invalid unicode code point {code}"))?; - result.push(ch); - } - Some(c) => bail!("Unsupported escape sequence: \\{c}"), - None => bail!("Unexpected end of string after backslash"), - } - } - Ok(result) -} - -#[cfg(test)] -mod tests
{ - use super::*; - - #[test] - fn test_raw_string_no_hashes() { - let token = "r\"{\\\"k\\\": 1}\""; - // r"{\"k\": 1}" — content is verbatim including backslashes - let result = decode_string_literal(token).unwrap(); - assert_eq!(result, "{\\\"k\\\": 1}"); - } - - #[test] - fn test_raw_string_one_hash() { - // Simulated: r#"{"k": 1}"# - let token = "r#\"{\\\"k\\\": 1}\"#"; - let result = decode_string_literal(token).unwrap(); - assert_eq!(result, "{\\\"k\\\": 1}"); - } - - #[test] - fn test_regular_string_simple() { - let token = "\"hello world\""; - let result = decode_string_literal(token).unwrap(); - assert_eq!(result, "hello world"); - } - - #[test] - fn test_regular_string_escapes() { - let token = "\"line1\\nline2\\ttab\""; - let result = decode_string_literal(token).unwrap(); - assert_eq!(result, "line1\nline2\ttab"); - } - - #[test] - fn test_regular_string_escaped_quote() { - let token = r#""{\"name\":\"foo\"}""#; - let result = decode_string_literal(token).unwrap(); - assert_eq!(result, "{\"name\":\"foo\"}"); - } - - #[test] - fn test_unicode_escape() { - let token = "\"\\u{1F600}\""; - let result = decode_string_literal(token).unwrap(); - assert_eq!(result, "\u{1F600}"); - } - - #[test] - fn test_invalid_escape() { - let token = "\"\\q\""; - assert!(decode_string_literal(token).is_err()); - } - - #[test] - fn test_unrecognized_form() { - let token = "b\"bytes\""; - assert!(decode_string_literal(token).is_err()); - } -} diff --git a/gts-cli/src/gen_instances/struct_expr.rs b/gts-cli/src/gen_instances/struct_expr.rs new file mode 100644 index 0000000..9c215ac --- /dev/null +++ b/gts-cli/src/gen_instances/struct_expr.rs @@ -0,0 +1,398 @@ +use anyhow::{Result, bail}; +use serde_json::Value; + +/// Convert a `syn::ExprStruct` (parsed from a function body) to a `serde_json::Value` object. 
+/// +/// Handles the field value types that are valid in instance definitions: +/// - String literals: `"hello"` → JSON string +/// - `String::from("hello")` / `"hello".to_string()` / `"hello".to_owned()` → JSON string +/// - Integer literals: `42` → JSON number +/// - Float literals: `3.14` → JSON number +/// - Boolean literals: `true` / `false` → JSON boolean +/// - Unit value `()` → empty JSON object `{}` (used for generic type parameter placeholders) +/// - `GtsInstanceId::ID` → skipped (sentinel value, replaced by CLI with real ID) +/// - Nested struct expressions → recursive JSON object +/// - `vec![...]` / array expressions → JSON array +/// +/// # Errors +/// Returns an error if a field value cannot be converted to JSON. +pub fn struct_expr_to_json(expr: &syn::ExprStruct) -> Result<Value> { + let mut map = serde_json::Map::new(); + + for field in &expr.fields { + let field_name = field.member.clone(); + let name = match &field_name { + syn::Member::Named(ident) => ident.to_string(), + syn::Member::Unnamed(idx) => idx.index.to_string(), + }; + + // Convert the field value + match expr_to_json(&field.expr) { + Ok(Some(value)) => { + map.insert(name, value); + } + Ok(None) => { + // Skipped value (e.g., the GtsInstanceId::ID sentinel) + } + Err(e) => bail!("Field '{name}': {e}"), + } + } + + Ok(Value::Object(map)) +} + +/// Convert a `syn::Expr` to a `serde_json::Value`. +/// +/// Returns `Ok(None)` for values that should be skipped (e.g. the `GtsInstanceId::ID` sentinel). +fn expr_to_json(expr: &syn::Expr) -> Result<Option<Value>> { + match expr { + // String literal: "hello" + syn::Expr::Lit(syn::ExprLit { + lit: syn::Lit::Str(s), + .. + }) => Ok(Some(Value::String(s.value()))), + + // Integer literal: 42, -1 + syn::Expr::Lit(syn::ExprLit { + lit: syn::Lit::Int(i), + ..
+ }) => { + // Try parsing as i64 first, then u64 + if let Ok(n) = i.base10_parse::<i64>() { + Ok(Some(Value::Number(serde_json::Number::from(n)))) + } else if let Ok(n) = i.base10_parse::<u64>() { + Ok(Some(Value::Number(serde_json::Number::from(n)))) + } else { + bail!("Cannot parse integer literal: {i}") + } + } + + // Float literal: 3.14 + syn::Expr::Lit(syn::ExprLit { + lit: syn::Lit::Float(f), + .. + }) => { + let n: f64 = f.base10_parse()?; + let num = serde_json::Number::from_f64(n) + .ok_or_else(|| anyhow::anyhow!("Cannot represent float as JSON: {f}"))?; + Ok(Some(Value::Number(num))) + } + + // Boolean literal: true / false + syn::Expr::Lit(syn::ExprLit { + lit: syn::Lit::Bool(b), + .. + }) => Ok(Some(Value::Bool(b.value))), + + // Unary negation: -42 + syn::Expr::Unary(syn::ExprUnary { + op: syn::UnOp::Neg(_), + expr: inner, + .. + }) => match inner.as_ref() { + syn::Expr::Lit(syn::ExprLit { + lit: syn::Lit::Int(i), + .. + }) => { + let n: i64 = i.base10_parse::<i64>().map(|v| -v)?; + Ok(Some(Value::Number(serde_json::Number::from(n)))) + } + syn::Expr::Lit(syn::ExprLit { + lit: syn::Lit::Float(f), + .. + }) => { + let n: f64 = f.base10_parse::<f64>().map(|v| -v)?; + let num = serde_json::Number::from_f64(n) + .ok_or_else(|| anyhow::anyhow!("Cannot represent float as JSON: -{f}"))?; + Ok(Some(Value::Number(num))) + } + _ => bail!("Unsupported negation expression: {}", quote::quote!(#expr)), + }, + + // Unit expression: () — produce empty object (used for generic placeholders) + syn::Expr::Tuple(tuple) if tuple.elems.is_empty() => { + Ok(Some(Value::Object(serde_json::Map::new()))) + } + + // Path expressions: check for GtsInstanceId::ID sentinel + syn::Expr::Path(path) => { + if is_gts_instance_id_sentinel(path) { + Ok(None) // Sentinel — skipped, CLI injects real ID + } else if path.path.is_ident("true") { + Ok(Some(Value::Bool(true))) + } else if path.path.is_ident("false") { + Ok(Some(Value::Bool(false))) + } else { + bail!( + "Unsupported path expression: {}.
Only literal values, String::from(), \ + and GtsInstanceId::ID are supported.", + quote::quote!(#expr) + ) + } + } + + // Function/method calls: String::from("..."), "...".to_string(), "...".to_owned(), vec![...] + syn::Expr::Call(call) => handle_call_expr(call), + syn::Expr::MethodCall(method) => handle_method_call_expr(method), + + // Nested struct expression: NestedStruct { field: value, ... } + syn::Expr::Struct(s) => { + let obj = struct_expr_to_json(s)?; + Ok(Some(obj)) + } + + // Array/Vec literal: vec![...] or [...] + syn::Expr::Array(arr) => { + let items: Result> = arr + .elems + .iter() + .filter_map(|e| match expr_to_json(e) { + Ok(Some(v)) => Some(Ok(v)), + Ok(None) => None, + Err(e) => Some(Err(e)), + }) + .collect(); + Ok(Some(Value::Array(items?))) + } + + // Macro invocations: vec![...] + syn::Expr::Macro(mac) => handle_macro_expr(mac), + + // Reference expressions: &"hello" (strip the reference) + syn::Expr::Reference(syn::ExprReference { expr: inner, .. }) + // Group expressions (parenthesized): (expr) + | syn::Expr::Group(syn::ExprGroup { expr: inner, .. }) + | syn::Expr::Paren(syn::ExprParen { expr: inner, .. }) => expr_to_json(inner), + + _ => bail!( + "Unsupported expression type in instance body: {}. 
\ + Only literal values, String::from(), struct expressions, \ + vec![], arrays, and GtsInstanceId::ID are supported.", + quote::quote!(#expr) + ), + } +} + +/// Check if a path expression is `GtsInstanceId::ID` (or `gts::GtsInstanceId::ID`) +fn is_gts_instance_id_sentinel(path: &syn::ExprPath) -> bool { + let segs: Vec = path + .path + .segments + .iter() + .map(|s| s.ident.to_string()) + .collect(); + // Match: GtsInstanceId::ID or gts::GtsInstanceId::ID + (segs.len() == 2 && segs[0] == "GtsInstanceId" && segs[1] == "ID") + || (segs.len() == 3 && segs[0] == "gts" && segs[1] == "GtsInstanceId" && segs[2] == "ID") +} + +/// Handle function call expressions like `String::from("hello")` +fn handle_call_expr(call: &syn::ExprCall) -> Result> { + // Check for String::from("...") + if let syn::Expr::Path(func_path) = call.func.as_ref() { + let segments: Vec = func_path + .path + .segments + .iter() + .map(|s| s.ident.to_string()) + .collect(); + + if segments.len() == 2 + && segments[0] == "String" + && segments[1] == "from" + && call.args.len() == 1 + { + // SAFETY: len == 1 checked above, so first() is always Some + return expr_to_json(&call.args[0]); + } + } + + bail!( + "Unsupported function call: {}. Only String::from() is supported.", + quote::quote!(#call) + ) +} + +/// Handle method call expressions like `"hello".to_string()`, `"hello".to_owned()` +fn handle_method_call_expr(method: &syn::ExprMethodCall) -> Result> { + let method_name = method.method.to_string(); + if (method_name == "to_string" || method_name == "to_owned" || method_name == "into") + && method.args.is_empty() + { + return expr_to_json(&method.receiver); + } + + bail!( + "Unsupported method call: {}. Only .to_string(), .to_owned(), and .into() are supported.", + quote::quote!(#method) + ) +} + +/// Parse a token stream as a comma-separated list of expressions. 
+fn parse_comma_separated_exprs(tokens: proc_macro2::TokenStream) -> Result<Vec<syn::Expr>> {
+    #![allow(clippy::needless_pass_by_value)]
+    // Wrap in brackets to make it parseable as an array expression
+    let wrapped: proc_macro2::TokenStream = quote::quote! { [ #tokens ] };
+    let arr: syn::ExprArray = syn::parse2(wrapped)
+        .map_err(|e| anyhow::anyhow!("Failed to parse comma-separated expressions: {e}"))?;
+    Ok(arr.elems.into_iter().collect())
+}
+
+/// Handle macro invocations like `vec![...]`
+///
+/// Only the bare `vec!` macro is accepted. Its elements are converted with
+/// `expr_to_json`; elements that convert to `Ok(None)` are dropped from the array.
+fn handle_macro_expr(mac: &syn::ExprMacro) -> Result<Option<Value>> {
+    let path = &mac.mac.path;
+
+    // Compare the path structurally; the token string is only built for the error message.
+    if !path.is_ident("vec") {
+        let path_str = quote::quote!(#path).to_string();
+        bail!("Unsupported macro invocation: {path_str}. Only vec![] is supported.")
+    }
+
+    let exprs = parse_comma_separated_exprs(mac.mac.tokens.clone())?;
+
+    // `transpose` maps Ok(None) to None so `filter_map` drops skipped elements, and
+    // collecting into a Result stops at the first conversion error.
+    let values = exprs
+        .iter()
+        .filter_map(|e| expr_to_json(e).transpose())
+        .collect::<Result<Vec<Value>>>()?;
+    Ok(Some(Value::Array(values)))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use syn::parse_quote;
+
+    #[test]
+    fn test_simple_struct() {
+        let expr: syn::ExprStruct = parse_quote! {
+            MyStruct {
+                name: String::from("orders"),
+                count: 42,
+                active: true
+            }
+        };
+        let json = struct_expr_to_json(&expr).unwrap();
+        assert_eq!(json["name"], "orders");
+        assert_eq!(json["count"], 42);
+        assert_eq!(json["active"], true);
+    }
+
+    #[test]
+    fn test_string_methods() {
+        let expr: syn::ExprStruct = parse_quote! {
+            MyStruct {
+                a: String::from("hello"),
+                b: "world".to_string(),
+                c: "foo".to_owned()
+            }
+        };
+        let json = struct_expr_to_json(&expr).unwrap();
+        assert_eq!(json["a"], "hello");
+        assert_eq!(json["b"], "world");
+        assert_eq!(json["c"], "foo");
+    }
+
+    #[test]
+    fn test_gts_instance_id_skipped() {
+        let expr: syn::ExprStruct = parse_quote! {
+            MyStruct {
+                id: GtsInstanceId::ID,
+                name: String::from("test")
+            }
+        };
+        let json = struct_expr_to_json(&expr).unwrap();
+        assert!(json.get("id").is_none());
+        assert_eq!(json["name"], "test");
+    }
+
+    // The unit value is kept in the output as an empty JSON object.
+    #[test]
+    fn test_unit_becomes_empty_object() {
+        let expr: syn::ExprStruct = parse_quote! {
+            MyStruct {
+                name: String::from("test"),
+                properties: ()
+            }
+        };
+        let json = struct_expr_to_json(&expr).unwrap();
+        assert_eq!(json["properties"], serde_json::json!({}));
+        assert_eq!(json["name"], "test");
+    }
+
+    #[test]
+    fn test_negative_number() {
+        let expr: syn::ExprStruct = parse_quote! {
+            MyStruct {
+                offset: -10
+            }
+        };
+        let json = struct_expr_to_json(&expr).unwrap();
+        assert_eq!(json["offset"], -10);
+    }
+
+    #[test]
+    fn test_float_value() {
+        let expr: syn::ExprStruct = parse_quote! {
+            MyStruct {
+                rate: 3.15
+            }
+        };
+        let json = struct_expr_to_json(&expr).unwrap();
+        let rate = json["rate"].as_f64().unwrap();
+        assert!((rate - 3.15).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn test_vec_macro() {
+        let expr: syn::ExprStruct = parse_quote! {
+            MyStruct {
+                tags: vec![String::from("a"), String::from("b")]
+            }
+        };
+        let json = struct_expr_to_json(&expr).unwrap();
+        let tags = json["tags"].as_array().unwrap();
+        assert_eq!(tags.len(), 2);
+        assert_eq!(tags[0], "a");
+        assert_eq!(tags[1], "b");
+    }
+
+    #[test]
+    fn test_array_literal() {
+        let expr: syn::ExprStruct = parse_quote! {
+            MyStruct {
+                items: [1, 2, 3]
+            }
+        };
+        let json = struct_expr_to_json(&expr).unwrap();
+        let items = json["items"].as_array().unwrap();
+        assert_eq!(items.len(), 3);
+        assert_eq!(items[0], 1);
+    }
+
+    #[test]
+    fn test_nested_struct() {
+        let expr: syn::ExprStruct = parse_quote! 
{ + Outer { + name: String::from("parent"), + inner: Inner { + value: 99 + } + } + }; + let json = struct_expr_to_json(&expr).unwrap(); + assert_eq!(json["name"], "parent"); + assert_eq!(json["inner"]["value"], 99); + } + + #[test] + fn test_unsupported_expr_errors() { + let expr: syn::ExprStruct = parse_quote! { + MyStruct { + name: some_function() + } + }; + assert!(struct_expr_to_json(&expr).is_err()); + } +} diff --git a/gts-cli/tests/gen_instances_tests.rs b/gts-cli/tests/gen_instances_tests.rs index b0883b7..820fb90 100644 --- a/gts-cli/tests/gen_instances_tests.rs +++ b/gts-cli/tests/gen_instances_tests.rs @@ -6,7 +6,6 @@ //! - Multiple instances in one file //! - Multiple files in a directory //! - `pub` and `pub(crate)` visibility -//! - Raw string literals (r#"..."#) //! - `--output` override path //! - Source file adjacent output (no --output) //! - Duplicate instance ID hard error @@ -15,10 +14,9 @@ //! - Exclude pattern skips file //! - Missing source path error //! - `// gts:ignore` directive skips file -//! - JSON `"id"` field injection (never in body) -//! - `concat!()` form rejected -//! - `static` form rejected -//! - Wrong const type rejected +//! - JSON `"id"` field injection from `GtsInstanceId::ID` sentinel +//! - Old `const`/`static` form rejected +//! - Schema validation (valid, missing required, extra field, wrong type, allOf/$ref) use anyhow::Result; use gts_cli::gen_instances::generate_instances_from_rust; @@ -40,17 +38,23 @@ fn write(dir: &Path, name: &str, content: &str) { fs::write(dir.join(name), content).unwrap(); } -fn instance_src(id: &str, json_body: &str) -> String { +/// Build a source string with a fn-based instance annotation. +/// +/// `struct_body` is a Rust struct expression body (without enclosing `{ }`), +/// e.g. `MyStruct { name: String::from("orders"), partitions: 16 }`. 
+fn instance_src(id: &str, struct_body: &str) -> String { format!( concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", " id = \"{id}\"\n", ")]\n", - "pub const INST: &str = {body};\n" + "fn get_instance_item_v1() -> MyStruct {{\n", + " {body}\n", + "}}\n" ), id = id, - body = json_body + body = struct_body ) } @@ -76,7 +80,7 @@ fn golden_single_instance() { let (_tmp, root) = sandbox(); let src = instance_src( "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", - r#""{\"name\":\"orders\",\"partitions\":16}""#, + r#"MyStruct { name: String::from("orders"), partitions: 16 }"#, ); write(&root, "events.rs", &src); @@ -92,34 +96,6 @@ fn golden_single_instance() { assert_eq!(val["partitions"], 16); } -// ───────────────────────────────────────────────────────────────────────────── -// Golden fixture – raw string literal -// ───────────────────────────────────────────────────────────────────────────── - -#[test] -fn golden_raw_string_literal() { - let (_tmp, root) = sandbox(); - let src = concat!( - "#[gts_well_known_instance(\n", - " dir_path = \"instances\",\n", - " id = \"gts.x.core.events.topic.v1~x.commerce._.payments.v1.0\"\n", - ")]\n", - "pub const PAYMENTS: &str = r#\"{\"name\":\"payments\",\"partitions\":8}\"#;\n" - ); - write(&root, "events.rs", src); - - run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap(); - - let id = "gts.x.core.events.topic.v1~x.commerce._.payments.v1.0"; - let out = inst_path(&root, id); - assert!(out.exists()); - - let val = read_json(&out); - assert_eq!(val["id"], id); - assert_eq!(val["name"], "payments"); - assert_eq!(val["partitions"], 8); -} - // ───────────────────────────────────────────────────────────────────────────── // Multiple instances in one file // ───────────────────────────────────────────────────────────────────────────── @@ -132,12 +108,16 @@ fn multiple_instances_in_one_file() { " dir_path = \"instances\",\n", " id = 
\"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", - "pub const A: &str = \"{\\\"name\\\":\\\"orders\\\"}\";\n", + "fn get_instance_orders_v1() -> MyStruct {\n", + " MyStruct { name: String::from(\"orders\") }\n", + "}\n", "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", " id = \"gts.x.core.events.topic.v1~x.commerce._.payments.v1.0\"\n", ")]\n", - "pub const B: &str = \"{\\\"name\\\":\\\"payments\\\"}\";\n" + "fn get_instance_payments_v1() -> MyStruct {\n", + " MyStruct { name: String::from(\"payments\") }\n", + "}\n" ); write(&root, "events.rs", src); @@ -166,7 +146,7 @@ fn multiple_files_in_directory() { "a.rs", &instance_src( "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", - "\"{\\\"name\\\":\\\"a\\\"}\"", + r#"MyStruct { name: String::from("a") }"#, ), ); write( @@ -174,7 +154,7 @@ fn multiple_files_in_directory() { "b.rs", &instance_src( "gts.x.core.events.topic.v1~x.commerce._.payments.v1.0", - "\"{\\\"name\\\":\\\"b\\\"}\"", + r#"MyStruct { name: String::from("b") }"#, ), ); @@ -202,7 +182,9 @@ fn pub_crate_visibility_accepted() { " dir_path = \"instances\",\n", " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", - "pub(crate) const FOO: &str = \"{\\\"name\\\":\\\"x\\\"}\";\n" + "pub(crate) fn get_instance_orders_v1() -> MyStruct {\n", + " MyStruct { name: String::from(\"x\") }\n", + "}\n" ); write(&root, "events.rs", src); @@ -225,7 +207,7 @@ fn output_adjacent_to_source_when_no_override() { "topic.rs", &instance_src( "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", - "\"{\\\"name\\\":\\\"orders\\\"}\"", + r#"MyStruct { name: String::from("orders") }"#, ), ); @@ -239,26 +221,6 @@ fn output_adjacent_to_source_when_no_override() { assert!(expected.exists(), "Expected: {}", expected.display()); } -// ───────────────────────────────────────────────────────────────────────────── -// id field is injected and overrides any body field named "id" — BODY REJECTED -// 
───────────────────────────────────────────────────────────────────────────── - -#[test] -fn id_field_in_body_is_rejected() { - let (_tmp, root) = sandbox(); - write( - &root, - "events.rs", - &instance_src( - "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", - "\"{\\\"id\\\":\\\"bad\\\",\\\"name\\\":\\\"x\\\"}\"", - ), - ); - - let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); - assert!(err.to_string().contains("\"id\" field"), "Got: {err}"); -} - // ───────────────────────────────────────────────────────────────────────────── // Duplicate instance ID → hard error // ───────────────────────────────────────────────────────────────────────────── @@ -271,12 +233,16 @@ fn duplicate_instance_id_hard_error() { " dir_path = \"instances\",\n", " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", - "pub const A: &str = \"{\\\"name\\\":\\\"a\\\"}\";\n", + "fn get_instance_orders_v1() -> MyStruct {\n", + " MyStruct { name: String::from(\"a\") }\n", + "}\n", "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", - "pub const B: &str = \"{\\\"name\\\":\\\"b\\\"}\";\n" + "fn get_instance_orders2_v1() -> MyStruct {\n", + " MyStruct { name: String::from(\"b\") }\n", + "}\n" ); write(&root, "dup.rs", src); @@ -304,7 +270,9 @@ fn sandbox_escape_rejected() { " dir_path = \"{dir}\",\n", " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", - "pub const FOO: &str = \"{{\\\"name\\\":\\\"x\\\"}}\";\n" + "fn get_instance_orders_v1() -> MyStruct {{\n", + " MyStruct {{ name: String::from(\"x\") }}\n", + "}}\n" ), dir = escape_dir ); @@ -339,7 +307,9 @@ fn exclude_pattern_skips_file() { " dir_path = \"instances\",\n", " id = \"bad-no-tilde\"\n", ")]\n", - "pub const X: &str = \"{}\";\n" + "fn get_instance_bad_v1() -> MyStruct {\n", + " MyStruct { name: String::from(\"x\") }\n", + "}\n" ); write(&root, "excluded_file.rs", 
src); @@ -365,7 +335,9 @@ fn gts_ignore_directive_skips_file() { " dir_path = \"instances\",\n", " id = \"bad-no-tilde\"\n", ")]\n", - "pub const X: &str = \"{}\";\n" + "fn get_instance_bad_v1() -> MyStruct {\n", + " MyStruct { name: String::from(\"x\") }\n", + "}\n" ); write(&root, "ignored.rs", src); @@ -381,12 +353,9 @@ fn gts_ignore_directive_skips_file() { #[test] fn missing_source_path_errors() { - // Use a path guaranteed not to exist on any platform by constructing it - // inside a TempDir that is immediately dropped (and thus deleted). let nonexistent = { let tmp = TempDir::new().unwrap(); tmp.path().join("no_such_subdir_xyz") - // tmp is dropped here — the parent dir is deleted }; let err = run(nonexistent.to_str().unwrap(), None, &[]).unwrap_err(); assert!(err.to_string().contains("does not exist"), "Got: {err}"); @@ -407,23 +376,23 @@ fn no_annotations_produces_nothing() { } // ───────────────────────────────────────────────────────────────────────────── -// concat!() value is rejected with actionable message +// Old const form is rejected with actionable message // ───────────────────────────────────────────────────────────────────────────── #[test] -fn concat_macro_value_is_rejected() { +fn const_form_is_rejected() { let (_tmp, root) = sandbox(); let src = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", - "pub const FOO: &str = concat!(\"{\", \"}\");\n" + "pub const FOO: &str = \"{\\\"name\\\":\\\"x\\\"}\";\n" ); - write(&root, "concat.rs", src); + write(&root, "const_form.rs", src); let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); - assert!(err.to_string().contains("concat!()"), "Got: {err}"); + assert!(err.to_string().contains("no longer supports"), "Got: {err}"); } // ───────────────────────────────────────────────────────────────────────────── @@ -443,7 +412,7 @@ fn static_item_is_rejected() { write(&root, 
"static_item.rs", src); let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); - assert!(err.to_string().contains("static"), "Got: {err}"); + assert!(err.to_string().contains("no longer supports"), "Got: {err}"); } // ───────────────────────────────────────────────────────────────────────────── @@ -458,7 +427,9 @@ fn id_without_tilde_is_rejected() { " dir_path = \"instances\",\n", " id = \"gts.x.core.events.topic.v1.x.commerce._.orders.v1.0\"\n", ")]\n", - "pub const FOO: &str = \"{\\\"name\\\":\\\"x\\\"}\";\n" + "fn get_instance_orders_v1() -> MyStruct {\n", + " MyStruct { name: String::from(\"x\") }\n", + "}\n" ); write(&root, "notilde.rs", src); @@ -478,7 +449,9 @@ fn id_ending_with_tilde_is_rejected() { " dir_path = \"instances\",\n", " id = \"gts.x.core.events.topic.v1~\"\n", ")]\n", - "pub const FOO: &str = \"{\\\"name\\\":\\\"x\\\"}\";\n" + "fn get_instance_orders_v1() -> MyStruct {\n", + " MyStruct { name: String::from(\"x\") }\n", + "}\n" ); write(&root, "segtilde.rs", src); @@ -490,47 +463,7 @@ fn id_ending_with_tilde_is_rejected() { } // ───────────────────────────────────────────────────────────────────────────── -// JSON body must be an object — array is rejected -// ───────────────────────────────────────────────────────────────────────────── - -#[test] -fn json_array_body_is_rejected() { - let (_tmp, root) = sandbox(); - write( - &root, - "events.rs", - &instance_src( - "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", - "\"[1,2,3]\"", - ), - ); - - let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); - assert!(err.to_string().contains("JSON object"), "Got: {err}"); -} - -// ───────────────────────────────────────────────────────────────────────────── -// Malformed JSON body is rejected -// ───────────────────────────────────────────────────────────────────────────── - -#[test] -fn malformed_json_body_is_rejected() { - let (_tmp, root) = sandbox(); - write( - &root, - "events.rs", 
- &instance_src( - "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", - "\"{not valid json}\"", - ), - ); - - let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); - assert!(err.to_string().contains("Malformed JSON"), "Got: {err}"); -} - -// ───────────────────────────────────────────────────────────────────────────── -// Golden fixture: generated file content matches exactly +// Golden fixture: generated file content matches exactly (with id injected) // ───────────────────────────────────────────────────────────────────────────── #[test] @@ -541,7 +474,9 @@ fn golden_file_content_exact() { " dir_path = \"instances\",\n", " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", - "pub const ORDERS: &str = \"{\\\"name\\\":\\\"orders\\\",\\\"partitions\\\":16}\";\n" + "fn get_instance_orders_v1() -> MyStruct {\n", + " MyStruct { name: String::from(\"orders\"), partitions: 16 }\n", + "}\n" ); write(&root, "events.rs", src); @@ -566,42 +501,13 @@ fn golden_file_content_exact() { ); } -// ───────────────────────────────────────────────────────────────────────────── -// Zero-hash raw string r"..." is accepted (Fix: regex r#* not r#+) -// ───────────────────────────────────────────────────────────────────────────── - -#[test] -fn zero_hash_raw_string_is_accepted() { - let (_tmp, root) = sandbox(); - // r"..." 
with no hashes — was previously not matched by the annotation regex - let src = concat!( - "#[gts_well_known_instance(\n", - " dir_path = \"instances\",\n", - " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", - ")]\n", - "pub const ZERO_HASH: &str = r#\"{\"name\":\"zero\"}\"#;\n" - ); - write(&root, "zero_hash.rs", src); - - run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap(); - - let id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0"; - let out = inst_path(&root, id); - assert!(out.exists(), "Expected file: {}", out.display()); - let val = read_json(&out); - assert_eq!(val["name"], "zero"); -} - // ───────────────────────────────────────────────────────────────────────────── // Char literals near the needle don't cause preflight false-positive -// (Fix: preflight_scan now skips char literals like '#' and '[') // ───────────────────────────────────────────────────────────────────────────── #[test] fn char_literal_near_needle_does_not_false_positive() { let (_tmp, root) = sandbox(); - // File contains '#' and '[' as char literals right before a regular ident, - // but no actual annotation — preflight must return false → quiet skip. 
let src = concat!( "fn check(c: char) -> bool {\n", " c == '#' || c == '['\n", @@ -611,21 +517,17 @@ fn char_literal_near_needle_does_not_false_positive() { ); write(&root, "char_lit.rs", src); - // Must succeed with no output — not a hard error run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap(); assert!(!root.join("instances").exists()); } // ───────────────────────────────────────────────────────────────────────────── // Unsupported form mentioned only in a comment does NOT hard-error -// (Fix: check_unsupported_forms runs on comment-stripped source) // ───────────────────────────────────────────────────────────────────────────── #[test] fn unsupported_form_in_comment_does_not_error() { let (_tmp, root) = sandbox(); - // The doc comment contains a concat!() example that would have previously - // triggered a hard error from check_unsupported_forms. let src = concat!( "/// Example (do NOT use):\n", "/// #[gts_well_known_instance(\n", @@ -637,11 +539,12 @@ fn unsupported_form_in_comment_does_not_error() { " dir_path = \"instances\",\n", " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", - "pub const REAL: &str = \"{\\\"name\\\":\\\"real\\\"}\";\n" + "fn get_instance_orders_v1() -> MyStruct {\n", + " MyStruct { name: String::from(\"real\") }\n", + "}\n" ); write(&root, "comment_example.rs", src); - // Must succeed — the concat!() is only in a doc comment run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap(); let id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0"; @@ -649,33 +552,31 @@ fn unsupported_form_in_comment_does_not_error() { } // ───────────────────────────────────────────────────────────────────────────── -// Annotation applied to a fn (not a const) is a hard error -// (Fix: preflight-positive + no match → hard error, not silent skip) +// Annotation applied to a non-fn item (e.g. 
enum) is a hard error // ───────────────────────────────────────────────────────────────────────────── #[test] -fn annotation_on_fn_is_hard_error() { +fn annotation_on_non_fn_is_hard_error() { let (_tmp, root) = sandbox(); let src = concat!( "#[gts_well_known_instance(\n", " dir_path = \"instances\",\n", " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", - "pub fn not_a_const() -> &'static str { \"{}\" }\n" + "pub enum NotAFn { A, B }\n" ); - write(&root, "on_fn.rs", src); + write(&root, "on_enum.rs", src); let err = run(root.to_str().unwrap(), Some(root.to_str().unwrap()), &[]).unwrap_err(); let msg = err.to_string(); assert!( - msg.contains("could not be parsed") || msg.contains("const NAME"), + msg.contains("could not be parsed") || msg.contains("fn get_instance"), "Got: {msg}" ); } // ───────────────────────────────────────────────────────────────────────────── // Duplicate attribute key in annotation is a hard error -// (Fix: check_duplicate_attr_keys added to parse_instance_attrs) // ───────────────────────────────────────────────────────────────────────────── #[test] @@ -687,7 +588,9 @@ fn duplicate_attribute_key_is_hard_error() { " dir_path = \"other\",\n", " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", - "pub const DUP: &str = \"{\\\"name\\\":\\\"x\\\"}\";\n" + "fn get_instance_orders_v1() -> MyStruct {\n", + " MyStruct { name: String::from(\"x\") }\n", + "}\n" ); write(&root, "dup_key.rs", src); @@ -701,10 +604,6 @@ fn duplicate_attribute_key_is_hard_error() { // ───────────────────────────────────────────────────────────────────────────── // ./ prefix in dir_path with same ID → duplicate instance ID error -// (dir_path differs via ./ prefix but composed ID is identical, so -// check_duplicate_ids fires. The path normalisation in -// check_duplicate_output_paths is a defence-in-depth guard for the -// hypothetical future case where filenames could diverge from the ID.) 
// ───────────────────────────────────────────────────────────────────────────── #[test] @@ -715,12 +614,16 @@ fn dot_slash_dir_path_same_id_is_duplicate() { " dir_path = \"instances\",\n", " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", - "pub const A: &str = \"{\\\"name\\\":\\\"a\\\"}\";\n", + "fn get_instance_orders_v1() -> MyStruct {\n", + " MyStruct { name: String::from(\"a\") }\n", + "}\n", "#[gts_well_known_instance(\n", " dir_path = \"./instances\",\n", " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", - "pub const B: &str = \"{\\\"name\\\":\\\"b\\\"}\";\n" + "fn get_instance_orders2_v1() -> MyStruct {\n", + " MyStruct { name: String::from(\"b\") }\n", + "}\n" ); write(&root, "dotslash.rs", src); @@ -734,7 +637,6 @@ fn dot_slash_dir_path_same_id_is_duplicate() { // ───────────────────────────────────────────────────────────────────────────── // Qualified path form #[gts_macros::gts_well_known_instance(...)] is accepted -// (Fix: NEEDLE and regex updated to match optional `gts_macros::` prefix) // ───────────────────────────────────────────────────────────────────────────── #[test] @@ -745,7 +647,9 @@ fn qualified_path_form_is_accepted() { " dir_path = \"instances\",\n", " id = \"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0\"\n", ")]\n", - "pub const QUALIFIED: &str = r#\"{\"name\":\"qualified\"}\"#;\n" + "fn get_instance_orders_v1() -> MyStruct {\n", + " MyStruct { name: String::from(\"qualified\") }\n", + "}\n" ); write(&root, "qualified.rs", src); @@ -774,7 +678,9 @@ fn compile_fail_dir_is_auto_skipped() { " dir_path = \"instances\",\n", " id = \"bad-no-tilde\"\n", ")]\n", - "pub const X: &str = \"{}\";\n" + "fn get_instance_bad_v1() -> MyStruct {\n", + " MyStruct { name: String::from(\"x\") }\n", + "}\n" ); write(&cf_dir, "test.rs", src); @@ -827,7 +733,7 @@ fn schema_validation_valid_instance_passes() { let src = instance_src( "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", - 
r#""{\"name\":\"orders\",\"partitions\":16}""#, + r#"MyStruct { name: String::from("orders"), partitions: 16 }"#, ); write(&root, "inst.rs", &src); @@ -848,7 +754,7 @@ fn schema_validation_missing_required_field_fails() { // Instance provides "name" but NOT "vendor" let src = instance_src( "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", - r#""{\"name\":\"orders\"}""#, + r#"MyStruct { name: String::from("orders") }"#, ); write(&root, "inst.rs", &src); @@ -870,7 +776,7 @@ fn schema_validation_extra_field_fails() { // Instance has "name" + "extra" — violates additionalProperties: false let src = instance_src( "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", - r#""{\"name\":\"orders\",\"extra\":\"bad\"}""#, + r#"MyStruct { name: String::from("orders"), extra: String::from("bad") }"#, ); write(&root, "inst.rs", &src); @@ -896,7 +802,7 @@ fn schema_validation_wrong_type_fails() { // Instance provides "count" as a string let src = instance_src( "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", - r#""{\"count\":\"not-a-number\"}""#, + r#"MyStruct { count: String::from("not-a-number") }"#, ); write(&root, "inst.rs", &src); @@ -954,7 +860,7 @@ fn schema_validation_allof_ref_inheritance_passes() { // Instance satisfies both parent ("name") and child ("vendor") let src = instance_src( "gts.x.core.events.topic.v1~x.core.audit.event.v1~x.commerce._.orders.v1.0", - r#""{\"name\":\"orders\",\"vendor\":\"acme\"}""#, + r#"MyStruct { name: String::from("orders"), vendor: String::from("acme") }"#, ); write(&root, "inst.rs", &src); @@ -1007,7 +913,7 @@ fn schema_validation_allof_ref_missing_parent_field_fails() { // Instance has "vendor" but missing parent-required "name" let src = instance_src( "gts.x.core.events.topic.v1~x.core.audit.event.v1~x.commerce._.orders.v1.0", - r#""{\"vendor\":\"acme\"}""#, + r#"MyStruct { vendor: String::from("acme") }"#, ); write(&root, "inst.rs", &src); @@ -1026,7 +932,7 @@ fn schema_validation_no_schema_on_disk_passes() { // No schema 
written — validation should be skipped silently let src = instance_src( "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0", - r#""{\"name\":\"orders\"}""#, + r#"MyStruct { name: String::from("orders") }"#, ); write(&root, "inst.rs", &src); diff --git a/gts-macros/README.md b/gts-macros/README.md index 42e132f..4c521b7 100644 --- a/gts-macros/README.md +++ b/gts-macros/README.md @@ -425,45 +425,47 @@ The CLI automatically maps Rust types to JSON Schema types: ## `#[gts_well_known_instance]` — Well-Known Instance Declaration -The `#[gts_well_known_instance]` attribute macro declares a **well-known GTS instance** as a `const` JSON string literal. It provides: +The `#[gts_well_known_instance]` attribute macro declares a **well-known GTS instance** as a typed `fn` returning a struct expression. It provides: -1. **Compile-time validation** of the `id` (full instance ID) format. -2. **CLI extraction** — the `gts generate-from-rust --mode instances` command scans for these annotations, validates the JSON payload, injects the `"id"` field, and writes the instance file. +1. **Compile-time validation** of the `id` format, function naming convention, version consistency, and return type schema ID matching (via `GtsSchema::SCHEMA_ID` const assertion). +2. **CLI extraction** — the `gts generate-from-rust --mode instances` command scans for these annotations, extracts the struct body, converts it to JSON, injects the `"id"` field, and writes the instance file. -The macro passes the annotated `const` through **unchanged** at compile time. It is purely metadata for the CLI extraction step. +The macro passes the annotated `fn` through with `#[allow(dead_code)]` at compile time and emits a const assertion to verify the return type's schema ID matches the `id` attribute. 
-### Usage +### Complete Example + +Given a GTS schema struct `BaseModkitPluginV1` (derived via `#[gts_schema]`), declare a well-known instance: ```rust -#[gts_macros::gts_well_known_instance( +// src/gts/instances/default_plugin.rs +use gts::GtsInstanceId; +use gts_macros::gts_well_known_instance; + +use crate::gts::BaseModkitPluginV1; + +#[gts_well_known_instance( dir_path = "instances", - id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0" + id = "gts.x.core.modkit.plugin.v1~x.core.modkit.default_plugin.v1.0" )] -pub const ORDERS_TOPIC: &str = r#"{ - "name": "orders", - "description": "Order lifecycle events topic", - "retention": "P90D", - "partitions": 16 -}"#; +pub fn get_instance_default_plugin_v1() -> BaseModkitPluginV1<()> { + BaseModkitPluginV1 { + id: GtsInstanceId::ID, // sentinel — skipped in JSON, CLI injects the real id + vendor: String::from("hypernetix"), + priority: 100, + properties: (), // () → empty JSON object {} (generic placeholder) + } +} ``` -### Quick Start Guide +**Key points:** +- **`GtsInstanceId::ID`** — sentinel value for the `id` field; the CLI skips it during JSON conversion and injects the real `id` from the attribute. +- **`()`** — produces an empty JSON object `{}`, useful when the struct has a generic type parameter (e.g., `T` in `BaseModkitPluginV1<T>`) that isn't needed for this instance. +- **Function name** — must follow `get_instance_<name>_v<N>` where `<N>` matches the schema version (`v1` in this case). +- **Return type** — must implement `GtsSchema` with a `SCHEMA_ID` matching the schema portion of the `id` attribute. This is verified at compile time. 
-**Step 1 — Declare the instance in a Rust source file:** +### Quick Start Guide -```rust -// src/gts/mod.rs -#[gts_macros::gts_well_known_instance( - dir_path = "instances", - id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0" -)] -pub const ORDERS_TOPIC: &str = r#"{ - "name": "orders", - "description": "Order lifecycle events topic", - "retention": "P90D", - "partitions": 16 -}"#; -``` +**Step 1 — Declare the instance in a Rust source file** (as shown above). **Step 2 — Run the CLI to generate the `.instance.json` file:** @@ -471,18 +473,27 @@ pub const ORDERS_TOPIC: &str = r#"{ gts generate-from-rust --source src/ --mode instances ``` -This produces `instances/gts.x.core.events.topic.v1~x.commerce._.orders.v1.0.instance.json` with the `"id"` field injected automatically. +This produces `instances/gts.x.core.modkit.plugin.v1~x.core.modkit.default_plugin.v1.0.instance.json`: + +```json +{ + "id": "gts.x.core.modkit.plugin.v1~x.core.modkit.default_plugin.v1.0", + "priority": 100, + "properties": {}, + "vendor": "hypernetix" +} +``` + +Note: the `"id"` field is injected automatically, `GtsInstanceId::ID` is omitted, and `()` becomes `{}`. **Step 3 — Use the instance:** ```rust -// Reference the const directly (it's just a &str containing JSON) -let topic: serde_json::Value = serde_json::from_str(ORDERS_TOPIC)?; - -// The full instance ID is the `id` attribute value -let instance_id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0"; +// Call the function to get the typed struct +let plugin = get_instance_default_plugin_v1(); -// Register or look up via the types-registry +// Or load the generated JSON at runtime +let instance_id = "gts.x.core.modkit.plugin.v1~x.core.modkit.default_plugin.v1.0"; let entity = registry.get(instance_id).await?; ``` @@ -540,13 +551,16 @@ gts generate-from-rust --source src/ --output out/ --mode instances ### Rules and restrictions -- The annotated item **must be a `const`** (not `static`) of type `&str`. 
-- The const value **must be a string literal** — raw strings (`r#"..."#`) or regular strings (`"..."`). `concat!()` and other macro invocations are not supported. -- The JSON body **must be a JSON object** (`{ ... }`). Arrays, scalars, and `null` are not valid. -- The JSON body **must not contain an `"id"` field** — the CLI injects it automatically from the `id` attribute. +- The annotated item **must be a `fn`** returning a typed struct — `const` and `static` items are not supported. +- The function **must take no arguments** and have an explicit return type. +- The function name **must follow the convention** `get_instance_{name}_v{N}` where `{N}` matches the schema version in the `id`. +- The function body **must return a struct expression** — the CLI extracts fields and converts them to JSON. +- The struct **must not set a real `id` value** — the CLI injects it automatically from the `id` attribute. If the struct type has an `id` field, initialize it with the `GtsInstanceId::ID` sentinel; it will be skipped during JSON conversion. +- Unit value `()` in a field produces an empty JSON object `{}` (useful for generic type parameter placeholders). - Files in `compile_fail/` directories and files with a `// gts:ignore` directive are skipped. - Items behind `#[cfg(...)]` gates (e.g., `#[cfg(test)]`) are still extracted — extraction is lexical, not conditional. -- **Schema conformance is validated at CLI time, not compile time** — the macro validates `id` format only. The CLI (`gts generate-from-rust`) validates instance JSON bodies against their parent schemas when schema files are present on disk (e.g. after `--mode all` or a prior `--mode schemas` run). If the schema file is not found, validation is skipped with a warning. +- **Compile-time validation** covers: `id` format, function naming convention, version consistency, and return type schema ID matching (via `GtsSchema::SCHEMA_ID` const assertion). 
+- **Schema conformance is validated at CLI time** — the CLI (`gts generate-from-rust`) validates instance JSON bodies against their parent schemas when schema files are present on disk (e.g. after `--mode all` or a prior `--mode schemas` run). If the schema file is not found, validation is skipped with a warning. ### Compile-time validation errors @@ -556,9 +570,12 @@ gts generate-from-rust --source src/ --output out/ --mode instances | Missing `dir_path` | `Missing required attribute: dir_path` | | `id` without `~` | `id must contain '~' separating schema from instance segment` | | `id` ending with `~` | `id must not end with '~' (that is a schema/type ID)` | -| Applied to a `static` item | `Only \`const\` items are supported` | -| Applied to a `const` with type other than `&str` | `The annotated const must have type \`&str\`` | -| Const value is `concat!()` or other macro | `The const value must be a string literal` | +| Applied to a non-`fn` item (`const`, `static`, etc.) | `Only \`fn\` items are supported` | +| Function takes arguments | `must take no arguments` | +| Missing return type | `must have an explicit return type` | +| Function name doesn't match convention | `must start with 'get_instance_'` | +| Version in fn name doesn't match schema version | `version mismatch` | +| Return type `SCHEMA_ID` doesn't match `id` | `Schema ID mismatch` | --- diff --git a/gts-macros/src/lib.rs b/gts-macros/src/lib.rs index 5d0dcd7..14505e8 100644 --- a/gts-macros/src/lib.rs +++ b/gts-macros/src/lib.rs @@ -1841,14 +1841,19 @@ pub fn struct_to_gts_schema(attr: TokenStream, item: TokenStream) -> TokenStream TokenStream::from(expanded) } -/// Arguments for the `#[gts_well_known_instance]` macro. -/// This is a parse-only validation type; fields are validated during `Parse` and not retained. -struct GtsInstanceArgs; +/// Parsed arguments for the `#[gts_well_known_instance]` macro. 
+#[allow(dead_code)] +struct GtsInstanceArgs { + /// The full instance ID (e.g., `gts.x.core.events.topic.v1~x.commerce._.orders.v1.0`) + id: String, + /// The schema portion of the ID (up to and including `~`, e.g., `gts.x.core.events.topic.v1~`) + schema_id: String, +} impl Parse for GtsInstanceArgs { fn parse(input: ParseStream) -> syn::Result { let mut dir_path: Option = None; - let mut id: Option<(String, proc_macro2::Span)> = None; + let mut id: Option<(String, String, proc_macro2::Span)> = None; let mut seen_keys: std::collections::HashSet = std::collections::HashSet::new(); while !input.is_empty() { @@ -1942,7 +1947,8 @@ impl Parse for GtsInstanceArgs { )); } - id = Some((full_id, value.span())); + let schema_id = schema_part.to_owned(); + id = Some((full_id, schema_id, value.span())); } _ => { return Err(syn::Error::new_spanned( @@ -1957,54 +1963,186 @@ impl Parse for GtsInstanceArgs { } } - let _id_val = id.ok_or_else(|| { + let (id_val, schema_id, _span) = id.ok_or_else(|| { input.error("gts_well_known_instance: Missing required attribute: id") })?; let _dir_path_val = dir_path.ok_or_else(|| { input.error("gts_well_known_instance: Missing required attribute: dir_path") })?; - Ok(GtsInstanceArgs) + Ok(GtsInstanceArgs { + id: id_val, + schema_id, + }) + } +} + +/// Extract version from a `snake_case` function name suffix. +/// +/// Pattern: `..._v{MAJOR}` or `..._v{MAJOR}_{MINOR}` at the end. 
+/// +/// Examples: +/// - `get_instance_orders_v1` → Some(Version { major: 1, minor: None }) +/// - `get_instance_config_v1_2` → Some(Version { major: 1, minor: Some(2) }) +/// - `get_instance_orders` → None +fn extract_fn_version(fn_name: &str) -> Option { + // Find the last "_v" followed by a digit + let bytes = fn_name.as_bytes(); + let mut v_pos = None; + + for i in 0..bytes.len().saturating_sub(2) { + if bytes[i] == b'_' && bytes[i + 1] == b'v' && bytes[i + 2].is_ascii_digit() { + v_pos = Some(i + 2); // Position after "_v" + } + } + + let v_pos = v_pos?; + let version_part = &fn_name[v_pos..]; // e.g., "1" or "1_2" + + // Parse version: MAJOR or MAJOR_MINOR + if let Some(underscore_pos) = version_part.find('_') { + // Has minor version: MAJOR_MINOR + let major_str = &version_part[..underscore_pos]; + let minor_str = &version_part[underscore_pos + 1..]; + + let major = major_str.parse::().ok()?; + let minor = minor_str.parse::().ok()?; + Some(Version { + major, + minor: Some(minor), + }) + } else { + // Only major version + let major = version_part.parse::().ok()?; + Some(Version { major, minor: None }) + } +} + +/// Validate that the function name follows the `get_instance_*_v{N}` convention +/// and that its version suffix matches the schema version from the `id` attribute. +fn validate_instance_fn_name(fn_ident: &syn::Ident, schema_id: &str) -> syn::Result<()> { + let fn_name = fn_ident.to_string(); + + // 1. Must start with `get_instance_` + if !fn_name.starts_with("get_instance_") { + return Err(syn::Error::new_spanned( + fn_ident, + format!( + "gts_well_known_instance: Function name must start with 'get_instance_', \ + got '{fn_name}'. Example: 'get_instance_orders_v1'" + ), + )); + } + + // 2. 
Must have a descriptive middle part (at least one char between prefix and version) + let after_prefix = &fn_name["get_instance_".len()..]; + if after_prefix.is_empty() { + return Err(syn::Error::new_spanned( + fn_ident, + format!( + "gts_well_known_instance: Function name must have a descriptive name after \ + 'get_instance_', got '{fn_name}'. Example: 'get_instance_orders_v1'" + ), + )); + } + + // 3. Extract and validate version suffix + let fn_version = extract_fn_version(&fn_name); + let schema_version = extract_schema_version(schema_id); + + match (fn_version, schema_version) { + (Some(fv), Some(sv)) if fv != sv => Err(syn::Error::new_spanned( + fn_ident, + format!( + "gts_well_known_instance: Version mismatch between function name and schema ID. \ + Function '{fn_name}' has version suffix '_{fv_str}' but schema_id '{schema_id}' \ + has version '{sv_str}'. The versions must match \ + (e.g., get_instance_orders_v1 with v1~, or get_instance_orders_v2_0 with v2.0~)", + fv_str = fv.to_schema_version(), + sv_str = sv.to_schema_version() + ), + )), + (Some(_), Some(_)) => Ok(()), // Versions match + (None, Some(sv)) => Err(syn::Error::new_spanned( + fn_ident, + format!( + "gts_well_known_instance: schema_id '{schema_id}' has version '{sv_str}' but \ + function '{fn_name}' does not have a version suffix. \ + Add '_{sv_str}' suffix to the function name \ + (e.g., '{fn_name}_{sv_str}')", + sv_str = sv.to_schema_version() + ), + )), + (Some(fv), None) => Err(syn::Error::new_spanned( + fn_ident, + format!( + "gts_well_known_instance: Function '{fn_name}' has version suffix '_{fv_str}' but \ + cannot extract version from schema_id '{schema_id}'. \ + Expected format with version like 'gts.x.foo.v1~' or 'gts.x.foo.v1.0~'", + fv_str = fv.to_schema_version() + ), + )), + (None, None) => Err(syn::Error::new_spanned( + fn_ident, + format!( + "gts_well_known_instance: Both function name and schema_id must have a version. 
\ + Function '{fn_name}' has no version suffix (e.g., _v1) and schema_id '{schema_id}' \ + has no version (e.g., v1~). Add version to both \ + (e.g., '{fn_name}_v1' with 'gts.x.foo.v1~')" + ), + )), } } -/// Declare a well-known GTS instance as a const JSON string literal. +/// Declare a well-known GTS instance as a typed function returning a schema struct. /// /// This macro: -/// 1. **At compile time**: validates the `id` GTS instance ID format -/// and verifies the annotated item is a `const` of type `&str`. +/// 1. **At compile time**: validates the `id` GTS instance ID format, +/// enforces function naming convention (`get_instance_*_v{N}`), +/// validates the version suffix matches the schema version from `id`, +/// and emits a compile-time assertion that the return type's `GtsSchema::SCHEMA_ID` +/// matches the schema portion of `id`. /// 2. **At generate time**: the CLI (`gts generate-from-rust --mode instances`) scans for -/// these annotations, validates the JSON payload, injects the `"id"` field, and writes -/// `{dir_path}/{id}.instance.json`. +/// these annotations, extracts the struct expression from the function body, builds +/// the JSON object, injects the `"id"` field, and writes `{dir_path}/{id}.instance.json`. /// -/// The macro passes the annotated `const` item through unchanged -- it is purely metadata -/// for the CLI extraction step. +/// The macro passes the annotated function through with `#[allow(dead_code)]` -- the function +/// is never called at runtime, it is purely metadata for the CLI extraction step and for +/// compile-time type checking. /// -/// **Note:** This macro validates the `id` *format* only (GTS ID structure). It does **not** -/// validate the instance JSON body against its parent schema at compile time. Schema conformance -/// validation is performed by the CLI (`gts generate-from-rust`) when schema files are present -/// on disk (e.g. after `--mode all` or a prior `--mode schemas` run). 
+/// The Rust source is never modified -- `GtsInstanceId::ID` stays in the code permanently. +/// The CLI reads the source and writes separate JSON files to the output directory. /// /// # Arguments /// /// * `dir_path` - Output directory for the generated instance file (relative to crate root or `--output`) /// * `id` - Full GTS instance ID (must contain `~` separating schema from instance segment, must not end with `~`) /// +/// # Naming Convention +/// +/// The function name must follow the pattern `get_instance_{name}_v{N}`: +/// - **Prefix**: `get_instance_` (required) +/// - **Middle**: descriptive name in `snake_case` (e.g., `orders`, `audit_trail`) +/// - **Suffix**: `_v{N}` or `_v{N}_{M}` -- must match the schema version from the `id` attribute +/// /// # Example /// /// ```ignore +/// use gts::GtsInstanceId; /// use gts_macros::gts_well_known_instance; /// /// #[gts_well_known_instance( /// dir_path = "instances", /// id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0" /// )] -/// const ORDERS_TOPIC: &str = r#"{ -/// "name": "orders", -/// "description": "Order lifecycle events topic", -/// "retention": "P90D", -/// "partitions": 16 -/// }"#; +/// fn get_instance_orders_v1() -> BaseEventTopicV1<()> { +/// BaseEventTopicV1 { +/// id: GtsInstanceId::ID, +/// name: String::from("orders"), +/// partitions: 16, +/// properties: (), +/// } +/// } /// ``` /// /// The CLI generates: @@ -2012,66 +2150,98 @@ impl Parse for GtsInstanceArgs { /// with an injected `"id"` field: `"gts.x.core.events.topic.v1~x.commerce._.orders.v1.0"`. 
#[proc_macro_attribute] pub fn gts_well_known_instance(attr: TokenStream, item: TokenStream) -> TokenStream { - let _args = parse_macro_input!(attr as GtsInstanceArgs); + let args = parse_macro_input!(attr as GtsInstanceArgs); - // Parse the annotated item -- must be a const item - let item_clone = item.clone(); - let parsed: syn::Result = syn::parse(item_clone); - - match parsed { + // Parse the annotated item -- must be a function + let item_fn: syn::ItemFn = match syn::parse(item) { + Ok(f) => f, Err(_) => { return syn::Error::new( proc_macro2::Span::call_site(), - "gts_well_known_instance: Only `const` items are supported. \ - Usage: `const NAME: &str = r#\"{ ... }\"#;`", + "gts_well_known_instance: Only `fn` items are supported. \ + Usage: `fn get_instance_name_v1() -> SchemaType<()> { SchemaType { ... } }`", ) .to_compile_error() .into(); } - Ok(item_const) => { - // Validate the const has type &str (or &'lifetime str) using AST matching. - // This is more robust than stringifying tokens and avoids false rejections - // from formatting differences (spaces, lifetime names, etc.). - let ty = &item_const.ty; - let is_ref_str = match ty.as_ref() { - syn::Type::Reference(syn::TypeReference { elem, .. }) => { - matches!(elem.as_ref(), syn::Type::Path(p) if p.qself.is_none() && p.path.is_ident("str")) - } - _ => false, - }; - if !is_ref_str { - let ty_str = quote::quote!(#ty).to_string().replace(' ', ""); - return syn::Error::new_spanned( - ty, - format!( - "gts_well_known_instance: The annotated const must have type `&str`, got `{ty_str}`. \ - Usage: `const NAME: &str = r#\"{{ ... }}\"#;`" - ), - ) - .to_compile_error() - .into(); - } + }; - // Validate the const value is a string literal (not a macro invocation) - match item_const.expr.as_ref() { - syn::Expr::Lit(syn::ExprLit { - lit: syn::Lit::Str(_), - .. 
- }) => {} - _ => { - return syn::Error::new_spanned( - &item_const.expr, - "gts_well_known_instance: The const value must be a string literal \ - (raw string `r#\"...\"#` or regular string `\"...\"`). \ - Macro invocations like `concat!()` are not supported.", - ) - .to_compile_error() - .into(); - } - } + // Validate function has no arguments + if !item_fn.sig.inputs.is_empty() { + return syn::Error::new_spanned( + &item_fn.sig.inputs, + "gts_well_known_instance: Instance function must take no arguments.", + ) + .to_compile_error() + .into(); + } + + // Validate function has a return type + let return_type = match &item_fn.sig.output { + syn::ReturnType::Type(_, ty) => ty.clone(), + syn::ReturnType::Default => { + return syn::Error::new_spanned( + &item_fn.sig, + "gts_well_known_instance: Instance function must have a return type \ + (the schema struct type). Example: `fn get_instance_orders_v1() -> BaseEventTopicV1<()>`", + ) + .to_compile_error() + .into(); } + }; + + // Validate function naming convention and version match + if let Err(e) = validate_instance_fn_name(&item_fn.sig.ident, &args.schema_id) { + return e.to_compile_error().into(); } - // Pass the item through unchanged -- this macro is purely metadata for the CLI - item + // Build compile-time assertion: return type's SCHEMA_ID == schema portion of id + let schema_id_str = &args.schema_id; + let fn_name = &item_fn.sig.ident; + + // Generate a unique const name for the assertion to avoid collisions + let assert_ident = syn::Ident::new( + &format!("_ASSERT_SCHEMA_ID_{}", fn_name.to_string().to_uppercase()), + fn_name.span(), + ); + + let expanded = quote! { + #[allow(dead_code)] + #item_fn + + // Compile-time assertion: the return type must implement GtsSchema and + // its SCHEMA_ID must match the schema portion of the `id` attribute. + #[doc(hidden)] + #[allow(non_upper_case_globals)] + const #assert_ident: () = { + // This const block triggers a compile error if: + // 1. 
The return type does not implement GtsSchema (type error) + // 2. The SCHEMA_ID does not match (assert! failure) + const EXPECTED: &str = #schema_id_str; + const ACTUAL: &str = <#return_type as ::gts::GtsSchema>::SCHEMA_ID; + + // Const-compatible string equality check (byte-by-byte) + const fn const_str_eq(a: &[u8], b: &[u8]) -> bool { + if a.len() != b.len() { + return false; + } + let mut i = 0; + while i < a.len() { + if a[i] != b[i] { + return false; + } + i += 1; + } + true + } + + assert!( + const_str_eq(ACTUAL.as_bytes(), EXPECTED.as_bytes()), + "gts_well_known_instance: Schema ID mismatch. The return type's GtsSchema::SCHEMA_ID \ + does not match the schema portion of the `id` attribute." + ); + }; + }; + + TokenStream::from(expanded) } diff --git a/gts-macros/tests/compile_fail/instance_const_wrong_type.rs b/gts-macros/tests/compile_fail/instance_const_wrong_type.rs index f49828d..37555c3 100644 --- a/gts-macros/tests/compile_fail/instance_const_wrong_type.rs +++ b/gts-macros/tests/compile_fail/instance_const_wrong_type.rs @@ -1,4 +1,4 @@ -//! Test: gts_well_known_instance applied to a const with wrong type (not &str) +//! Test: gts_well_known_instance rejects const items (only fn items are supported) use gts_macros::gts_well_known_instance; diff --git a/gts-macros/tests/compile_fail/instance_const_wrong_type.stderr b/gts-macros/tests/compile_fail/instance_const_wrong_type.stderr index db4defe..b01e8b5 100644 --- a/gts-macros/tests/compile_fail/instance_const_wrong_type.stderr +++ b/gts-macros/tests/compile_fail/instance_const_wrong_type.stderr @@ -1,5 +1,10 @@ -error: gts_well_known_instance: The annotated const must have type `&str`, got `u32`. Usage: `const NAME: &str = r#"{ ... }"#;` - --> tests/compile_fail/instance_const_wrong_type.rs:9:21 +error: gts_well_known_instance: Only `fn` items are supported. Usage: `fn get_instance_name_v1() -> SchemaType<()> { SchemaType { ... 
} }` + --> tests/compile_fail/instance_const_wrong_type.rs:5:1 | -9 | const ORDERS_TOPIC: u32 = 42; - | ^^^ +5 | / #[gts_well_known_instance( +6 | | dir_path = "instances", +7 | | id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0" +8 | | )] + | |__^ + | + = note: this error originates in the attribute macro `gts_well_known_instance` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/gts-macros/tests/compile_fail/instance_missing_dir_path.rs b/gts-macros/tests/compile_fail/instance_missing_dir_path.rs index d1a359b..dc21852 100644 --- a/gts-macros/tests/compile_fail/instance_missing_dir_path.rs +++ b/gts-macros/tests/compile_fail/instance_missing_dir_path.rs @@ -5,6 +5,6 @@ use gts_macros::gts_well_known_instance; #[gts_well_known_instance( id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0" )] -const ORDERS_TOPIC: &str = r#"{"name": "orders"}"#; +fn get_instance_orders_v1() -> () {} fn main() {} diff --git a/gts-macros/tests/compile_fail/instance_missing_instance_segment.rs b/gts-macros/tests/compile_fail/instance_missing_instance_segment.rs index e78c0bd..0603c05 100644 --- a/gts-macros/tests/compile_fail/instance_missing_instance_segment.rs +++ b/gts-macros/tests/compile_fail/instance_missing_instance_segment.rs @@ -6,6 +6,6 @@ use gts_macros::gts_well_known_instance; dir_path = "instances", instance_segment = "x.commerce._.orders.v1.0" )] -const ORDERS_TOPIC: &str = r#"{"name": "orders"}"#; +fn get_instance_orders_v1() -> () {} fn main() {} diff --git a/gts-macros/tests/compile_fail/instance_missing_schema_id.rs b/gts-macros/tests/compile_fail/instance_missing_schema_id.rs index cce979f..db13c8c 100644 --- a/gts-macros/tests/compile_fail/instance_missing_schema_id.rs +++ b/gts-macros/tests/compile_fail/instance_missing_schema_id.rs @@ -5,6 +5,6 @@ use gts_macros::gts_well_known_instance; #[gts_well_known_instance( dir_path = "instances" )] -const ORDERS_TOPIC: &str = r#"{"name": "orders"}"#; +fn get_instance_orders_v1() -> 
() {} fn main() {} diff --git a/gts-macros/tests/compile_fail/instance_on_non_const.rs b/gts-macros/tests/compile_fail/instance_on_non_const.rs index 205e904..e96a1dd 100644 --- a/gts-macros/tests/compile_fail/instance_on_non_const.rs +++ b/gts-macros/tests/compile_fail/instance_on_non_const.rs @@ -1,4 +1,4 @@ -//! Test: gts_well_known_instance applied to a non-const item (static) +//! Test: gts_well_known_instance rejects static items (only fn items are supported) use gts_macros::gts_well_known_instance; diff --git a/gts-macros/tests/compile_fail/instance_on_non_const.stderr b/gts-macros/tests/compile_fail/instance_on_non_const.stderr index 0f5e533..5eb5de2 100644 --- a/gts-macros/tests/compile_fail/instance_on_non_const.stderr +++ b/gts-macros/tests/compile_fail/instance_on_non_const.stderr @@ -1,4 +1,4 @@ -error: gts_well_known_instance: Only `const` items are supported. Usage: `const NAME: &str = r#"{ ... }"#;` +error: gts_well_known_instance: Only `fn` items are supported. Usage: `fn get_instance_name_v1() -> SchemaType<()> { SchemaType { ... 
} }` --> tests/compile_fail/instance_on_non_const.rs:5:1 | 5 | / #[gts_well_known_instance( diff --git a/gts-macros/tests/compile_fail/instance_schema_id_no_tilde.rs b/gts-macros/tests/compile_fail/instance_schema_id_no_tilde.rs index 3250b84..8cc56c3 100644 --- a/gts-macros/tests/compile_fail/instance_schema_id_no_tilde.rs +++ b/gts-macros/tests/compile_fail/instance_schema_id_no_tilde.rs @@ -6,6 +6,6 @@ use gts_macros::gts_well_known_instance; dir_path = "instances", id = "gts.x.core.events.topic.v1.x.commerce._.orders.v1.0" )] -const ORDERS_TOPIC: &str = r#"{"name": "orders"}"#; +fn get_instance_orders_v1() -> () {} fn main() {} diff --git a/gts-macros/tests/compile_fail/instance_segment_bare_wildcard.rs b/gts-macros/tests/compile_fail/instance_segment_bare_wildcard.rs index 3dcb3bd..0adfa9b 100644 --- a/gts-macros/tests/compile_fail/instance_segment_bare_wildcard.rs +++ b/gts-macros/tests/compile_fail/instance_segment_bare_wildcard.rs @@ -6,6 +6,6 @@ use gts_macros::gts_well_known_instance; dir_path = "instances", id = "gts.x.core.events.topic.v1~*" )] -const ORDERS_TOPIC: &str = r#"{"name": "orders"}"#; +fn get_instance_orders_v1() -> () {} fn main() {} diff --git a/gts-macros/tests/compile_fail/instance_segment_ends_with_tilde.rs b/gts-macros/tests/compile_fail/instance_segment_ends_with_tilde.rs index 5709197..22c4cda 100644 --- a/gts-macros/tests/compile_fail/instance_segment_ends_with_tilde.rs +++ b/gts-macros/tests/compile_fail/instance_segment_ends_with_tilde.rs @@ -6,6 +6,6 @@ use gts_macros::gts_well_known_instance; dir_path = "instances", id = "gts.x.core.events.topic.v1~" )] -const ORDERS_TOPIC: &str = r#"{"name": "orders"}"#; +fn get_instance_orders_v1() -> () {} fn main() {} diff --git a/gts/src/gts.rs b/gts/src/gts.rs index c0d9e36..0746a0e 100644 --- a/gts/src/gts.rs +++ b/gts/src/gts.rs @@ -580,6 +580,29 @@ impl schemars::JsonSchema for GtsInstanceId { } impl GtsInstanceId { + /// Sentinel value for use in `#[gts_well_known_instance]` function 
bodies. + /// + /// The CLI replaces this with the real instance ID (from the `id` attribute) + /// when generating the output JSON file. This const exists solely to satisfy + /// the Rust type system — its value is never written to disk. + /// + /// # Example + /// + /// ```ignore + /// #[gts_well_known_instance( + /// dir_path = "instances", + /// id = "gts.x.core.events.topic.v1~x.commerce._.orders.v1.0" + /// )] + /// fn get_instance_orders_v1() -> BaseEventTopicV1<()> { + /// BaseEventTopicV1 { + /// id: GtsInstanceId::ID, + /// name: String::from("orders"), + /// properties: (), + /// } + /// } + /// ``` + pub const ID: Self = Self(GtsEntityId(String::new())); + /// Returns the JSON Schema representation of `GtsInstanceId` as a `serde_json::Value`. /// /// This is the canonical schema definition used by both the schemars `JsonSchema` impl