diff --git a/.claude/settings.local.json b/.claude/settings.local.json index b71dc97..325cac7 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -16,10 +16,6 @@ "Bash(gh repo view:*)", "Bash(cargo build:*)", "Bash(cargo search:*)", - "WebFetch(domain:docs.rs)", - "WebFetch(domain:github.com)", - "WebFetch(domain:raw.githubusercontent.com)", - "WebFetch(domain:index.crates.io)" ] } } diff --git a/crates/codebook-config/src/helpers.rs b/crates/codebook-config/src/helpers.rs index 393eaac..5c9cb23 100644 --- a/crates/codebook-config/src/helpers.rs +++ b/crates/codebook-config/src/helpers.rs @@ -57,7 +57,7 @@ pub(crate) fn unix_cache_dir() -> PathBuf { /// Compile user-provided ignore regex patterns, dropping invalid entries. /// Patterns are compiled with multiline mode so `^` and `$` match line boundaries. -pub(crate) fn build_ignore_regexes(patterns: &[String]) -> Vec { +pub fn build_ignore_regexes(patterns: &[String]) -> Vec { patterns .iter() .filter_map( diff --git a/crates/codebook-config/src/lib.rs b/crates/codebook-config/src/lib.rs index 29ab8f4..0405c08 100644 --- a/crates/codebook-config/src/lib.rs +++ b/crates/codebook-config/src/lib.rs @@ -1,9 +1,8 @@ -mod helpers; -mod settings; +pub mod helpers; +pub mod settings; mod watched_file; use crate::helpers::expand_tilde; pub use crate::settings::ConfigSettings; - use crate::watched_file::WatchedFile; use log::debug; use log::info; @@ -35,6 +34,12 @@ pub trait CodebookConfig: Sync + Send + Debug { fn get_min_word_length(&self) -> usize; fn should_check_tag(&self, tag: &str) -> bool; fn cache_dir(&self) -> &Path; + + /// Resolve settings with overrides applied for a specific file path. + /// Returns None if no overrides match (callers should use base config methods). + fn resolve_for_file(&self, _relative_path: &Path) -> Option> { + None + } } /// Internal mutable state @@ -546,6 +551,22 @@ impl CodebookConfig for CodebookConfigFile { fn cache_dir(&self) -> &Path { &self.cache_dir } + + /// Resolve settings with overrides applied for a specific file path. + fn resolve_for_file(&self, relative_path: &Path) -> Option> { + let snapshot = self.snapshot(); + if snapshot.overrides.is_empty() { + return None; + } + if !snapshot + .overrides + .iter() + .any(|o| o.matches_path(relative_path)) + { + return None; + } + Some(Arc::new(snapshot.resolve_for_path(relative_path))) + } } #[derive(Debug)] @@ -1188,4 +1209,263 @@ mod tests { Ok(()) } + + // --- Override integration tests --- + + #[test] + fn test_resolve_for_file_no_overrides() { + let config = CodebookConfigFile::default(); + { + let mut inner = config.inner.write().unwrap(); + let settings = ConfigSettings { + words: vec!["base".to_string()], + ..Default::default() + }; + inner.project_config = inner.project_config.clone().with_content_value(settings); + CodebookConfigFile::rebuild_snapshot(&mut inner); + } + + // No overrides, should return None + assert!(config + .resolve_for_file(Path::new("src/main.rs")) + .is_none()); + } + + #[test] + fn test_resolve_for_file_with_matching_override() -> Result<(), io::Error> { + let temp_dir = TempDir::new().unwrap(); + let config_path = temp_dir.path().join("codebook.toml"); + let mut file = File::create(&config_path)?; + write!( + file, + r#" + words = ["base"] + + [[overrides]] + paths = ["**/*.md"] + extra_words = ["markdown"] + "# + )?; + + let config = load_from_file(ConfigType::Project, &config_path)?; + + // .md file should get override + let resolved = config.resolve_for_file(Path::new("README.md")); + assert!(resolved.is_some()); + let settings = resolved.unwrap(); + assert!(settings.is_allowed_word("base")); + assert!(settings.is_allowed_word("markdown")); + + // .rs file should not match + assert!(config + .resolve_for_file(Path::new("src/main.rs")) + .is_none()); + + Ok(()) + } + + #[test] + fn test_resolve_for_file_global_and_project_overrides() -> Result<(), io::Error> { + let global_temp = TempDir::new().unwrap(); + let project_temp = TempDir::new().unwrap(); + + // Global config with an override + let global_config_path = global_temp.path().join("codebook.toml"); + fs::write( + &global_config_path, + r#" + words = ["globalbase"] + + [[overrides]] + paths = ["**/*.md"] + extra_words = ["fromglobal"] + "#, + )?; + + // Project config with an override + let project_config_path = project_temp.path().join("codebook.toml"); + fs::write( + &project_config_path, + r#" + words = ["projectbase"] + + [[overrides]] + paths = ["**/*.md"] + extra_words = ["fromproject"] + "#, + )?; + + // Load both configs + let config = CodebookConfigFile::default(); + { + let mut inner = config.inner.write().unwrap(); + if let Ok(global_settings) = + CodebookConfigFile::load_settings_from_file(&global_config_path) + { + inner.global_config = WatchedFile::new(Some(global_config_path)) + .with_content_value(global_settings); + } + if let Ok(project_settings) = + CodebookConfigFile::load_settings_from_file(&project_config_path) + { + inner.project_config = WatchedFile::new(Some(project_config_path)) + .with_content_value(project_settings); + } + let effective = CodebookConfigFile::calculate_effective_settings( + &inner.project_config, + &inner.global_config, + ); + inner.snapshot = Arc::new(effective); + } + + // Resolve for a .md file — both overrides should apply + let resolved = config.resolve_for_file(Path::new("docs/guide.md")); + assert!(resolved.is_some()); + let settings = resolved.unwrap(); + assert!(settings.is_allowed_word("globalbase")); + assert!(settings.is_allowed_word("projectbase")); + assert!(settings.is_allowed_word("fromglobal")); + assert!(settings.is_allowed_word("fromproject")); + + Ok(()) + } + + #[test] + fn test_resolve_for_file_use_global_false_ignores_global_overrides() -> Result<(), io::Error> { + let global_temp = TempDir::new().unwrap(); + let project_temp = TempDir::new().unwrap(); + + let global_config_path = global_temp.path().join("codebook.toml"); + fs::write( + &global_config_path, + r#" + words = ["globalbase"] + + [[overrides]] + paths = ["**/*.md"] + extra_words = ["fromglobal"] + "#, + )?; + + let project_config_path = project_temp.path().join("codebook.toml"); + fs::write( + &project_config_path, + r#" + words = ["projectbase"] + use_global = false + + [[overrides]] + paths = ["**/*.md"] + extra_words = ["fromproject"] + "#, + )?; + + let config = CodebookConfigFile::default(); + { + let mut inner = config.inner.write().unwrap(); + if let Ok(global_settings) = + CodebookConfigFile::load_settings_from_file(&global_config_path) + { + inner.global_config = WatchedFile::new(Some(global_config_path)) + .with_content_value(global_settings); + } + if let Ok(project_settings) = + CodebookConfigFile::load_settings_from_file(&project_config_path) + { + inner.project_config = WatchedFile::new(Some(project_config_path)) + .with_content_value(project_settings); + } + let effective = CodebookConfigFile::calculate_effective_settings( + &inner.project_config, + &inner.global_config, + ); + inner.snapshot = Arc::new(effective); + } + + // With use_global = false, global overrides should be ignored + let resolved = config.resolve_for_file(Path::new("README.md")); + assert!(resolved.is_some()); + let settings = resolved.unwrap(); + assert!(settings.is_allowed_word("projectbase")); + assert!(settings.is_allowed_word("fromproject")); + // Global words and overrides should NOT be present + assert!(!settings.is_allowed_word("globalbase")); + assert!(!settings.is_allowed_word("fromglobal")); + + Ok(()) + } + + #[test] + fn test_save_preserves_overrides() -> Result<(), io::Error> { + let temp_dir = TempDir::new().unwrap(); + let config_path = temp_dir.path().join("codebook.toml"); + fs::write( + &config_path, + r#" + words = ["base"] + + [[overrides]] + paths = ["**/*.md"] + extra_words = ["markdown"] + "#, + )?; + + let config = load_from_file(ConfigType::Project, &config_path)?; + + // Add a word and save + config.add_word("newword")?; + config.save()?; + + // Reload and verify overrides are preserved + let reloaded = load_from_file(ConfigType::Project, &config_path)?; + assert!(reloaded.is_allowed_word("base")); + assert!(reloaded.is_allowed_word("newword")); + + // Override should still work + let resolved = reloaded.resolve_for_file(Path::new("README.md")); + assert!(resolved.is_some()); + assert!(resolved.unwrap().is_allowed_word("markdown")); + + Ok(()) + } + + #[test] + fn test_reload_picks_up_override_changes() -> Result<(), io::Error> { + let temp_dir = TempDir::new().unwrap(); + let config_path = temp_dir.path().join("codebook.toml"); + fs::write( + &config_path, + r#" + words = ["base"] + "#, + )?; + + let config = load_from_file(ConfigType::Project, &config_path)?; + + // No overrides initially + assert!(config + .resolve_for_file(Path::new("README.md")) + .is_none()); + + // Update config with overrides + fs::write( + &config_path, + r#" + words = ["base"] + + [[overrides]] + paths = ["**/*.md"] + extra_words = ["markdown"] + "#, + )?; + + config.reload()?; + + // Now overrides should apply + let resolved = config.resolve_for_file(Path::new("README.md")); + assert!(resolved.is_some()); + assert!(resolved.unwrap().is_allowed_word("markdown")); + + Ok(()) + } } diff --git a/crates/codebook-config/src/settings.rs b/crates/codebook-config/src/settings.rs index 38d3953..78001dc 100644 --- a/crates/codebook-config/src/settings.rs +++ b/crates/codebook-config/src/settings.rs @@ -1,6 +1,125 @@ use glob::Pattern; +use log::warn; use serde::{Deserialize, Serialize}; use std::path::Path; + +/// A single `[[overrides]]` block in the config file. +#[derive(Debug, Serialize, Clone, PartialEq)] +pub struct OverrideBlock { + /// Required: glob patterns matched against file path relative to project root + pub paths: Vec, + + // --- Replace fields (replace the base list entirely) --- + #[serde(default, skip_serializing_if = "Option::is_none")] + pub dictionaries: Option>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub words: Option>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub flag_words: Option>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub ignore_patterns: Option>, + + // --- Append fields (append to the resolved list) --- + #[serde(default, skip_serializing_if = "Option::is_none")] + pub extra_dictionaries: Option>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub extra_words: Option>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub extra_flag_words: Option>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub extra_ignore_patterns: Option>, +} + +impl<'de> Deserialize<'de> for OverrideBlock { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + fn to_lowercase_vec(v: Vec) -> Vec { + v.into_iter().map(|s| s.to_ascii_lowercase()).collect() + } + + fn to_lowercase_opt(v: Option>) -> Option> { + v.map(to_lowercase_vec) + } + + #[derive(Deserialize)] + struct Helper { + #[serde(default)] + paths: Vec, + #[serde(default)] + dictionaries: Option>, + #[serde(default)] + words: Option>, + #[serde(default)] + flag_words: Option>, + #[serde(default)] + ignore_patterns: Option>, + #[serde(default)] + extra_dictionaries: Option>, + #[serde(default)] + extra_words: Option>, + #[serde(default)] + extra_flag_words: Option>, + #[serde(default)] + extra_ignore_patterns: Option>, + } + + let helper = Helper::deserialize(deserializer)?; + Ok(OverrideBlock { + paths: helper.paths, + // Lowercase word-related fields + dictionaries: to_lowercase_opt(helper.dictionaries), + words: to_lowercase_opt(helper.words), + flag_words: to_lowercase_opt(helper.flag_words), + extra_dictionaries: to_lowercase_opt(helper.extra_dictionaries), + extra_words: to_lowercase_opt(helper.extra_words), + extra_flag_words: to_lowercase_opt(helper.extra_flag_words), + // Don't lowercase patterns or paths + ignore_patterns: helper.ignore_patterns, + extra_ignore_patterns: helper.extra_ignore_patterns, + }) + } +} + +impl OverrideBlock { + /// Returns true if this override block is valid (has non-empty paths with at least one valid glob). + pub fn is_valid(&self) -> bool { + if self.paths.is_empty() { + return false; + } + self.paths.iter().any(|p| Pattern::new(p).is_ok()) + } + + /// Check if this override applies to the given relative file path. + pub fn matches_path(&self, relative_path: &Path) -> bool { + let path_str = relative_path.to_string_lossy(); + self.paths.iter().any(|pattern| { + Pattern::new(pattern) + .map(|p| p.matches(&path_str)) + .unwrap_or(false) + }) + } + + /// Returns true if any field besides `paths` is set (the override has an effect). + pub fn has_effect(&self) -> bool { + self.dictionaries.is_some() + || self.words.is_some() + || self.flag_words.is_some() + || self.ignore_patterns.is_some() + || self.extra_dictionaries.is_some() + || self.extra_words.is_some() + || self.extra_flag_words.is_some() + || self.extra_ignore_patterns.is_some() + } +} + #[derive(Debug, Serialize, Clone, PartialEq)] pub struct ConfigSettings { /// List of dictionaries to use for spell checking @@ -48,6 +167,10 @@ pub struct ConfigSettings { /// Tag prefixes to exclude (takes precedence over include_tags) #[serde(default, skip_serializing_if = "Vec::is_empty")] pub exclude_tags: Vec, + + /// Scoped configuration overrides + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub overrides: Vec, } fn default_use_global() -> bool { @@ -79,6 +202,7 @@ impl Default for ConfigSettings { min_word_length: default_min_word_length(), include_tags: Vec::new(), exclude_tags: Vec::new(), + overrides: Vec::new(), } } } @@ -110,12 +234,32 @@ impl<'de> Deserialize<'de> for ConfigSettings { include_tags: Vec, #[serde(default)] exclude_tags: Vec, + #[serde(default)] + overrides: Vec, } // Dictionary IDs are language codes (e.g. "en_US") — normalize to lowercase // so lookups are case-insensitive. Word lists keep their original casing and // are compared via unicase::eq. let helper = Helper::deserialize(deserializer)?; + + // Filter out invalid override blocks + let overrides: Vec = helper + .overrides + .into_iter() + .filter(|o| { + if !o.is_valid() { + warn!("Skipping invalid override block (empty or invalid paths)"); + return false; + } + if !o.has_effect() { + warn!("Skipping no-op override block (no settings specified)"); + return false; + } + true + }) + .collect(); + Ok(ConfigSettings { dictionaries: helper .dictionaries @@ -131,12 +275,14 @@ impl<'de> Deserialize<'de> for ConfigSettings { min_word_length: helper.min_word_length, include_tags: helper.include_tags, exclude_tags: helper.exclude_tags, + overrides, }) } } impl ConfigSettings { - /// Merge another config settings into this one, sorting and deduplicating all collections + /// Merge another config settings into this one, sorting and deduplicating all collections. + /// Overrides are appended (preserving order: self's overrides first, then other's). pub fn merge(&mut self, other: ConfigSettings) { // Add items from the other config self.dictionaries.extend(other.dictionaries); @@ -148,6 +294,9 @@ impl ConfigSettings { self.include_tags.extend(other.include_tags); self.exclude_tags.extend(other.exclude_tags); + // Append overrides (global first, then project — order matters) + self.overrides.extend(other.overrides); + // The use_global setting from the other config is ignored during merging // as this is a per-config setting @@ -156,11 +305,11 @@ impl ConfigSettings { self.min_word_length = other.min_word_length; } - // Sort and deduplicate each collection + // Sort and deduplicate each collection (but NOT overrides) self.sort_and_dedup(); } - /// Sort and deduplicate all collections in the config + /// Sort and deduplicate all collections in the config (but not overrides). pub fn sort_and_dedup(&mut self) { // Sort and deduplicate each Vec sort_and_dedup(&mut self.dictionaries); @@ -171,6 +320,54 @@ impl ConfigSettings { sort_and_dedup(&mut self.ignore_patterns); sort_and_dedup(&mut self.include_tags); sort_and_dedup(&mut self.exclude_tags); + // Note: overrides are NOT sorted — order matters for resolution + } + + /// Apply a single override block to this settings (mutates in place). + /// Replace fields are applied first, then append fields. + pub fn apply_override(&mut self, ovr: &OverrideBlock) { + // Replace fields: fully replace the list + if let Some(ref v) = ovr.dictionaries { + self.dictionaries = v.clone(); + } + if let Some(ref v) = ovr.words { + self.words = v.clone(); + } + if let Some(ref v) = ovr.flag_words { + self.flag_words = v.clone(); + } + if let Some(ref v) = ovr.ignore_patterns { + self.ignore_patterns = v.clone(); + } + + // Append fields: extend the current list + if let Some(ref v) = ovr.extra_dictionaries { + self.dictionaries.extend(v.clone()); + } + if let Some(ref v) = ovr.extra_words { + self.words.extend(v.clone()); + } + if let Some(ref v) = ovr.extra_flag_words { + self.flag_words.extend(v.clone()); + } + if let Some(ref v) = ovr.extra_ignore_patterns { + self.ignore_patterns.extend(v.clone()); + } + } + + /// Resolve the effective settings for a specific file path by applying matching overrides. + /// Returns a new ConfigSettings with overrides applied and the overrides list cleared. + pub fn resolve_for_path(&self, path: &Path) -> ConfigSettings { + let mut resolved = self.clone(); + resolved.overrides = vec![]; // Resolved config has no overrides + + for ovr in &self.overrides { + if ovr.matches_path(path) { + resolved.apply_override(ovr); + } + } + + resolved } } @@ -306,6 +503,7 @@ mod tests { assert_eq!(config.ignore_patterns, Vec::::new()); assert!(config.use_global); assert_eq!(config.min_word_length, 3); + assert!(config.overrides.is_empty()); } #[test] @@ -672,4 +870,310 @@ mod tests { assert_eq!(config.ignore_patterns, Vec::::new()); assert!(config.use_global); } + + // --- Override tests --- + + #[test] + fn test_override_block_deserialization() { + let toml_str = r#" + words = ["base"] + + [[overrides]] + paths = ["**/*.md"] + extra_words = ["Markdown"] + dictionaries = ["EN_GB"] + "#; + + let config: ConfigSettings = toml::from_str(toml_str).unwrap(); + assert_eq!(config.overrides.len(), 1); + let ovr = &config.overrides[0]; + assert_eq!(ovr.paths, vec!["**/*.md"]); + assert_eq!(ovr.extra_words, Some(vec!["markdown".to_string()])); // lowercased + assert_eq!(ovr.dictionaries, Some(vec!["en_gb".to_string()])); // lowercased + assert_eq!(ovr.words, None); + assert_eq!(ovr.ignore_patterns, None); + } + + #[test] + fn test_override_block_empty_paths_skipped() { + let toml_str = r#" + [[overrides]] + paths = [] + extra_words = ["test"] + "#; + + let config: ConfigSettings = toml::from_str(toml_str).unwrap(); + assert!(config.overrides.is_empty()); + } + + #[test] + fn test_override_block_no_effect_skipped() { + let toml_str = r#" + [[overrides]] + paths = ["**/*.md"] + "#; + + let config: ConfigSettings = toml::from_str(toml_str).unwrap(); + assert!(config.overrides.is_empty()); + } + + #[test] + fn test_override_matches_path() { + let ovr = OverrideBlock { + paths: vec!["**/*.md".to_string(), "docs/**/*".to_string()], + extra_words: Some(vec!["test".to_string()]), + ..OverrideBlock::default_for_test() + }; + + assert!(ovr.matches_path(Path::new("README.md"))); + assert!(ovr.matches_path(Path::new("src/guide.md"))); + assert!(ovr.matches_path(Path::new("docs/api/index.html"))); + assert!(!ovr.matches_path(Path::new("src/main.rs"))); + } + + #[test] + fn test_apply_override_replace() { + let mut settings = ConfigSettings { + words: vec!["alpha".to_string(), "beta".to_string()], + ..Default::default() + }; + + let ovr = OverrideBlock { + paths: vec!["**/*.md".to_string()], + words: Some(vec!["gamma".to_string()]), + ..OverrideBlock::default_for_test() + }; + + settings.apply_override(&ovr); + assert_eq!(settings.words, vec!["gamma"]); + } + + #[test] + fn test_apply_override_append() { + let mut settings = ConfigSettings { + words: vec!["alpha".to_string(), "beta".to_string()], + ..Default::default() + }; + + let ovr = OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_words: Some(vec!["gamma".to_string()]), + ..OverrideBlock::default_for_test() + }; + + settings.apply_override(&ovr); + assert_eq!(settings.words, vec!["alpha", "beta", "gamma"]); + } + + #[test] + fn test_apply_override_replace_then_append() { + let mut settings = ConfigSettings { + words: vec!["alpha".to_string(), "beta".to_string()], + ..Default::default() + }; + + let ovr = OverrideBlock { + paths: vec!["**/*.md".to_string()], + words: Some(vec!["gamma".to_string()]), + extra_words: Some(vec!["delta".to_string()]), + ..OverrideBlock::default_for_test() + }; + + settings.apply_override(&ovr); + assert_eq!(settings.words, vec!["gamma", "delta"]); + } + + #[test] + fn test_apply_override_no_change() { + let mut settings = ConfigSettings { + words: vec!["alpha".to_string()], + dictionaries: vec!["en_us".to_string()], + ..Default::default() + }; + + let ovr = OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_flag_words: Some(vec!["hack".to_string()]), + ..OverrideBlock::default_for_test() + }; + + settings.apply_override(&ovr); + // words and dictionaries unchanged + assert_eq!(settings.words, vec!["alpha"]); + assert_eq!(settings.dictionaries, vec!["en_us"]); + // flag_words changed + assert_eq!(settings.flag_words, vec!["hack"]); + } + + #[test] + fn test_resolve_for_path_no_match() { + let settings = ConfigSettings { + words: vec!["base".to_string()], + overrides: vec![OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_words: Some(vec!["markdown".to_string()]), + ..OverrideBlock::default_for_test() + }], + ..Default::default() + }; + + let resolved = settings.resolve_for_path(Path::new("src/main.rs")); + assert_eq!(resolved.words, vec!["base"]); + assert!(resolved.overrides.is_empty()); + } + + #[test] + fn test_resolve_for_path_single_match() { + let settings = ConfigSettings { + words: vec!["base".to_string()], + overrides: vec![OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_words: Some(vec!["markdown".to_string()]), + ..OverrideBlock::default_for_test() + }], + ..Default::default() + }; + + let resolved = settings.resolve_for_path(Path::new("README.md")); + assert_eq!(resolved.words, vec!["base", "markdown"]); + assert!(resolved.overrides.is_empty()); + } + + #[test] + fn test_resolve_for_path_multiple_matches() { + let settings = ConfigSettings { + words: vec!["base".to_string()], + overrides: vec![ + OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_words: Some(vec!["markdown".to_string()]), + ..OverrideBlock::default_for_test() + }, + OverrideBlock { + paths: vec!["docs/**/*".to_string()], + extra_words: Some(vec!["documentation".to_string()]), + ..OverrideBlock::default_for_test() + }, + ], + ..Default::default() + }; + + let resolved = settings.resolve_for_path(Path::new("docs/guide.md")); + assert_eq!(resolved.words, vec!["base", "markdown", "documentation"]); + } + + #[test] + fn test_resolve_for_path_replace_overrides_base() { + let settings = ConfigSettings { + dictionaries: vec!["en_us".to_string()], + overrides: vec![OverrideBlock { + paths: vec!["docs/de/**/*".to_string()], + dictionaries: Some(vec!["de".to_string()]), + extra_words: Some(vec!["codebook".to_string()]), + ..OverrideBlock::default_for_test() + }], + ..Default::default() + }; + + let resolved = settings.resolve_for_path(Path::new("docs/de/guide.md")); + assert_eq!(resolved.dictionaries, vec!["de"]); + assert_eq!(resolved.words, vec!["codebook"]); + } + + #[test] + fn test_merge_preserves_override_order() { + let mut global = ConfigSettings { + words: vec!["global".to_string()], + overrides: vec![OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_words: Some(vec!["from_global".to_string()]), + ..OverrideBlock::default_for_test() + }], + ..Default::default() + }; + + let project = ConfigSettings { + words: vec!["project".to_string()], + overrides: vec![OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_words: Some(vec!["from_project".to_string()]), + ..OverrideBlock::default_for_test() + }], + ..Default::default() + }; + + global.merge(project); + + // Overrides should be: global first, then project + assert_eq!(global.overrides.len(), 2); + assert_eq!( + global.overrides[0].extra_words, + Some(vec!["from_global".to_string()]) + ); + assert_eq!( + global.overrides[1].extra_words, + Some(vec!["from_project".to_string()]) + ); + } + + #[test] + fn test_serialization_with_overrides() { + let config = ConfigSettings { + words: vec!["base".to_string()], + overrides: vec![OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_words: Some(vec!["markdown".to_string()]), + ..OverrideBlock::default_for_test() + }], + ..Default::default() + }; + + let serialized = toml::to_string_pretty(&config).unwrap(); + let deserialized: ConfigSettings = toml::from_str(&serialized).unwrap(); + + assert_eq!(config, deserialized); + } + + #[test] + fn test_config_settings_query_methods() { + let settings = ConfigSettings { + dictionaries: vec!["en_us".to_string()], + words: vec!["codebook".to_string()], + flag_words: vec!["todo".to_string()], + min_word_length: 4, + ..Default::default() + }; + + assert_eq!(settings.dictionary_ids(), vec!["en_us"]); + assert!(settings.is_allowed_word("codebook")); + assert!(settings.is_allowed_word("CODEBOOK")); // case insensitive + assert!(!settings.is_allowed_word("unknown")); + assert!(settings.should_flag_word("todo")); + assert!(settings.should_flag_word("TODO")); // case insensitive + assert!(!settings.should_flag_word("done")); + assert_eq!(settings.min_word_length(), 4); + } + + #[test] + fn test_dictionary_ids_default() { + let settings = ConfigSettings::default(); + assert_eq!(settings.dictionary_ids(), vec!["en_us"]); + } + + impl OverrideBlock { + /// Helper for tests: creates an OverrideBlock with all fields set to None/empty. + fn default_for_test() -> Self { + Self { + paths: vec![], + dictionaries: None, + words: None, + flag_words: None, + ignore_patterns: None, + extra_dictionaries: None, + extra_words: None, + extra_flag_words: None, + extra_ignore_patterns: None, + } + } + } } diff --git a/crates/codebook/src/checker.rs b/crates/codebook/src/checker.rs index 95a2dff..ff04e8a 100644 --- a/crates/codebook/src/checker.rs +++ b/crates/codebook/src/checker.rs @@ -2,7 +2,7 @@ use std::collections::{HashMap, HashSet}; use crate::dictionaries::dictionary::Dictionary; use crate::parser::{TextRange, WordLocation}; -use codebook_config::CodebookConfig; +use codebook_config::{CodebookConfig, ConfigSettings}; /// A candidate word extracted from a text node, with its position /// in original-document byte offsets. Borrows the word text from the @@ -16,11 +16,13 @@ pub struct WordCandidate<'a> { /// Check candidate words against dictionaries and config rules. /// Returns WordLocations for misspelled words, grouping all locations -/// of the same word together. +/// of the same word together. When `resolved` is Some, its per-file +/// settings take precedence over the base config for flag/allow/min-length. pub fn check_words( candidates: &[WordCandidate<'_>], dictionaries: &[std::sync::Arc], config: &dyn CodebookConfig, + resolved: Option<&ConfigSettings>, ) -> Vec { // Group candidates by word text, deduplicating identical spans. let mut word_positions: HashMap<&str, HashSet> = HashMap::new(); @@ -41,18 +43,31 @@ pub fn check_words( ); } + let should_flag = |w: &str| match resolved { + Some(s) => s.should_flag_word(w), + None => config.should_flag_word(w), + }; + let min_word_length = match resolved { + Some(s) => s.min_word_length(), + None => config.get_min_word_length(), + }; + let is_allowed = |w: &str| match resolved { + Some(s) => s.is_allowed_word(w), + None => config.is_allowed_word(w), + }; + // Check each unique word once let mut results = Vec::new(); for (word, positions) in word_positions { let positions: Vec = positions.into_iter().collect(); - if config.should_flag_word(word) { + if should_flag(word) { results.push(WordLocation::new(word.to_string(), positions)); continue; } - if word.len() < config.get_min_word_length() { + if word.len() < min_word_length { continue; } - if config.is_allowed_word(word) { + if is_allowed(word) { continue; } let is_correct = dictionaries.iter().any(|dict| dict.check(word)); @@ -85,7 +100,7 @@ mod tests { let dict = Arc::new(TextDictionary::new("hello\nworld\n")); let config = Arc::new(codebook_config::CodebookConfigMemory::default()); let candidates = make_candidates(&[("hello", 0, 5), ("wrld", 6, 10)]); - let results = check_words(&candidates, &[dict], config.as_ref()); + let results = check_words(&candidates, &[dict], config.as_ref(), None); assert_eq!(results.len(), 1); assert_eq!(results[0].word, "wrld"); } @@ -95,7 +110,7 @@ mod tests { let dict = Arc::new(TextDictionary::new("hello\n")); let config = Arc::new(codebook_config::CodebookConfigMemory::default()); let candidates = make_candidates(&[("wrld", 0, 4), ("wrld", 10, 14)]); - let results = check_words(&candidates, &[dict], config.as_ref()); + let results = check_words(&candidates, &[dict], config.as_ref(), None); assert_eq!(results.len(), 1); assert_eq!(results[0].word, "wrld"); assert_eq!(results[0].locations.len(), 2); @@ -107,7 +122,7 @@ mod tests { let config = Arc::new(codebook_config::CodebookConfigMemory::default()); // Default min word length is 3 let candidates = make_candidates(&[("ab", 0, 2)]); - let results = check_words(&candidates, &[dict], config.as_ref()); + let results = check_words(&candidates, &[dict], config.as_ref(), None); assert!(results.is_empty(), "Short words should be skipped"); } @@ -117,7 +132,7 @@ mod tests { let config = Arc::new(codebook_config::CodebookConfigMemory::default()); config.add_word("codebook").unwrap(); let candidates = make_candidates(&[("codebook", 0, 8)]); - let results = check_words(&candidates, &[dict], config.as_ref()); + let results = check_words(&candidates, &[dict], config.as_ref(), None); assert!(results.is_empty(), "Allowed words should not be flagged"); } } diff --git a/crates/codebook/src/lib.rs b/crates/codebook/src/lib.rs index 19cfe95..6a65364 100644 --- a/crates/codebook/src/lib.rs +++ b/crates/codebook/src/lib.rs @@ -11,7 +11,8 @@ use std::collections::HashSet; use std::path::Path; use std::sync::Arc; -use codebook_config::CodebookConfig; +use codebook_config::helpers::build_ignore_regexes; +use codebook_config::{CodebookConfig, ConfigSettings}; use dictionaries::{dictionary, manager::DictionaryManager}; use dictionary::Dictionary; use log::debug; @@ -39,6 +40,7 @@ impl Codebook { language: Option, file_path: Option<&str>, ) -> Vec { + // ignore_paths and include_paths are evaluated BEFORE overrides if let Some(file_path) = file_path { if self.config.should_ignore_path(Path::new(file_path)) { return Vec::new(); @@ -48,11 +50,16 @@ impl Codebook { } } + // Resolve per-file settings (applies matching overrides) + let resolved = file_path.and_then(|fp| self.config.resolve_for_file(Path::new(fp))); + let language = self.resolve_language(language, file_path); // Combine default and user skip patterns let mut all_patterns = get_default_skip_patterns().clone(); - if let Some(user_patterns) = self.config.get_ignore_patterns() { + if let Some(ref settings) = resolved { + all_patterns.extend(build_ignore_regexes(&settings.ignore_patterns)); + } else if let Some(user_patterns) = self.config.get_ignore_patterns() { all_patterns.extend(user_patterns); } @@ -64,11 +71,17 @@ impl Codebook { &all_patterns, ); - // Load dictionaries for all languages encountered - let dictionaries = self.get_dictionaries_for_languages(&languages_found); + // Load dictionaries for all languages encountered (using resolved settings if any) + let dictionaries = + self.get_dictionaries_for_languages(&languages_found, resolved.as_deref()); // Check words against dictionaries - checker::check_words(&candidates, &dictionaries, self.config.as_ref()) + checker::check_words( + &candidates, + &dictionaries, + self.config.as_ref(), + resolved.as_deref(), + ) } fn resolve_language( @@ -86,11 +99,16 @@ impl Codebook { } /// Gather dictionaries for all languages encountered in a file. + /// If `resolved` is Some, its dictionary list is used in place of the base config's. fn get_dictionaries_for_languages( &self, languages: &HashSet, + resolved: Option<&ConfigSettings>, ) -> Vec> { - let mut dictionary_ids = self.config.get_dictionary_ids(); + let mut dictionary_ids = match resolved { + Some(settings) => settings.dictionary_ids(), + None => self.config.get_dictionary_ids(), + }; for lang in languages { dictionary_ids.extend(lang.dictionary_ids()); @@ -119,7 +137,7 @@ impl Codebook { pub fn get_suggestions(&self, word: &str) -> Option> { let max_results = 5; - let dictionaries = self.get_dictionaries_for_languages(&HashSet::new()); + let dictionaries = self.get_dictionaries_for_languages(&HashSet::new(), None); let mut is_misspelled = false; let suggestions: Vec> = dictionaries .iter()