From 6e62d5421aa81482f085b8af3b609958d3e1ad4a Mon Sep 17 00:00:00 2001 From: Bo Lopker Date: Mon, 9 Feb 2026 09:30:10 -0800 Subject: [PATCH 1/2] Fist cut --- .claude/settings.local.json | 3 +- crates/codebook-config/src/helpers.rs | 2 +- crates/codebook-config/src/lib.rs | 287 ++++++++++++- crates/codebook-config/src/settings.rs | 541 ++++++++++++++++++++++++- crates/codebook/src/lib.rs | 72 +++- 5 files changed, 882 insertions(+), 23 deletions(-) diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 59fcdd2f..8a2a8106 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -13,7 +13,8 @@ "Bash(git remote get-url:*)", "Bash(gh issue list:*)", "Bash(gh issue view:*)", - "Bash(gh repo view:*)" + "Bash(gh repo view:*)", + "Bash(command make build)" ] } } diff --git a/crates/codebook-config/src/helpers.rs b/crates/codebook-config/src/helpers.rs index 53d2b2c5..abc1035a 100644 --- a/crates/codebook-config/src/helpers.rs +++ b/crates/codebook-config/src/helpers.rs @@ -114,7 +114,7 @@ pub(crate) fn should_flag_word(settings: &ConfigSettings, word: &str) -> bool { /// Compile user-provided ignore regex patterns, dropping invalid entries. /// Patterns are compiled with multiline mode so `^` and `$` match line boundaries. 
-pub(crate) fn build_ignore_regexes(patterns: &[String]) -> Vec { +pub fn build_ignore_regexes(patterns: &[String]) -> Vec { patterns .iter() .filter_map( diff --git a/crates/codebook-config/src/lib.rs b/crates/codebook-config/src/lib.rs index 084b07e9..2f0b40fa 100644 --- a/crates/codebook-config/src/lib.rs +++ b/crates/codebook-config/src/lib.rs @@ -1,8 +1,8 @@ -mod helpers; -mod settings; +pub mod helpers; +pub mod settings; mod watched_file; use crate::helpers::expand_tilde; -use crate::settings::ConfigSettings; +pub use crate::settings::ConfigSettings; use crate::watched_file::WatchedFile; use log::debug; use log::info; @@ -31,6 +31,12 @@ pub trait CodebookConfig: Sync + Send + Debug { fn get_ignore_patterns(&self) -> Option>; fn get_min_word_length(&self) -> usize; fn cache_dir(&self) -> &Path; + + /// Resolve settings with overrides applied for a specific file path. + /// Returns None if no overrides match (callers should use base config methods). + fn resolve_for_file(&self, _relative_path: &Path) -> Option> { + None + } } /// Internal mutable state @@ -527,6 +533,22 @@ impl CodebookConfig for CodebookConfigFile { fn cache_dir(&self) -> &Path { &self.cache_dir } + + /// Resolve settings with overrides applied for a specific file path. 
+ fn resolve_for_file(&self, relative_path: &Path) -> Option> { + let snapshot = self.snapshot(); + if snapshot.overrides.is_empty() { + return None; + } + if !snapshot + .overrides + .iter() + .any(|o| o.matches_path(relative_path)) + { + return None; + } + Some(Arc::new(snapshot.resolve_for_path(relative_path))) + } } #[derive(Debug)] @@ -1123,4 +1145,263 @@ mod tests { Ok(()) } + + // --- Override integration tests --- + + #[test] + fn test_resolve_for_file_no_overrides() { + let config = CodebookConfigFile::default(); + { + let mut inner = config.inner.write().unwrap(); + let settings = ConfigSettings { + words: vec!["base".to_string()], + ..Default::default() + }; + inner.project_config = inner.project_config.clone().with_content_value(settings); + CodebookConfigFile::rebuild_snapshot(&mut inner); + } + + // No overrides, should return None + assert!(config + .resolve_for_file(Path::new("src/main.rs")) + .is_none()); + } + + #[test] + fn test_resolve_for_file_with_matching_override() -> Result<(), io::Error> { + let temp_dir = TempDir::new().unwrap(); + let config_path = temp_dir.path().join("codebook.toml"); + let mut file = File::create(&config_path)?; + write!( + file, + r#" + words = ["base"] + + [[overrides]] + paths = ["**/*.md"] + extra_words = ["markdown"] + "# + )?; + + let config = load_from_file(ConfigType::Project, &config_path)?; + + // .md file should get override + let resolved = config.resolve_for_file(Path::new("README.md")); + assert!(resolved.is_some()); + let settings = resolved.unwrap(); + assert!(settings.is_allowed_word("base")); + assert!(settings.is_allowed_word("markdown")); + + // .rs file should not match + assert!(config + .resolve_for_file(Path::new("src/main.rs")) + .is_none()); + + Ok(()) + } + + #[test] + fn test_resolve_for_file_global_and_project_overrides() -> Result<(), io::Error> { + let global_temp = TempDir::new().unwrap(); + let project_temp = TempDir::new().unwrap(); + + // Global config with an override + let 
global_config_path = global_temp.path().join("codebook.toml"); + fs::write( + &global_config_path, + r#" + words = ["globalbase"] + + [[overrides]] + paths = ["**/*.md"] + extra_words = ["fromglobal"] + "#, + )?; + + // Project config with an override + let project_config_path = project_temp.path().join("codebook.toml"); + fs::write( + &project_config_path, + r#" + words = ["projectbase"] + + [[overrides]] + paths = ["**/*.md"] + extra_words = ["fromproject"] + "#, + )?; + + // Load both configs + let config = CodebookConfigFile::default(); + { + let mut inner = config.inner.write().unwrap(); + if let Ok(global_settings) = + CodebookConfigFile::load_settings_from_file(&global_config_path) + { + inner.global_config = WatchedFile::new(Some(global_config_path)) + .with_content_value(global_settings); + } + if let Ok(project_settings) = + CodebookConfigFile::load_settings_from_file(&project_config_path) + { + inner.project_config = WatchedFile::new(Some(project_config_path)) + .with_content_value(project_settings); + } + let effective = CodebookConfigFile::calculate_effective_settings( + &inner.project_config, + &inner.global_config, + ); + inner.snapshot = Arc::new(effective); + } + + // Resolve for a .md file — both overrides should apply + let resolved = config.resolve_for_file(Path::new("docs/guide.md")); + assert!(resolved.is_some()); + let settings = resolved.unwrap(); + assert!(settings.is_allowed_word("globalbase")); + assert!(settings.is_allowed_word("projectbase")); + assert!(settings.is_allowed_word("fromglobal")); + assert!(settings.is_allowed_word("fromproject")); + + Ok(()) + } + + #[test] + fn test_resolve_for_file_use_global_false_ignores_global_overrides() -> Result<(), io::Error> { + let global_temp = TempDir::new().unwrap(); + let project_temp = TempDir::new().unwrap(); + + let global_config_path = global_temp.path().join("codebook.toml"); + fs::write( + &global_config_path, + r#" + words = ["globalbase"] + + [[overrides]] + paths = ["**/*.md"] + 
extra_words = ["fromglobal"] + "#, + )?; + + let project_config_path = project_temp.path().join("codebook.toml"); + fs::write( + &project_config_path, + r#" + words = ["projectbase"] + use_global = false + + [[overrides]] + paths = ["**/*.md"] + extra_words = ["fromproject"] + "#, + )?; + + let config = CodebookConfigFile::default(); + { + let mut inner = config.inner.write().unwrap(); + if let Ok(global_settings) = + CodebookConfigFile::load_settings_from_file(&global_config_path) + { + inner.global_config = WatchedFile::new(Some(global_config_path)) + .with_content_value(global_settings); + } + if let Ok(project_settings) = + CodebookConfigFile::load_settings_from_file(&project_config_path) + { + inner.project_config = WatchedFile::new(Some(project_config_path)) + .with_content_value(project_settings); + } + let effective = CodebookConfigFile::calculate_effective_settings( + &inner.project_config, + &inner.global_config, + ); + inner.snapshot = Arc::new(effective); + } + + // With use_global = false, global overrides should be ignored + let resolved = config.resolve_for_file(Path::new("README.md")); + assert!(resolved.is_some()); + let settings = resolved.unwrap(); + assert!(settings.is_allowed_word("projectbase")); + assert!(settings.is_allowed_word("fromproject")); + // Global words and overrides should NOT be present + assert!(!settings.is_allowed_word("globalbase")); + assert!(!settings.is_allowed_word("fromglobal")); + + Ok(()) + } + + #[test] + fn test_save_preserves_overrides() -> Result<(), io::Error> { + let temp_dir = TempDir::new().unwrap(); + let config_path = temp_dir.path().join("codebook.toml"); + fs::write( + &config_path, + r#" + words = ["base"] + + [[overrides]] + paths = ["**/*.md"] + extra_words = ["markdown"] + "#, + )?; + + let config = load_from_file(ConfigType::Project, &config_path)?; + + // Add a word and save + config.add_word("newword")?; + config.save()?; + + // Reload and verify overrides are preserved + let reloaded = 
load_from_file(ConfigType::Project, &config_path)?; + assert!(reloaded.is_allowed_word("base")); + assert!(reloaded.is_allowed_word("newword")); + + // Override should still work + let resolved = reloaded.resolve_for_file(Path::new("README.md")); + assert!(resolved.is_some()); + assert!(resolved.unwrap().is_allowed_word("markdown")); + + Ok(()) + } + + #[test] + fn test_reload_picks_up_override_changes() -> Result<(), io::Error> { + let temp_dir = TempDir::new().unwrap(); + let config_path = temp_dir.path().join("codebook.toml"); + fs::write( + &config_path, + r#" + words = ["base"] + "#, + )?; + + let config = load_from_file(ConfigType::Project, &config_path)?; + + // No overrides initially + assert!(config + .resolve_for_file(Path::new("README.md")) + .is_none()); + + // Update config with overrides + fs::write( + &config_path, + r#" + words = ["base"] + + [[overrides]] + paths = ["**/*.md"] + extra_words = ["markdown"] + "#, + )?; + + config.reload()?; + + // Now overrides should apply + let resolved = config.resolve_for_file(Path::new("README.md")); + assert!(resolved.is_some()); + assert!(resolved.unwrap().is_allowed_word("markdown")); + + Ok(()) + } } diff --git a/crates/codebook-config/src/settings.rs b/crates/codebook-config/src/settings.rs index 9173b986..2b9a6126 100644 --- a/crates/codebook-config/src/settings.rs +++ b/crates/codebook-config/src/settings.rs @@ -1,4 +1,125 @@ +use glob::Pattern; +use log::warn; use serde::{Deserialize, Serialize}; +use std::path::Path; + +/// A single `[[overrides]]` block in the config file. 
+#[derive(Debug, Serialize, Clone, PartialEq)] +pub struct OverrideBlock { + /// Required: glob patterns matched against file path relative to project root + pub paths: Vec, + + // --- Replace fields (replace the base list entirely) --- + #[serde(default, skip_serializing_if = "Option::is_none")] + pub dictionaries: Option>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub words: Option>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub flag_words: Option>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub ignore_patterns: Option>, + + // --- Append fields (append to the resolved list) --- + #[serde(default, skip_serializing_if = "Option::is_none")] + pub extra_dictionaries: Option>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub extra_words: Option>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub extra_flag_words: Option>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub extra_ignore_patterns: Option>, +} + +impl<'de> Deserialize<'de> for OverrideBlock { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + fn to_lowercase_vec(v: Vec) -> Vec { + v.into_iter().map(|s| s.to_ascii_lowercase()).collect() + } + + fn to_lowercase_opt(v: Option>) -> Option> { + v.map(to_lowercase_vec) + } + + #[derive(Deserialize)] + struct Helper { + #[serde(default)] + paths: Vec, + #[serde(default)] + dictionaries: Option>, + #[serde(default)] + words: Option>, + #[serde(default)] + flag_words: Option>, + #[serde(default)] + ignore_patterns: Option>, + #[serde(default)] + extra_dictionaries: Option>, + #[serde(default)] + extra_words: Option>, + #[serde(default)] + extra_flag_words: Option>, + #[serde(default)] + extra_ignore_patterns: Option>, + } + + let helper = Helper::deserialize(deserializer)?; + Ok(OverrideBlock { + paths: helper.paths, + // Lowercase word-related fields + dictionaries: 
to_lowercase_opt(helper.dictionaries), + words: to_lowercase_opt(helper.words), + flag_words: to_lowercase_opt(helper.flag_words), + extra_dictionaries: to_lowercase_opt(helper.extra_dictionaries), + extra_words: to_lowercase_opt(helper.extra_words), + extra_flag_words: to_lowercase_opt(helper.extra_flag_words), + // Don't lowercase patterns or paths + ignore_patterns: helper.ignore_patterns, + extra_ignore_patterns: helper.extra_ignore_patterns, + }) + } +} + +impl OverrideBlock { + /// Returns true if this override block is valid (has non-empty paths with at least one valid glob). + pub fn is_valid(&self) -> bool { + if self.paths.is_empty() { + return false; + } + self.paths.iter().any(|p| Pattern::new(p).is_ok()) + } + + /// Check if this override applies to the given relative file path. + pub fn matches_path(&self, relative_path: &Path) -> bool { + let path_str = relative_path.to_string_lossy(); + self.paths.iter().any(|pattern| { + Pattern::new(pattern) + .map(|p| p.matches(&path_str)) + .unwrap_or(false) + }) + } + + /// Returns true if any field besides `paths` is set (the override has an effect). 
+ pub fn has_effect(&self) -> bool { + self.dictionaries.is_some() + || self.words.is_some() + || self.flag_words.is_some() + || self.ignore_patterns.is_some() + || self.extra_dictionaries.is_some() + || self.extra_words.is_some() + || self.extra_flag_words.is_some() + || self.extra_ignore_patterns.is_some() + } +} + #[derive(Debug, Serialize, Clone, PartialEq)] pub struct ConfigSettings { /// List of dictionaries to use for spell checking @@ -34,6 +155,10 @@ pub struct ConfigSettings { skip_serializing_if = "is_default_min_word_length" )] pub min_word_length: usize, + + /// Scoped configuration overrides + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub overrides: Vec, } fn default_use_global() -> bool { @@ -62,6 +187,7 @@ impl Default for ConfigSettings { ignore_patterns: Vec::new(), use_global: true, min_word_length: default_min_word_length(), + overrides: Vec::new(), } } } @@ -90,9 +216,29 @@ impl<'de> Deserialize<'de> for ConfigSettings { use_global: bool, #[serde(default = "default_min_word_length")] min_word_length: usize, + #[serde(default)] + overrides: Vec, } let helper = Helper::deserialize(deserializer)?; + + // Filter out invalid override blocks + let overrides: Vec = helper + .overrides + .into_iter() + .filter(|o| { + if !o.is_valid() { + warn!("Skipping invalid override block (empty or invalid paths)"); + return false; + } + if !o.has_effect() { + warn!("Skipping no-op override block (no settings specified)"); + return false; + } + true + }) + .collect(); + Ok(ConfigSettings { dictionaries: to_lowercase_vec(helper.dictionaries), words: to_lowercase_vec(helper.words), @@ -101,12 +247,14 @@ impl<'de> Deserialize<'de> for ConfigSettings { ignore_patterns: helper.ignore_patterns, use_global: helper.use_global, min_word_length: helper.min_word_length, + overrides, }) } } impl ConfigSettings { - /// Merge another config settings into this one, sorting and deduplicating all collections + /// Merge another config settings into this one, 
sorting and deduplicating all collections. + /// Overrides are appended (preserving order: self's overrides first, then other's). pub fn merge(&mut self, other: ConfigSettings) { // Add items from the other config self.dictionaries.extend(other.dictionaries); @@ -115,6 +263,9 @@ impl ConfigSettings { self.ignore_paths.extend(other.ignore_paths); self.ignore_patterns.extend(other.ignore_patterns); + // Append overrides (global first, then project — order matters) + self.overrides.extend(other.overrides); + // The use_global setting from the other config is ignored during merging // as this is a per-config setting @@ -123,11 +274,11 @@ impl ConfigSettings { self.min_word_length = other.min_word_length; } - // Sort and deduplicate each collection + // Sort and deduplicate each collection (but NOT overrides) self.sort_and_dedup(); } - /// Sort and deduplicate all collections in the config + /// Sort and deduplicate all collections in the config (but not overrides). pub fn sort_and_dedup(&mut self) { // Sort and deduplicate each Vec sort_and_dedup(&mut self.dictionaries); @@ -135,6 +286,80 @@ impl ConfigSettings { sort_and_dedup(&mut self.flag_words); sort_and_dedup(&mut self.ignore_paths); sort_and_dedup(&mut self.ignore_patterns); + // Note: overrides are NOT sorted — order matters for resolution + } + + /// Apply a single override block to this settings (mutates in place). + /// Replace fields are applied first, then append fields. 
+ pub fn apply_override(&mut self, ovr: &OverrideBlock) { + // Replace fields: fully replace the list + if let Some(ref v) = ovr.dictionaries { + self.dictionaries = v.clone(); + } + if let Some(ref v) = ovr.words { + self.words = v.clone(); + } + if let Some(ref v) = ovr.flag_words { + self.flag_words = v.clone(); + } + if let Some(ref v) = ovr.ignore_patterns { + self.ignore_patterns = v.clone(); + } + + // Append fields: extend the current list + if let Some(ref v) = ovr.extra_dictionaries { + self.dictionaries.extend(v.clone()); + } + if let Some(ref v) = ovr.extra_words { + self.words.extend(v.clone()); + } + if let Some(ref v) = ovr.extra_flag_words { + self.flag_words.extend(v.clone()); + } + if let Some(ref v) = ovr.extra_ignore_patterns { + self.ignore_patterns.extend(v.clone()); + } + } + + /// Resolve the effective settings for a specific file path by applying matching overrides. + /// Returns a new ConfigSettings with overrides applied and the overrides list cleared. + pub fn resolve_for_path(&self, path: &Path) -> ConfigSettings { + let mut resolved = self.clone(); + resolved.overrides = vec![]; // Resolved config has no overrides + + for ovr in &self.overrides { + if ovr.matches_path(path) { + resolved.apply_override(ovr); + } + } + + resolved + } + + /// Get dictionary IDs, providing a default when none are configured. + pub fn dictionary_ids(&self) -> Vec { + if self.dictionaries.is_empty() { + vec!["en_us".to_string()] + } else { + self.dictionaries.clone() + } + } + + /// Check if a word is explicitly allowed. + pub fn is_allowed_word(&self, word: &str) -> bool { + let word = word.to_ascii_lowercase(); + self.words.iter().any(|w| w == &word) + } + + /// Check if a word should be flagged. + pub fn should_flag_word(&self, word: &str) -> bool { + let word = word.to_ascii_lowercase(); + self.flag_words.iter().any(|w| w == &word) + } + + /// Get the minimum word length to check. 
+ pub fn get_min_word_length(&self) -> usize { + self.min_word_length } } @@ -158,6 +383,7 @@ mod tests { assert_eq!(config.ignore_patterns, Vec::::new()); assert!(config.use_global); assert_eq!(config.min_word_length, 3); + assert!(config.overrides.is_empty()); } #[test] @@ -229,6 +455,7 @@ mod tests { ignore_patterns: vec!["^```.*$".to_string()], use_global: true, min_word_length: 3, + ..Default::default() }; let other = ConfigSettings { @@ -239,6 +466,7 @@ mod tests { ignore_patterns: vec!["^//.*$".to_string()], use_global: false, min_word_length: 2, + ..Default::default() }; base.merge(other); @@ -306,6 +534,7 @@ mod tests { ], use_global: true, min_word_length: 3, + ..Default::default() }; config.sort_and_dedup(); @@ -355,4 +584,310 @@ mod tests { assert_eq!(config.ignore_patterns, Vec::::new()); assert!(config.use_global); } + + // --- Override tests --- + + #[test] + fn test_override_block_deserialization() { + let toml_str = r#" + words = ["base"] + + [[overrides]] + paths = ["**/*.md"] + extra_words = ["Markdown"] + dictionaries = ["EN_GB"] + "#; + + let config: ConfigSettings = toml::from_str(toml_str).unwrap(); + assert_eq!(config.overrides.len(), 1); + let ovr = &config.overrides[0]; + assert_eq!(ovr.paths, vec!["**/*.md"]); + assert_eq!(ovr.extra_words, Some(vec!["markdown".to_string()])); // lowercased + assert_eq!(ovr.dictionaries, Some(vec!["en_gb".to_string()])); // lowercased + assert_eq!(ovr.words, None); + assert_eq!(ovr.ignore_patterns, None); + } + + #[test] + fn test_override_block_empty_paths_skipped() { + let toml_str = r#" + [[overrides]] + paths = [] + extra_words = ["test"] + "#; + + let config: ConfigSettings = toml::from_str(toml_str).unwrap(); + assert!(config.overrides.is_empty()); + } + + #[test] + fn test_override_block_no_effect_skipped() { + let toml_str = r#" + [[overrides]] + paths = ["**/*.md"] + "#; + + let config: ConfigSettings = toml::from_str(toml_str).unwrap(); + assert!(config.overrides.is_empty()); + } + + #[test] + fn 
test_override_matches_path() { + let ovr = OverrideBlock { + paths: vec!["**/*.md".to_string(), "docs/**/*".to_string()], + extra_words: Some(vec!["test".to_string()]), + ..OverrideBlock::default_for_test() + }; + + assert!(ovr.matches_path(Path::new("README.md"))); + assert!(ovr.matches_path(Path::new("src/guide.md"))); + assert!(ovr.matches_path(Path::new("docs/api/index.html"))); + assert!(!ovr.matches_path(Path::new("src/main.rs"))); + } + + #[test] + fn test_apply_override_replace() { + let mut settings = ConfigSettings { + words: vec!["alpha".to_string(), "beta".to_string()], + ..Default::default() + }; + + let ovr = OverrideBlock { + paths: vec!["**/*.md".to_string()], + words: Some(vec!["gamma".to_string()]), + ..OverrideBlock::default_for_test() + }; + + settings.apply_override(&ovr); + assert_eq!(settings.words, vec!["gamma"]); + } + + #[test] + fn test_apply_override_append() { + let mut settings = ConfigSettings { + words: vec!["alpha".to_string(), "beta".to_string()], + ..Default::default() + }; + + let ovr = OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_words: Some(vec!["gamma".to_string()]), + ..OverrideBlock::default_for_test() + }; + + settings.apply_override(&ovr); + assert_eq!(settings.words, vec!["alpha", "beta", "gamma"]); + } + + #[test] + fn test_apply_override_replace_then_append() { + let mut settings = ConfigSettings { + words: vec!["alpha".to_string(), "beta".to_string()], + ..Default::default() + }; + + let ovr = OverrideBlock { + paths: vec!["**/*.md".to_string()], + words: Some(vec!["gamma".to_string()]), + extra_words: Some(vec!["delta".to_string()]), + ..OverrideBlock::default_for_test() + }; + + settings.apply_override(&ovr); + assert_eq!(settings.words, vec!["gamma", "delta"]); + } + + #[test] + fn test_apply_override_no_change() { + let mut settings = ConfigSettings { + words: vec!["alpha".to_string()], + dictionaries: vec!["en_us".to_string()], + ..Default::default() + }; + + let ovr = OverrideBlock { + paths: 
vec!["**/*.md".to_string()], + extra_flag_words: Some(vec!["hack".to_string()]), + ..OverrideBlock::default_for_test() + }; + + settings.apply_override(&ovr); + // words and dictionaries unchanged + assert_eq!(settings.words, vec!["alpha"]); + assert_eq!(settings.dictionaries, vec!["en_us"]); + // flag_words changed + assert_eq!(settings.flag_words, vec!["hack"]); + } + + #[test] + fn test_resolve_for_path_no_match() { + let settings = ConfigSettings { + words: vec!["base".to_string()], + overrides: vec![OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_words: Some(vec!["markdown".to_string()]), + ..OverrideBlock::default_for_test() + }], + ..Default::default() + }; + + let resolved = settings.resolve_for_path(Path::new("src/main.rs")); + assert_eq!(resolved.words, vec!["base"]); + assert!(resolved.overrides.is_empty()); + } + + #[test] + fn test_resolve_for_path_single_match() { + let settings = ConfigSettings { + words: vec!["base".to_string()], + overrides: vec![OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_words: Some(vec!["markdown".to_string()]), + ..OverrideBlock::default_for_test() + }], + ..Default::default() + }; + + let resolved = settings.resolve_for_path(Path::new("README.md")); + assert_eq!(resolved.words, vec!["base", "markdown"]); + assert!(resolved.overrides.is_empty()); + } + + #[test] + fn test_resolve_for_path_multiple_matches() { + let settings = ConfigSettings { + words: vec!["base".to_string()], + overrides: vec![ + OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_words: Some(vec!["markdown".to_string()]), + ..OverrideBlock::default_for_test() + }, + OverrideBlock { + paths: vec!["docs/**/*".to_string()], + extra_words: Some(vec!["documentation".to_string()]), + ..OverrideBlock::default_for_test() + }, + ], + ..Default::default() + }; + + let resolved = settings.resolve_for_path(Path::new("docs/guide.md")); + assert_eq!(resolved.words, vec!["base", "markdown", "documentation"]); + } + + #[test] + fn 
test_resolve_for_path_replace_overrides_base() { + let settings = ConfigSettings { + dictionaries: vec!["en_us".to_string()], + overrides: vec![OverrideBlock { + paths: vec!["docs/de/**/*".to_string()], + dictionaries: Some(vec!["de".to_string()]), + extra_words: Some(vec!["codebook".to_string()]), + ..OverrideBlock::default_for_test() + }], + ..Default::default() + }; + + let resolved = settings.resolve_for_path(Path::new("docs/de/guide.md")); + assert_eq!(resolved.dictionaries, vec!["de"]); + assert_eq!(resolved.words, vec!["codebook"]); + } + + #[test] + fn test_merge_preserves_override_order() { + let mut global = ConfigSettings { + words: vec!["global".to_string()], + overrides: vec![OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_words: Some(vec!["from_global".to_string()]), + ..OverrideBlock::default_for_test() + }], + ..Default::default() + }; + + let project = ConfigSettings { + words: vec!["project".to_string()], + overrides: vec![OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_words: Some(vec!["from_project".to_string()]), + ..OverrideBlock::default_for_test() + }], + ..Default::default() + }; + + global.merge(project); + + // Overrides should be: global first, then project + assert_eq!(global.overrides.len(), 2); + assert_eq!( + global.overrides[0].extra_words, + Some(vec!["from_global".to_string()]) + ); + assert_eq!( + global.overrides[1].extra_words, + Some(vec!["from_project".to_string()]) + ); + } + + #[test] + fn test_serialization_with_overrides() { + let config = ConfigSettings { + words: vec!["base".to_string()], + overrides: vec![OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_words: Some(vec!["markdown".to_string()]), + ..OverrideBlock::default_for_test() + }], + ..Default::default() + }; + + let serialized = toml::to_string_pretty(&config).unwrap(); + let deserialized: ConfigSettings = toml::from_str(&serialized).unwrap(); + + assert_eq!(config, deserialized); + } + + #[test] + fn 
test_config_settings_query_methods() { + let settings = ConfigSettings { + dictionaries: vec!["en_us".to_string()], + words: vec!["codebook".to_string()], + flag_words: vec!["todo".to_string()], + min_word_length: 4, + ..Default::default() + }; + + assert_eq!(settings.dictionary_ids(), vec!["en_us"]); + assert!(settings.is_allowed_word("codebook")); + assert!(settings.is_allowed_word("CODEBOOK")); // case insensitive + assert!(!settings.is_allowed_word("unknown")); + assert!(settings.should_flag_word("todo")); + assert!(settings.should_flag_word("TODO")); // case insensitive + assert!(!settings.should_flag_word("done")); + assert_eq!(settings.get_min_word_length(), 4); + } + + #[test] + fn test_dictionary_ids_default() { + let settings = ConfigSettings::default(); + assert_eq!(settings.dictionary_ids(), vec!["en_us"]); + } + + impl OverrideBlock { + /// Helper for tests: creates an OverrideBlock with all fields set to None/empty. + fn default_for_test() -> Self { + Self { + paths: vec![], + dictionaries: None, + words: None, + flag_words: None, + ignore_patterns: None, + extra_dictionaries: None, + extra_words: None, + extra_flag_words: None, + extra_ignore_patterns: None, + } + } + } } diff --git a/crates/codebook/src/lib.rs b/crates/codebook/src/lib.rs index 67ecab32..1f20411a 100644 --- a/crates/codebook/src/lib.rs +++ b/crates/codebook/src/lib.rs @@ -9,7 +9,8 @@ use crate::regexes::get_default_skip_patterns; use std::path::Path; use std::sync::Arc; -use codebook_config::CodebookConfig; +use codebook_config::helpers::build_ignore_regexes; +use codebook_config::{CodebookConfig, ConfigSettings}; use dictionaries::{dictionary, manager::DictionaryManager}; use dictionary::Dictionary; use log::debug; @@ -37,33 +38,57 @@ impl Codebook { language: Option, file_path: Option<&str>, ) -> Vec { + // ignore_paths is evaluated BEFORE overrides if let Some(file_path) = file_path && self.config.should_ignore_path(Path::new(file_path)) { return Vec::new(); } - // get needed 
dictionary names - // get needed dictionaries - // call spell check on each dictionary + + // Resolve per-file settings (applies matching overrides) + let resolved = + file_path.and_then(|fp| self.config.resolve_for_file(Path::new(fp))); + let language = self.resolve_language(language, file_path); - let dictionaries = self.get_dictionaries(Some(language)); + + // Get dictionaries using resolved settings if overrides apply + let dictionaries = match &resolved { + Some(settings) => self.get_dictionaries_from_settings(settings, Some(language)), + None => self.get_dictionaries(Some(language)), + }; + // Combine default and user patterns let mut all_patterns = get_default_skip_patterns().clone(); - if let Some(user_patterns) = self.config.get_ignore_patterns() { + if let Some(ref settings) = resolved { + all_patterns.extend(build_ignore_regexes(&settings.ignore_patterns)); + } else if let Some(user_patterns) = self.config.get_ignore_patterns() { all_patterns.extend(user_patterns); } + parser::find_locations( text, language, |word| { - if self.config.should_flag_word(word) { - return false; - } - if word.len() < self.config.get_min_word_length() { - return true; - } - if self.config.is_allowed_word(word) { - return true; + if let Some(ref settings) = resolved { + if settings.should_flag_word(word) { + return false; + } + if word.len() < settings.get_min_word_length() { + return true; + } + if settings.is_allowed_word(word) { + return true; + } + } else { + if self.config.should_flag_word(word) { + return false; + } + if word.len() < self.config.get_min_word_length() { + return true; + } + if self.config.is_allowed_word(word) { + return true; + } } for dictionary in &dictionaries { if dictionary.check(word) { @@ -101,10 +126,27 @@ impl Codebook { dictionary_ids.extend(language_dictionary_ids); }; dictionary_ids.extend(DEFAULT_DICTIONARIES.iter().map(|f| f.to_string())); + self.load_dictionaries(&dictionary_ids) + } + + fn get_dictionaries_from_settings( + &self, + 
settings: &ConfigSettings, + language: Option, + ) -> Vec> { + let mut dictionary_ids = settings.dictionary_ids(); + if let Some(lang) = language { + dictionary_ids.extend(lang.dictionary_ids()); + }; + dictionary_ids.extend(DEFAULT_DICTIONARIES.iter().map(|f| f.to_string())); + self.load_dictionaries(&dictionary_ids) + } + + fn load_dictionaries(&self, dictionary_ids: &[String]) -> Vec> { let mut dictionaries = Vec::with_capacity(dictionary_ids.len()); debug!("Checking text with dictionaries: {dictionary_ids:?}"); for dictionary_id in dictionary_ids { - let dictionary = self.manager.get_dictionary(&dictionary_id); + let dictionary = self.manager.get_dictionary(dictionary_id); if let Some(d) = dictionary { dictionaries.push(d); } From 4e0f5d294488b7a9643e6ff091ec42e479cf7b30 Mon Sep 17 00:00:00 2001 From: Bo Lopker Date: Mon, 18 May 2026 16:46:00 -0700 Subject: [PATCH 2/2] Update readme --- README.md | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/README.md b/README.md index e85923a8..cdeff59d 100644 --- a/README.md +++ b/README.md @@ -348,6 +348,7 @@ use_global = true 2. If `use_global = false` in project config, global settings are ignored entirely 3. If no project config exists, global config is used 4. If neither exists, default settings are used +5. Any matching `[[overrides]]` blocks are then layered on top (global first, then project) — see [Scoped Overrides](#scoped-overrides) ### Working with Configurations @@ -418,6 +419,55 @@ exclude_tags = ["string.heredoc"] For the full list of available tags, see the [query tag reference](crates/codebook/src/queries/README.md). +### Scoped Overrides + +Use `[[overrides]]` blocks to tailor settings to specific files. Each block matches files by glob pattern (relative to the project root) and can replace or append to the base config. 
+ +```toml +# Base config applies everywhere +dictionaries = ["en_us"] +words = ["codebook"] +flag_words = ["todo"] + +# Markdown files: add British English and allow a few prose-specific words +[[overrides]] +paths = ["**/*.md", "**/*.mdx"] +extra_dictionaries = ["en_gb"] +extra_words = ["frontmatter", "callout"] + +# Rust files: flag a few extra words +[[overrides]] +paths = ["**/*.rs"] +extra_flag_words = ["xxx", "hack"] + +# German docs: swap out the dictionary entirely +[[overrides]] +paths = ["docs/de/**/*"] +dictionaries = ["de"] +``` + +**Available fields** + +| Field | Behavior | +| ----------------------- | -------- | +| `paths` | Required. Glob patterns matched against the file path relative to the project root. A file matches the block if it matches *any* pattern. | +| `dictionaries` | Replaces the resolved `dictionaries` list. | +| `words` | Replaces the resolved `words` list. | +| `flag_words` | Replaces the resolved `flag_words` list. | +| `ignore_patterns` | Replaces the resolved `ignore_patterns` list. | +| `extra_dictionaries` | Appends to the resolved `dictionaries` list. | +| `extra_words` | Appends to the resolved `words` list. | +| `extra_flag_words` | Appends to the resolved `flag_words` list. | +| `extra_ignore_patterns` | Appends to the resolved `ignore_patterns` list. | + +Glob syntax follows the `glob` crate's `Pattern` rules: `?` (any single character), `*` (any sequence of characters, including `/`), `**` (any number of directories), and `[...]` character classes. Brace alternation such as `{a,b}` is not supported; list each alternative as a separate entry in `paths`. + +**Resolution order:** all matching overrides are applied in declaration order, so later blocks win on the same field. Global overrides are applied before project overrides, so project settings always have the final say. If both a replace field (e.g., `words`) and its append sibling (`extra_words`) appear in the same block, replace runs first and then append is layered on top. + +**Interaction with `ignore_paths`:** `ignore_paths` is evaluated *before* overrides — an ignored file is skipped entirely and no overrides apply to it. 
+ +**Skipped blocks:** an `[[overrides]]` block is dropped (with a logged warning) if `paths` is missing or empty, every glob is invalid, or no other field is set. + ### LSP Initialization Options Editors can pass `initializationOptions` when starting the Codebook LSP for LSP-specific options. Refer to your editor's documentation for how to apply these options. All values are optional, omit them for the default behavior: