diff --git a/.claude/settings.local.json b/.claude/settings.local.json index b71dc97..325cac7 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -16,10 +16,6 @@ "Bash(gh repo view:*)", "Bash(cargo build:*)", - "Bash(cargo search:*)", - "WebFetch(domain:docs.rs)", - "WebFetch(domain:github.com)", - "WebFetch(domain:raw.githubusercontent.com)", - "WebFetch(domain:index.crates.io)" + "Bash(cargo search:*)" ] } } diff --git a/README.md b/README.md index e85923a..cdeff59 100644 --- a/README.md +++ b/README.md @@ -348,6 +348,7 @@ use_global = true 2. If `use_global = false` in project config, global settings are ignored entirely 3. If no project config exists, global config is used 4. If neither exists, default settings are used +5. Any matching `[[overrides]]` blocks are then layered on top (global first, then project) — see [Scoped Overrides](#scoped-overrides) ### Working with Configurations @@ -418,6 +419,55 @@ exclude_tags = ["string.heredoc"] For the full list of available tags, see the [query tag reference](crates/codebook/src/queries/README.md). +### Scoped Overrides + +Use `[[overrides]]` blocks to tailor settings to specific files. Each block matches files by glob pattern (relative to the project root) and can replace or append to the base config. + +```toml +# Base config applies everywhere +dictionaries = ["en_us"] +words = ["codebook"] +flag_words = ["todo"] + +# Markdown files: add British English and allow a few prose-specific words +[[overrides]] +paths = ["**/*.md", "**/*.mdx"] +extra_dictionaries = ["en_gb"] +extra_words = ["frontmatter", "callout"] + +# Rust files: flag a few extra words +[[overrides]] +paths = ["**/*.rs"] +extra_flag_words = ["xxx", "hack"] + +# German docs: swap out the dictionary entirely +[[overrides]] +paths = ["docs/de/**/*"] +dictionaries = ["de"] +``` + +**Available fields** + +| Field | Behavior | +| ----------------------- | -------- | +| `paths` | Required. 
Glob patterns matched against the file path relative to the project root. A file matches the block if it matches *any* pattern. | +| `dictionaries` | Replaces the resolved `dictionaries` list. | +| `words` | Replaces the resolved `words` list. | +| `flag_words` | Replaces the resolved `flag_words` list. | +| `ignore_patterns` | Replaces the resolved `ignore_patterns` list. | +| `extra_dictionaries` | Appends to the resolved `dictionaries` list. | +| `extra_words` | Appends to the resolved `words` list. | +| `extra_flag_words` | Appends to the resolved `flag_words` list. | +| `extra_ignore_patterns` | Appends to the resolved `ignore_patterns` list. | + +Glob syntax is the same as for `ignore_paths`: `*` (no separator), `**` (any directories), `?` (any single char), and `{a,b}` alternation. + +**Resolution order:** all matching overrides are applied in declaration order, so later blocks win on the same field. Global overrides are applied before project overrides, so project settings always have the final say. If both a replace field (e.g., `words`) and its append sibling (`extra_words`) appear in the same block, replace runs first and then append is layered on top. + +**Interaction with `ignore_paths`:** `ignore_paths` is evaluated *before* overrides — an ignored file is skipped entirely and no overrides apply to it. + +**Skipped blocks:** an `[[overrides]]` block is dropped (with a logged warning) if `paths` is missing or empty, every glob is invalid, or no other field is set. + ### LSP Initialization Options Editors can pass `initializationOptions` when starting the Codebook LSP for LSP-specific options. Refer to your editor's documentation for how to apply these options. 
All values are optional, omit them for the default behavior: diff --git a/crates/codebook-config/src/helpers.rs b/crates/codebook-config/src/helpers.rs index 393eaac..5c9cb23 100644 --- a/crates/codebook-config/src/helpers.rs +++ b/crates/codebook-config/src/helpers.rs @@ -57,7 +57,7 @@ pub(crate) fn unix_cache_dir() -> PathBuf { /// Compile user-provided ignore regex patterns, dropping invalid entries. /// Patterns are compiled with multiline mode so `^` and `$` match line boundaries. -pub(crate) fn build_ignore_regexes(patterns: &[String]) -> Vec { +pub fn build_ignore_regexes(patterns: &[String]) -> Vec { patterns .iter() .filter_map( diff --git a/crates/codebook-config/src/lib.rs b/crates/codebook-config/src/lib.rs index 29ab8f4..0405c08 100644 --- a/crates/codebook-config/src/lib.rs +++ b/crates/codebook-config/src/lib.rs @@ -1,9 +1,8 @@ -mod helpers; -mod settings; +pub mod helpers; +pub mod settings; mod watched_file; use crate::helpers::expand_tilde; pub use crate::settings::ConfigSettings; - use crate::watched_file::WatchedFile; use log::debug; use log::info; @@ -35,6 +34,12 @@ pub trait CodebookConfig: Sync + Send + Debug { fn get_min_word_length(&self) -> usize; fn should_check_tag(&self, tag: &str) -> bool; fn cache_dir(&self) -> &Path; + + /// Resolve settings with overrides applied for a specific file path. + /// Returns None if no overrides match (callers should use base config methods). + fn resolve_for_file(&self, _relative_path: &Path) -> Option> { + None + } } /// Internal mutable state @@ -546,6 +551,22 @@ impl CodebookConfig for CodebookConfigFile { fn cache_dir(&self) -> &Path { &self.cache_dir } + + /// Resolve settings with overrides applied for a specific file path. 
+ fn resolve_for_file(&self, relative_path: &Path) -> Option> { + let snapshot = self.snapshot(); + if snapshot.overrides.is_empty() { + return None; + } + if !snapshot + .overrides + .iter() + .any(|o| o.matches_path(relative_path)) + { + return None; + } + Some(Arc::new(snapshot.resolve_for_path(relative_path))) + } } #[derive(Debug)] @@ -1188,4 +1209,263 @@ mod tests { Ok(()) } + + // --- Override integration tests --- + + #[test] + fn test_resolve_for_file_no_overrides() { + let config = CodebookConfigFile::default(); + { + let mut inner = config.inner.write().unwrap(); + let settings = ConfigSettings { + words: vec!["base".to_string()], + ..Default::default() + }; + inner.project_config = inner.project_config.clone().with_content_value(settings); + CodebookConfigFile::rebuild_snapshot(&mut inner); + } + + // No overrides, should return None + assert!(config + .resolve_for_file(Path::new("src/main.rs")) + .is_none()); + } + + #[test] + fn test_resolve_for_file_with_matching_override() -> Result<(), io::Error> { + let temp_dir = TempDir::new().unwrap(); + let config_path = temp_dir.path().join("codebook.toml"); + let mut file = File::create(&config_path)?; + write!( + file, + r#" + words = ["base"] + + [[overrides]] + paths = ["**/*.md"] + extra_words = ["markdown"] + "# + )?; + + let config = load_from_file(ConfigType::Project, &config_path)?; + + // .md file should get override + let resolved = config.resolve_for_file(Path::new("README.md")); + assert!(resolved.is_some()); + let settings = resolved.unwrap(); + assert!(settings.is_allowed_word("base")); + assert!(settings.is_allowed_word("markdown")); + + // .rs file should not match + assert!(config + .resolve_for_file(Path::new("src/main.rs")) + .is_none()); + + Ok(()) + } + + #[test] + fn test_resolve_for_file_global_and_project_overrides() -> Result<(), io::Error> { + let global_temp = TempDir::new().unwrap(); + let project_temp = TempDir::new().unwrap(); + + // Global config with an override + let 
global_config_path = global_temp.path().join("codebook.toml"); + fs::write( + &global_config_path, + r#" + words = ["globalbase"] + + [[overrides]] + paths = ["**/*.md"] + extra_words = ["fromglobal"] + "#, + )?; + + // Project config with an override + let project_config_path = project_temp.path().join("codebook.toml"); + fs::write( + &project_config_path, + r#" + words = ["projectbase"] + + [[overrides]] + paths = ["**/*.md"] + extra_words = ["fromproject"] + "#, + )?; + + // Load both configs + let config = CodebookConfigFile::default(); + { + let mut inner = config.inner.write().unwrap(); + if let Ok(global_settings) = + CodebookConfigFile::load_settings_from_file(&global_config_path) + { + inner.global_config = WatchedFile::new(Some(global_config_path)) + .with_content_value(global_settings); + } + if let Ok(project_settings) = + CodebookConfigFile::load_settings_from_file(&project_config_path) + { + inner.project_config = WatchedFile::new(Some(project_config_path)) + .with_content_value(project_settings); + } + let effective = CodebookConfigFile::calculate_effective_settings( + &inner.project_config, + &inner.global_config, + ); + inner.snapshot = Arc::new(effective); + } + + // Resolve for a .md file — both overrides should apply + let resolved = config.resolve_for_file(Path::new("docs/guide.md")); + assert!(resolved.is_some()); + let settings = resolved.unwrap(); + assert!(settings.is_allowed_word("globalbase")); + assert!(settings.is_allowed_word("projectbase")); + assert!(settings.is_allowed_word("fromglobal")); + assert!(settings.is_allowed_word("fromproject")); + + Ok(()) + } + + #[test] + fn test_resolve_for_file_use_global_false_ignores_global_overrides() -> Result<(), io::Error> { + let global_temp = TempDir::new().unwrap(); + let project_temp = TempDir::new().unwrap(); + + let global_config_path = global_temp.path().join("codebook.toml"); + fs::write( + &global_config_path, + r#" + words = ["globalbase"] + + [[overrides]] + paths = ["**/*.md"] + 
extra_words = ["fromglobal"] + "#, + )?; + + let project_config_path = project_temp.path().join("codebook.toml"); + fs::write( + &project_config_path, + r#" + words = ["projectbase"] + use_global = false + + [[overrides]] + paths = ["**/*.md"] + extra_words = ["fromproject"] + "#, + )?; + + let config = CodebookConfigFile::default(); + { + let mut inner = config.inner.write().unwrap(); + if let Ok(global_settings) = + CodebookConfigFile::load_settings_from_file(&global_config_path) + { + inner.global_config = WatchedFile::new(Some(global_config_path)) + .with_content_value(global_settings); + } + if let Ok(project_settings) = + CodebookConfigFile::load_settings_from_file(&project_config_path) + { + inner.project_config = WatchedFile::new(Some(project_config_path)) + .with_content_value(project_settings); + } + let effective = CodebookConfigFile::calculate_effective_settings( + &inner.project_config, + &inner.global_config, + ); + inner.snapshot = Arc::new(effective); + } + + // With use_global = false, global overrides should be ignored + let resolved = config.resolve_for_file(Path::new("README.md")); + assert!(resolved.is_some()); + let settings = resolved.unwrap(); + assert!(settings.is_allowed_word("projectbase")); + assert!(settings.is_allowed_word("fromproject")); + // Global words and overrides should NOT be present + assert!(!settings.is_allowed_word("globalbase")); + assert!(!settings.is_allowed_word("fromglobal")); + + Ok(()) + } + + #[test] + fn test_save_preserves_overrides() -> Result<(), io::Error> { + let temp_dir = TempDir::new().unwrap(); + let config_path = temp_dir.path().join("codebook.toml"); + fs::write( + &config_path, + r#" + words = ["base"] + + [[overrides]] + paths = ["**/*.md"] + extra_words = ["markdown"] + "#, + )?; + + let config = load_from_file(ConfigType::Project, &config_path)?; + + // Add a word and save + config.add_word("newword")?; + config.save()?; + + // Reload and verify overrides are preserved + let reloaded = 
load_from_file(ConfigType::Project, &config_path)?; + assert!(reloaded.is_allowed_word("base")); + assert!(reloaded.is_allowed_word("newword")); + + // Override should still work + let resolved = reloaded.resolve_for_file(Path::new("README.md")); + assert!(resolved.is_some()); + assert!(resolved.unwrap().is_allowed_word("markdown")); + + Ok(()) + } + + #[test] + fn test_reload_picks_up_override_changes() -> Result<(), io::Error> { + let temp_dir = TempDir::new().unwrap(); + let config_path = temp_dir.path().join("codebook.toml"); + fs::write( + &config_path, + r#" + words = ["base"] + "#, + )?; + + let config = load_from_file(ConfigType::Project, &config_path)?; + + // No overrides initially + assert!(config + .resolve_for_file(Path::new("README.md")) + .is_none()); + + // Update config with overrides + fs::write( + &config_path, + r#" + words = ["base"] + + [[overrides]] + paths = ["**/*.md"] + extra_words = ["markdown"] + "#, + )?; + + config.reload()?; + + // Now overrides should apply + let resolved = config.resolve_for_file(Path::new("README.md")); + assert!(resolved.is_some()); + assert!(resolved.unwrap().is_allowed_word("markdown")); + + Ok(()) + } } diff --git a/crates/codebook-config/src/settings.rs b/crates/codebook-config/src/settings.rs index 38d3953..78001dc 100644 --- a/crates/codebook-config/src/settings.rs +++ b/crates/codebook-config/src/settings.rs @@ -1,6 +1,125 @@ use glob::Pattern; +use log::warn; use serde::{Deserialize, Serialize}; use std::path::Path; + +/// A single `[[overrides]]` block in the config file. 
+#[derive(Debug, Serialize, Clone, PartialEq)] +pub struct OverrideBlock { + /// Required: glob patterns matched against file path relative to project root + pub paths: Vec<String>, + + // --- Replace fields (replace the base list entirely) --- + #[serde(default, skip_serializing_if = "Option::is_none")] + pub dictionaries: Option<Vec<String>>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub words: Option<Vec<String>>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub flag_words: Option<Vec<String>>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub ignore_patterns: Option<Vec<String>>, + + // --- Append fields (append to the resolved list) --- + #[serde(default, skip_serializing_if = "Option::is_none")] + pub extra_dictionaries: Option<Vec<String>>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub extra_words: Option<Vec<String>>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub extra_flag_words: Option<Vec<String>>, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub extra_ignore_patterns: Option<Vec<String>>, +} + +impl<'de> Deserialize<'de> for OverrideBlock { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::Deserializer<'de>, + { + fn to_lowercase_vec(v: Vec<String>) -> Vec<String> { + v.into_iter().map(|s| s.to_ascii_lowercase()).collect() + } + + fn to_lowercase_opt(v: Option<Vec<String>>) -> Option<Vec<String>> { + v.map(to_lowercase_vec) + } + + #[derive(Deserialize)] + struct Helper { + #[serde(default)] + paths: Vec<String>, + #[serde(default)] + dictionaries: Option<Vec<String>>, + #[serde(default)] + words: Option<Vec<String>>, + #[serde(default)] + flag_words: Option<Vec<String>>, + #[serde(default)] + ignore_patterns: Option<Vec<String>>, + #[serde(default)] + extra_dictionaries: Option<Vec<String>>, + #[serde(default)] + extra_words: Option<Vec<String>>, + #[serde(default)] + extra_flag_words: Option<Vec<String>>, + #[serde(default)] + extra_ignore_patterns: Option<Vec<String>>, + } + + let helper = Helper::deserialize(deserializer)?; + Ok(OverrideBlock { + paths: helper.paths, + // ASCII-lowercase dictionary IDs and word lists + dictionaries: 
to_lowercase_opt(helper.dictionaries), + words: to_lowercase_opt(helper.words), + flag_words: to_lowercase_opt(helper.flag_words), + extra_dictionaries: to_lowercase_opt(helper.extra_dictionaries), + extra_words: to_lowercase_opt(helper.extra_words), + extra_flag_words: to_lowercase_opt(helper.extra_flag_words), + // Don't lowercase patterns or paths + ignore_patterns: helper.ignore_patterns, + extra_ignore_patterns: helper.extra_ignore_patterns, + }) + } +} + +impl OverrideBlock { + /// Returns true if this override block is valid (has non-empty paths with at least one valid glob). + pub fn is_valid(&self) -> bool { + if self.paths.is_empty() { + return false; + } + self.paths.iter().any(|p| Pattern::new(p).is_ok()) + } + + /// Returns true if any glob in `paths` matches `relative_path` (lossily converted to UTF-8); globs that fail to compile never match. + pub fn matches_path(&self, relative_path: &Path) -> bool { + let path_str = relative_path.to_string_lossy(); + self.paths.iter().any(|pattern| { + Pattern::new(pattern) + .map(|p| p.matches(&path_str)) + .unwrap_or(false) + }) + } + + /// Returns true if any field besides `paths` is set (the override has an effect). 
+ pub fn has_effect(&self) -> bool { + self.dictionaries.is_some() + || self.words.is_some() + || self.flag_words.is_some() + || self.ignore_patterns.is_some() + || self.extra_dictionaries.is_some() + || self.extra_words.is_some() + || self.extra_flag_words.is_some() + || self.extra_ignore_patterns.is_some() + } +} + #[derive(Debug, Serialize, Clone, PartialEq)] pub struct ConfigSettings { /// List of dictionaries to use for spell checking @@ -48,6 +167,10 @@ pub struct ConfigSettings { /// Tag prefixes to exclude (takes precedence over include_tags) #[serde(default, skip_serializing_if = "Vec::is_empty")] pub exclude_tags: Vec, + + /// Scoped configuration overrides + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub overrides: Vec, } fn default_use_global() -> bool { @@ -79,6 +202,7 @@ impl Default for ConfigSettings { min_word_length: default_min_word_length(), include_tags: Vec::new(), exclude_tags: Vec::new(), + overrides: Vec::new(), } } } @@ -110,12 +234,32 @@ impl<'de> Deserialize<'de> for ConfigSettings { include_tags: Vec, #[serde(default)] exclude_tags: Vec, + #[serde(default)] + overrides: Vec, } // Dictionary IDs are language codes (e.g. "en_US") — normalize to lowercase // so lookups are case-insensitive. Word lists keep their original casing and // are compared via unicase::eq. 
let helper = Helper::deserialize(deserializer)?; + + // Filter out invalid override blocks + let overrides: Vec = helper + .overrides + .into_iter() + .filter(|o| { + if !o.is_valid() { + warn!("Skipping invalid override block (empty or invalid paths)"); + return false; + } + if !o.has_effect() { + warn!("Skipping no-op override block (no settings specified)"); + return false; + } + true + }) + .collect(); + Ok(ConfigSettings { dictionaries: helper .dictionaries @@ -131,12 +275,14 @@ impl<'de> Deserialize<'de> for ConfigSettings { min_word_length: helper.min_word_length, include_tags: helper.include_tags, exclude_tags: helper.exclude_tags, + overrides, }) } } impl ConfigSettings { - /// Merge another config settings into this one, sorting and deduplicating all collections + /// Merge another config settings into this one, sorting and deduplicating all collections. + /// Overrides are appended (preserving order: self's overrides first, then other's). pub fn merge(&mut self, other: ConfigSettings) { // Add items from the other config self.dictionaries.extend(other.dictionaries); @@ -148,6 +294,9 @@ impl ConfigSettings { self.include_tags.extend(other.include_tags); self.exclude_tags.extend(other.exclude_tags); + // Append overrides (global first, then project — order matters) + self.overrides.extend(other.overrides); + // The use_global setting from the other config is ignored during merging // as this is a per-config setting @@ -156,11 +305,11 @@ impl ConfigSettings { self.min_word_length = other.min_word_length; } - // Sort and deduplicate each collection + // Sort and deduplicate each collection (but NOT overrides) self.sort_and_dedup(); } - /// Sort and deduplicate all collections in the config + /// Sort and deduplicate all collections in the config (but not overrides). 
pub fn sort_and_dedup(&mut self) { // Sort and deduplicate each Vec sort_and_dedup(&mut self.dictionaries); @@ -171,6 +320,54 @@ impl ConfigSettings { sort_and_dedup(&mut self.ignore_patterns); sort_and_dedup(&mut self.include_tags); sort_and_dedup(&mut self.exclude_tags); + // Note: overrides are NOT sorted — order matters for resolution + } + + /// Apply a single override block to these settings (mutates in place). + /// Replace fields are applied first, then append fields; the lists are not re-sorted or deduplicated afterwards. + pub fn apply_override(&mut self, ovr: &OverrideBlock) { + // Replace fields: fully replace the list (clone_from can reuse the existing allocation) + if let Some(v) = &ovr.dictionaries { + self.dictionaries.clone_from(v); + } + if let Some(v) = &ovr.words { + self.words.clone_from(v); + } + if let Some(v) = &ovr.flag_words { + self.flag_words.clone_from(v); + } + if let Some(v) = &ovr.ignore_patterns { + self.ignore_patterns.clone_from(v); + } + + // Append fields: extend the current list (clone items directly, no temporary Vec) + if let Some(v) = &ovr.extra_dictionaries { + self.dictionaries.extend(v.iter().cloned()); + } + if let Some(v) = &ovr.extra_words { + self.words.extend(v.iter().cloned()); + } + if let Some(v) = &ovr.extra_flag_words { + self.flag_words.extend(v.iter().cloned()); + } + if let Some(v) = &ovr.extra_ignore_patterns { + self.ignore_patterns.extend(v.iter().cloned()); + } + } + + /// Resolve the effective settings for a specific file path by applying matching overrides. + /// Returns a new ConfigSettings with overrides applied and the overrides list cleared. 
+ pub fn resolve_for_path(&self, path: &Path) -> ConfigSettings { + let mut resolved = self.clone(); + resolved.overrides = vec![]; // Resolved config has no overrides + + for ovr in &self.overrides { + if ovr.matches_path(path) { + resolved.apply_override(ovr); + } + } + + resolved } } @@ -306,6 +503,7 @@ mod tests { assert_eq!(config.ignore_patterns, Vec::::new()); assert!(config.use_global); assert_eq!(config.min_word_length, 3); + assert!(config.overrides.is_empty()); } #[test] @@ -672,4 +870,310 @@ mod tests { assert_eq!(config.ignore_patterns, Vec::::new()); assert!(config.use_global); } + + // --- Override tests --- + + #[test] + fn test_override_block_deserialization() { + let toml_str = r#" + words = ["base"] + + [[overrides]] + paths = ["**/*.md"] + extra_words = ["Markdown"] + dictionaries = ["EN_GB"] + "#; + + let config: ConfigSettings = toml::from_str(toml_str).unwrap(); + assert_eq!(config.overrides.len(), 1); + let ovr = &config.overrides[0]; + assert_eq!(ovr.paths, vec!["**/*.md"]); + assert_eq!(ovr.extra_words, Some(vec!["markdown".to_string()])); // lowercased + assert_eq!(ovr.dictionaries, Some(vec!["en_gb".to_string()])); // lowercased + assert_eq!(ovr.words, None); + assert_eq!(ovr.ignore_patterns, None); + } + + #[test] + fn test_override_block_empty_paths_skipped() { + let toml_str = r#" + [[overrides]] + paths = [] + extra_words = ["test"] + "#; + + let config: ConfigSettings = toml::from_str(toml_str).unwrap(); + assert!(config.overrides.is_empty()); + } + + #[test] + fn test_override_block_no_effect_skipped() { + let toml_str = r#" + [[overrides]] + paths = ["**/*.md"] + "#; + + let config: ConfigSettings = toml::from_str(toml_str).unwrap(); + assert!(config.overrides.is_empty()); + } + + #[test] + fn test_override_matches_path() { + let ovr = OverrideBlock { + paths: vec!["**/*.md".to_string(), "docs/**/*".to_string()], + extra_words: Some(vec!["test".to_string()]), + ..OverrideBlock::default_for_test() + }; + + 
assert!(ovr.matches_path(Path::new("README.md"))); + assert!(ovr.matches_path(Path::new("src/guide.md"))); + assert!(ovr.matches_path(Path::new("docs/api/index.html"))); + assert!(!ovr.matches_path(Path::new("src/main.rs"))); + } + + #[test] + fn test_apply_override_replace() { + let mut settings = ConfigSettings { + words: vec!["alpha".to_string(), "beta".to_string()], + ..Default::default() + }; + + let ovr = OverrideBlock { + paths: vec!["**/*.md".to_string()], + words: Some(vec!["gamma".to_string()]), + ..OverrideBlock::default_for_test() + }; + + settings.apply_override(&ovr); + assert_eq!(settings.words, vec!["gamma"]); + } + + #[test] + fn test_apply_override_append() { + let mut settings = ConfigSettings { + words: vec!["alpha".to_string(), "beta".to_string()], + ..Default::default() + }; + + let ovr = OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_words: Some(vec!["gamma".to_string()]), + ..OverrideBlock::default_for_test() + }; + + settings.apply_override(&ovr); + assert_eq!(settings.words, vec!["alpha", "beta", "gamma"]); + } + + #[test] + fn test_apply_override_replace_then_append() { + let mut settings = ConfigSettings { + words: vec!["alpha".to_string(), "beta".to_string()], + ..Default::default() + }; + + let ovr = OverrideBlock { + paths: vec!["**/*.md".to_string()], + words: Some(vec!["gamma".to_string()]), + extra_words: Some(vec!["delta".to_string()]), + ..OverrideBlock::default_for_test() + }; + + settings.apply_override(&ovr); + assert_eq!(settings.words, vec!["gamma", "delta"]); + } + + #[test] + fn test_apply_override_no_change() { + let mut settings = ConfigSettings { + words: vec!["alpha".to_string()], + dictionaries: vec!["en_us".to_string()], + ..Default::default() + }; + + let ovr = OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_flag_words: Some(vec!["hack".to_string()]), + ..OverrideBlock::default_for_test() + }; + + settings.apply_override(&ovr); + // words and dictionaries unchanged + 
assert_eq!(settings.words, vec!["alpha"]); + assert_eq!(settings.dictionaries, vec!["en_us"]); + // flag_words changed + assert_eq!(settings.flag_words, vec!["hack"]); + } + + #[test] + fn test_resolve_for_path_no_match() { + let settings = ConfigSettings { + words: vec!["base".to_string()], + overrides: vec![OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_words: Some(vec!["markdown".to_string()]), + ..OverrideBlock::default_for_test() + }], + ..Default::default() + }; + + let resolved = settings.resolve_for_path(Path::new("src/main.rs")); + assert_eq!(resolved.words, vec!["base"]); + assert!(resolved.overrides.is_empty()); + } + + #[test] + fn test_resolve_for_path_single_match() { + let settings = ConfigSettings { + words: vec!["base".to_string()], + overrides: vec![OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_words: Some(vec!["markdown".to_string()]), + ..OverrideBlock::default_for_test() + }], + ..Default::default() + }; + + let resolved = settings.resolve_for_path(Path::new("README.md")); + assert_eq!(resolved.words, vec!["base", "markdown"]); + assert!(resolved.overrides.is_empty()); + } + + #[test] + fn test_resolve_for_path_multiple_matches() { + let settings = ConfigSettings { + words: vec!["base".to_string()], + overrides: vec![ + OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_words: Some(vec!["markdown".to_string()]), + ..OverrideBlock::default_for_test() + }, + OverrideBlock { + paths: vec!["docs/**/*".to_string()], + extra_words: Some(vec!["documentation".to_string()]), + ..OverrideBlock::default_for_test() + }, + ], + ..Default::default() + }; + + let resolved = settings.resolve_for_path(Path::new("docs/guide.md")); + assert_eq!(resolved.words, vec!["base", "markdown", "documentation"]); + } + + #[test] + fn test_resolve_for_path_replace_overrides_base() { + let settings = ConfigSettings { + dictionaries: vec!["en_us".to_string()], + overrides: vec![OverrideBlock { + paths: 
vec!["docs/de/**/*".to_string()], + dictionaries: Some(vec!["de".to_string()]), + extra_words: Some(vec!["codebook".to_string()]), + ..OverrideBlock::default_for_test() + }], + ..Default::default() + }; + + let resolved = settings.resolve_for_path(Path::new("docs/de/guide.md")); + assert_eq!(resolved.dictionaries, vec!["de"]); + assert_eq!(resolved.words, vec!["codebook"]); + } + + #[test] + fn test_merge_preserves_override_order() { + let mut global = ConfigSettings { + words: vec!["global".to_string()], + overrides: vec![OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_words: Some(vec!["from_global".to_string()]), + ..OverrideBlock::default_for_test() + }], + ..Default::default() + }; + + let project = ConfigSettings { + words: vec!["project".to_string()], + overrides: vec![OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_words: Some(vec!["from_project".to_string()]), + ..OverrideBlock::default_for_test() + }], + ..Default::default() + }; + + global.merge(project); + + // Overrides should be: global first, then project + assert_eq!(global.overrides.len(), 2); + assert_eq!( + global.overrides[0].extra_words, + Some(vec!["from_global".to_string()]) + ); + assert_eq!( + global.overrides[1].extra_words, + Some(vec!["from_project".to_string()]) + ); + } + + #[test] + fn test_serialization_with_overrides() { + let config = ConfigSettings { + words: vec!["base".to_string()], + overrides: vec![OverrideBlock { + paths: vec!["**/*.md".to_string()], + extra_words: Some(vec!["markdown".to_string()]), + ..OverrideBlock::default_for_test() + }], + ..Default::default() + }; + + let serialized = toml::to_string_pretty(&config).unwrap(); + let deserialized: ConfigSettings = toml::from_str(&serialized).unwrap(); + + assert_eq!(config, deserialized); + } + + #[test] + fn test_config_settings_query_methods() { + let settings = ConfigSettings { + dictionaries: vec!["en_us".to_string()], + words: vec!["codebook".to_string()], + flag_words: 
vec!["todo".to_string()], + min_word_length: 4, + ..Default::default() + }; + + assert_eq!(settings.dictionary_ids(), vec!["en_us"]); + assert!(settings.is_allowed_word("codebook")); + assert!(settings.is_allowed_word("CODEBOOK")); // case insensitive + assert!(!settings.is_allowed_word("unknown")); + assert!(settings.should_flag_word("todo")); + assert!(settings.should_flag_word("TODO")); // case insensitive + assert!(!settings.should_flag_word("done")); + assert_eq!(settings.min_word_length(), 4); + } + + #[test] + fn test_dictionary_ids_default() { + let settings = ConfigSettings::default(); + assert_eq!(settings.dictionary_ids(), vec!["en_us"]); + } + + impl OverrideBlock { + /// Helper for tests: creates an OverrideBlock with all fields set to None/empty. + fn default_for_test() -> Self { + Self { + paths: vec![], + dictionaries: None, + words: None, + flag_words: None, + ignore_patterns: None, + extra_dictionaries: None, + extra_words: None, + extra_flag_words: None, + extra_ignore_patterns: None, + } + } + } } diff --git a/crates/codebook/src/checker.rs b/crates/codebook/src/checker.rs index 95a2dff..ff04e8a 100644 --- a/crates/codebook/src/checker.rs +++ b/crates/codebook/src/checker.rs @@ -2,7 +2,7 @@ use std::collections::{HashMap, HashSet}; use crate::dictionaries::dictionary::Dictionary; use crate::parser::{TextRange, WordLocation}; -use codebook_config::CodebookConfig; +use codebook_config::{CodebookConfig, ConfigSettings}; /// A candidate word extracted from a text node, with its position /// in original-document byte offsets. Borrows the word text from the @@ -16,11 +16,13 @@ pub struct WordCandidate<'a> { /// Check candidate words against dictionaries and config rules. /// Returns WordLocations for misspelled words, grouping all locations -/// of the same word together. +/// of the same word together. When `resolved` is Some, its per-file +/// settings take precedence over the base config for flag/allow/min-length. 
pub fn check_words( candidates: &[WordCandidate<'_>], dictionaries: &[std::sync::Arc], config: &dyn CodebookConfig, + resolved: Option<&ConfigSettings>, ) -> Vec { // Group candidates by word text, deduplicating identical spans. let mut word_positions: HashMap<&str, HashSet> = HashMap::new(); @@ -41,18 +43,31 @@ pub fn check_words( ); } + let should_flag = |w: &str| match resolved { + Some(s) => s.should_flag_word(w), + None => config.should_flag_word(w), + }; + let min_word_length = match resolved { + Some(s) => s.min_word_length(), + None => config.get_min_word_length(), + }; + let is_allowed = |w: &str| match resolved { + Some(s) => s.is_allowed_word(w), + None => config.is_allowed_word(w), + }; + // Check each unique word once let mut results = Vec::new(); for (word, positions) in word_positions { let positions: Vec = positions.into_iter().collect(); - if config.should_flag_word(word) { + if should_flag(word) { results.push(WordLocation::new(word.to_string(), positions)); continue; } - if word.len() < config.get_min_word_length() { + if word.len() < min_word_length { continue; } - if config.is_allowed_word(word) { + if is_allowed(word) { continue; } let is_correct = dictionaries.iter().any(|dict| dict.check(word)); @@ -85,7 +100,7 @@ mod tests { let dict = Arc::new(TextDictionary::new("hello\nworld\n")); let config = Arc::new(codebook_config::CodebookConfigMemory::default()); let candidates = make_candidates(&[("hello", 0, 5), ("wrld", 6, 10)]); - let results = check_words(&candidates, &[dict], config.as_ref()); + let results = check_words(&candidates, &[dict], config.as_ref(), None); assert_eq!(results.len(), 1); assert_eq!(results[0].word, "wrld"); } @@ -95,7 +110,7 @@ mod tests { let dict = Arc::new(TextDictionary::new("hello\n")); let config = Arc::new(codebook_config::CodebookConfigMemory::default()); let candidates = make_candidates(&[("wrld", 0, 4), ("wrld", 10, 14)]); - let results = check_words(&candidates, &[dict], config.as_ref()); + let results = 
check_words(&candidates, &[dict], config.as_ref(), None); assert_eq!(results.len(), 1); assert_eq!(results[0].word, "wrld"); assert_eq!(results[0].locations.len(), 2); @@ -107,7 +122,7 @@ mod tests { let config = Arc::new(codebook_config::CodebookConfigMemory::default()); // Default min word length is 3 let candidates = make_candidates(&[("ab", 0, 2)]); - let results = check_words(&candidates, &[dict], config.as_ref()); + let results = check_words(&candidates, &[dict], config.as_ref(), None); assert!(results.is_empty(), "Short words should be skipped"); } @@ -117,7 +132,7 @@ mod tests { let config = Arc::new(codebook_config::CodebookConfigMemory::default()); config.add_word("codebook").unwrap(); let candidates = make_candidates(&[("codebook", 0, 8)]); - let results = check_words(&candidates, &[dict], config.as_ref()); + let results = check_words(&candidates, &[dict], config.as_ref(), None); assert!(results.is_empty(), "Allowed words should not be flagged"); } } diff --git a/crates/codebook/src/lib.rs b/crates/codebook/src/lib.rs index 19cfe95..6a65364 100644 --- a/crates/codebook/src/lib.rs +++ b/crates/codebook/src/lib.rs @@ -11,7 +11,8 @@ use std::collections::HashSet; use std::path::Path; use std::sync::Arc; -use codebook_config::CodebookConfig; +use codebook_config::helpers::build_ignore_regexes; +use codebook_config::{CodebookConfig, ConfigSettings}; use dictionaries::{dictionary, manager::DictionaryManager}; use dictionary::Dictionary; use log::debug; @@ -39,6 +40,7 @@ impl Codebook { language: Option, file_path: Option<&str>, ) -> Vec { + // ignore_paths and include_paths are evaluated BEFORE overrides if let Some(file_path) = file_path { if self.config.should_ignore_path(Path::new(file_path)) { return Vec::new(); @@ -48,11 +50,16 @@ impl Codebook { } } + // Resolve per-file settings (applies matching overrides) + let resolved = file_path.and_then(|fp| self.config.resolve_for_file(Path::new(fp))); + let language = self.resolve_language(language, file_path); 
// Combine default and user skip patterns let mut all_patterns = get_default_skip_patterns().clone(); - if let Some(user_patterns) = self.config.get_ignore_patterns() { + if let Some(ref settings) = resolved { + all_patterns.extend(build_ignore_regexes(&settings.ignore_patterns)); + } else if let Some(user_patterns) = self.config.get_ignore_patterns() { all_patterns.extend(user_patterns); } @@ -64,11 +71,17 @@ impl Codebook { &all_patterns, ); - // Load dictionaries for all languages encountered - let dictionaries = self.get_dictionaries_for_languages(&languages_found); + // Load dictionaries for all languages encountered (using resolved settings if any) + let dictionaries = + self.get_dictionaries_for_languages(&languages_found, resolved.as_deref()); // Check words against dictionaries - checker::check_words(&candidates, &dictionaries, self.config.as_ref()) + checker::check_words( + &candidates, + &dictionaries, + self.config.as_ref(), + resolved.as_deref(), + ) } fn resolve_language( @@ -86,11 +99,16 @@ impl Codebook { } /// Gather dictionaries for all languages encountered in a file. + /// If `resolved` is Some, its dictionary list is used in place of the base config's. fn get_dictionaries_for_languages( &self, languages: &HashSet, + resolved: Option<&ConfigSettings>, ) -> Vec> { - let mut dictionary_ids = self.config.get_dictionary_ids(); + let mut dictionary_ids = match resolved { + Some(settings) => settings.dictionary_ids(), + None => self.config.get_dictionary_ids(), + }; for lang in languages { dictionary_ids.extend(lang.dictionary_ids()); @@ -119,7 +137,7 @@ impl Codebook { pub fn get_suggestions(&self, word: &str) -> Option> { let max_results = 5; - let dictionaries = self.get_dictionaries_for_languages(&HashSet::new()); + let dictionaries = self.get_dictionaries_for_languages(&HashSet::new(), None); let mut is_misspelled = false; let suggestions: Vec> = dictionaries .iter()