From f8abe9e957dbcf5d2c0ac9c8431f3a1766d246f0 Mon Sep 17 00:00:00 2001
From: Alex-Wengg <hanweng9@gmail.com>
Date: Sun, 26 Apr 2026 23:32:02 -0400
Subject: [PATCH 1/4] feat: unified NormalizeOptions API + fix #23 compound
 concat
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address two pieces of feedback from @hongbo-miao:

- Issue #15 comment: instead of separate `normalize_sentence_aviation`
  variants, expose a unified entry point with an options struct.
- Issue #23 comment: prefer a generic flag (not a `Domain` label) since
  other code-style speech contexts want the same "stop adding two
  numbers" behavior.

API
---
- New `NormalizeOptions { concat_compound_numbers, max_span_tokens }`
  with builder helpers.
- New `normalize_with_options` and `normalize_sentence_with_options`
  unified entry points.
- Existing `normalize_aviation`, `normalize_sentence_aviation*`,
  `normalize_sentence_with_max_span` stay as thin wrappers — no
  signature change for current callers (the only behavior change is
  the issue #23 concat fix described below).
- FFI: `nemo_normalize_with_options(input, concat)` and
  `nemo_normalize_sentence_with_options(input, concat, max_span)`.
- WASM: `normalizeWithOptions` / `normalizeSentenceWithOptions`.

Issue #23 fix
-------------
`words_to_number_aviation` previously only handled a leading run of
single-digit words followed by one grammatical compound
(`"seven eighty eight"` → `"788"`). It still summed consecutive
grammatical compounds, so `"thirty five sixty two"` resolved to
`"97"` (= 35 + 62).

Replaced the digit-prefix path with a general `peel_compound_chunks`
helper that greedily splits a phrase into 0-99 chunks and concatenates
them when there are two or more. Single-chunk inputs (`"twenty one"`)
still go through the grammatical parser, and any phrase containing a
scale word (`"two thousand seventeen"`) keeps its addition semantics.

Updated one stale test (`"twenty one forty two"` was locking in the
buggy `63`; it now correctly produces `2142`).
---
 src/ffi.rs             |  94 ++++++++++++++++++++++++++-
 src/itn/en/cardinal.rs |  74 ++++++++++++++++------
 src/lib.rs             | 140 +++++++++++++++++++++++++++++++++++++++--
 src/wasm.rs            |  42 ++++++++++++-
 tests/en_tests.rs      | 131 +++++++++++++++++++++++++++++++++++++-
 5 files changed, 450 insertions(+), 31 deletions(-)

diff --git a/src/ffi.rs b/src/ffi.rs
index 0086a6e..d5f50a8 100644
--- a/src/ffi.rs
+++ b/src/ffi.rs
@@ -5,11 +5,30 @@ use std::ptr;
 
 use crate::{
     custom_rules, normalize, normalize_aviation, normalize_sentence, normalize_sentence_aviation,
-    normalize_sentence_aviation_with_max_span, normalize_sentence_with_max_span, tn_normalize,
-    tn_normalize_lang, tn_normalize_sentence, tn_normalize_sentence_lang,
-    tn_normalize_sentence_with_max_span, tn_normalize_sentence_with_max_span_lang,
+    normalize_sentence_aviation_with_max_span, normalize_sentence_with_max_span,
+    normalize_sentence_with_options, normalize_with_options, tn_normalize, tn_normalize_lang,
+    tn_normalize_sentence, tn_normalize_sentence_lang, tn_normalize_sentence_with_max_span,
+    tn_normalize_sentence_with_max_span_lang, NormalizeOptions,
 };
 
+/// Build [`NormalizeOptions`] from FFI primitives.
+///
+/// `concat_compound_numbers`: any non-zero value enables concat behavior
+/// (`"thirty five sixty two"` → `"3562"`, `"seven eighty eight"` → `"788"`).
+///
+/// `max_span_tokens`: `0` means "use library default" (16); any positive
+/// value is a caller-specified max span.
+fn options_from_ffi(concat_compound_numbers: u32, max_span_tokens: u32) -> NormalizeOptions {
+    NormalizeOptions {
+        concat_compound_numbers: concat_compound_numbers != 0,
+        max_span_tokens: if max_span_tokens == 0 {
+            None
+        } else {
+            Some(max_span_tokens as usize)
+        },
+    }
+}
+
 /// Normalize spoken-form text to written form.
 ///
 /// # Safety
@@ -174,6 +193,75 @@ pub unsafe extern "C" fn nemo_normalize_sentence_aviation_with_max_span(
     }
 }
 
+/// Unified single-expression normalize with caller-specified options.
+///
+/// `concat_compound_numbers`: `0` for standard ITN, non-zero for
+/// concat-compound (aviation-style) reading where consecutive number words
+/// concatenate rather than add — e.g. `"thirty five sixty two"` → `"3562"`,
+/// `"seven eighty eight"` → `"788"`.
+///
+/// # Safety
+/// - `input` must be a valid null-terminated UTF-8 string
+/// - Returns a newly allocated string that must be freed with `nemo_free_string`
+#[no_mangle]
+pub unsafe extern "C" fn nemo_normalize_with_options(
+    input: *const c_char,
+    concat_compound_numbers: u32,
+) -> *mut c_char {
+    if input.is_null() {
+        return ptr::null_mut();
+    }
+
+    let c_str = match CStr::from_ptr(input).to_str() {
+        Ok(s) => s,
+        Err(_) => return ptr::null_mut(),
+    };
+
+    let options = options_from_ffi(concat_compound_numbers, 0);
+    let result = normalize_with_options(c_str, options);
+
+    match CString::new(result) {
+        Ok(c_string) => c_string.into_raw(),
+        Err(_) => ptr::null_mut(),
+    }
+}
+
+/// Unified sentence normalize with caller-specified options.
+///
+/// `concat_compound_numbers`: `0` for standard ITN, non-zero for
+/// concat-compound reading.
+///
+/// `max_span_tokens`:
+/// - `0` — use library default (`16`).
+/// - `>0` — use the specified max span.
+///
+/// # Safety
+/// - `input` must be a valid null-terminated UTF-8 string
+/// - Returns a newly allocated string that must be freed with `nemo_free_string`
+#[no_mangle]
+pub unsafe extern "C" fn nemo_normalize_sentence_with_options(
+    input: *const c_char,
+    concat_compound_numbers: u32,
+    max_span_tokens: u32,
+) -> *mut c_char {
+    if input.is_null() {
+        return ptr::null_mut();
+    }
+
+    let c_str = match CStr::from_ptr(input).to_str() {
+        Ok(s) => s,
+        Err(_) => return ptr::null_mut(),
+    };
+
+    let options = options_from_ffi(concat_compound_numbers, max_span_tokens);
+    let result = normalize_sentence_with_options(c_str, options);
+
+    match CString::new(result) {
+        Ok(c_string) => c_string.into_raw(),
+        Err(_) => ptr::null_mut(),
+    }
+}
+
 /// Free a string allocated by nemo_normalize or nemo_normalize_sentence.
 ///
 /// # Safety
diff --git a/src/itn/en/cardinal.rs b/src/itn/en/cardinal.rs
index 71d4e14..3bc33c7 100644
--- a/src/itn/en/cardinal.rs
+++ b/src/itn/en/cardinal.rs
@@ -199,10 +199,17 @@ pub fn words_to_number(input: &str) -> Option<i128> {
 
 /// Aviation / flight-number / call-sign reading of a number phrase.
 ///
-/// Recognises a leading run of single-digit words concatenated with a trailing
-/// grammatical compound, e.g. `"seven eighty eight"` → `788`,
-/// `"two thirty five"` → `235`. Falls back to [`words_to_number`] when the
-/// aviation pattern does not apply (no digit prefix, scale word present, etc.).
+/// Recognises consecutive 0-99 compounds and concatenates them rather than
+/// summing. Examples:
+/// - `"seven eighty eight"` → `788` (digit + tens+ones compound)
+/// - `"two thirty five"` → `235`
+/// - `"thirty five sixty two"` → `3562` (two tens+ones compounds — fixes #23)
+/// - `"twenty one"` → `21` (single chunk; identical to grammatical)
+///
+/// Falls back to [`words_to_number`] (grammatical addition) when the chunk
+/// pattern does not apply, including any phrase containing a scale word
+/// (`hundred`, `thousand`, ...). This preserves `"two thousand seventeen"`
+/// → `2017`.
 ///
 /// This is **opt-in**: callers reach for it explicitly from flight-number /
 /// call-sign contexts. Generic ITN/TN dispatch keeps using [`words_to_number`]
@@ -230,22 +237,13 @@ pub fn words_to_number_aviation(input: &str) -> Option<i128> {
             .ok();
     }
 
-    // Aviation flight-number style: digit prefix + grammatical compound.
-    // "seven eighty eight" → "7" ‖ 88 = 788. Skipped if a scale word appears,
-    // since "two thousand seventeen" must stay grammatical (= 2017, not 22017).
+    // Concatenated 0-99 compound chunks. Skipped if a scale word appears,
+    // since `"two thousand seventeen"` must stay grammatical (= 2017).
     let has_scale = words.iter().any(|w| SCALES.contains_key(*w));
     if !has_scale {
-        let prefix_len = words
-            .iter()
-            .take_while(|w| single_digit_char(w).is_some())
-            .count();
-        if prefix_len >= 1 && prefix_len < words.len() {
-            if let Some(rest_num) = grammatical_words_to_number(&words[prefix_len..]) {
-                let prefix: String = words[..prefix_len]
-                    .iter()
-                    .map(|w| single_digit_char(w).unwrap())
-                    .collect();
-                let combined = format!("{}{}", prefix, rest_num);
+        if let Some(chunks) = peel_compound_chunks(&words) {
+            if chunks.len() >= 2 {
+                let combined: String = chunks.iter().map(|n| n.to_string()).collect();
                 return combined.parse::<i128>().ok();
             }
         }
@@ -254,6 +252,46 @@ pub fn words_to_number_aviation(input: &str) -> Option<i128> {
     grammatical_words_to_number(&words)
 }
 
+/// Greedily peel `words` into 0-99 number chunks. Each chunk is one of:
+/// - A single ONES word (0-19), e.g. `"seven"` → 7, `"sixteen"` → 16
+/// - A single TENS word (20, 30, ... 90), e.g. `"twenty"` → 20
+/// - A TENS word followed by a ones word (1-9), e.g. `"twenty one"` → 21
+///
+/// Returns `None` if any token isn't a recognised number word, so this
+/// function refuses to swallow non-number tokens. `"and"` / `"a"` filler
+/// must already be removed by the caller.
+fn peel_compound_chunks(words: &[&str]) -> Option<Vec<i128>> {
+    let mut chunks = Vec::new();
+    let mut i = 0;
+    while i < words.len() {
+        if let Some(&tens) = TENS.get(words[i]) {
+            // Greedy: try TENS + ones (1-9) before falling back to standalone.
+            if i + 1 < words.len() {
+                if let Some(&ones) = ONES.get(words[i + 1]) {
+                    if (1..=9).contains(&ones) {
+                        chunks.push((tens + ones) as i128);
+                        i += 2;
+                        continue;
+                    }
+                }
+            }
+            chunks.push(tens as i128);
+            i += 1;
+        } else if let Some(&ones) = ONES.get(words[i]) {
+            // 0-19 standalone (covers digit words, ten, and teens).
+            chunks.push(ones as i128);
+            i += 1;
+        } else {
+            return None;
+        }
+    }
+    if chunks.is_empty() {
+        None
+    } else {
+        Some(chunks)
+    }
+}
+
 /// Parse a grammatical English number with running-sum + scale multiplication.
 fn grammatical_words_to_number(words: &[&str]) -> Option<i128> {
     // "eleven hundred" = 1100, "twenty hundred" = 2000
diff --git a/src/lib.rs b/src/lib.rs
index dbc0c46..df3c2f5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -30,6 +30,68 @@ use itn::en::{
     whitelist, word,
 };
 
+/// Options for the unified [`normalize_with_options`] /
+/// [`normalize_sentence_with_options`] entry points.
+///
+/// Keeping options on a struct (rather than separate `*_aviation` /
+/// `*_with_max_span` functions) lets new knobs land without exploding the
+/// public API surface — see issues #15 and #23 for the motivating discussion.
+///
+/// The flags are intentionally orthogonal and *not* tied to a particular
+/// domain. Aviation, military codes, dispatch IDs, etc. all reuse the same
+/// underlying behavior toggles.
+///
+/// # Examples
+///
+/// ```
+/// use text_processing_rs::{normalize_sentence_with_options, NormalizeOptions};
+///
+/// let opts = NormalizeOptions {
+///     concat_compound_numbers: true,
+///     max_span_tokens: Some(8),
+/// };
+/// assert_eq!(
+///     normalize_sentence_with_options("United seven eighty eight", opts),
+///     "United 788"
+/// );
+/// ```
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub struct NormalizeOptions {
+    /// When `true`, sequences of spoken number words concatenate rather than
+    /// add. `"thirty five sixty two"` → `"3562"` (instead of `35 + 62 = 97`)
+    /// and `"seven eighty eight"` → `"788"`. Aviation, flight-numbers,
+    /// call-signs, and other code-style readings want this on.
+    ///
+    /// Scale-word grammar is preserved: `"two thousand seventeen"` still
+    /// resolves to `"2017"` regardless of this flag.
+    pub concat_compound_numbers: bool,
+    /// Maximum span size (tokens) considered in sentence mode. `None` means
+    /// use [`DEFAULT_MAX_SPAN_TOKENS`]. Ignored by [`normalize_with_options`].
+    pub max_span_tokens: Option<usize>,
+}
+
+impl NormalizeOptions {
+    /// Default options: standard ITN dispatch, default max span.
+    pub const fn new() -> Self {
+        Self {
+            concat_compound_numbers: false,
+            max_span_tokens: None,
+        }
+    }
+
+    /// Enable / disable compound-number concatenation.
+    pub const fn with_concat_compound_numbers(mut self, enabled: bool) -> Self {
+        self.concat_compound_numbers = enabled;
+        self
+    }
+
+    /// Set the sentence-mode max span (in tokens).
+    pub const fn with_max_span_tokens(mut self, max_span_tokens: usize) -> Self {
+        self.max_span_tokens = Some(max_span_tokens);
+        self
+    }
+}
+
 /// Normalize spoken-form text to written form.
 ///
 /// Tries taggers in order of specificity (most specific first).
@@ -135,6 +197,36 @@ pub fn normalize(input: &str) -> String {
 /// assert_eq!(normalize_aviation("hello world"), "hello world");
 /// ```
 pub fn normalize_aviation(input: &str) -> String {
+    normalize_with_options(
+        input,
+        NormalizeOptions::new().with_concat_compound_numbers(true),
+    )
+}
+
+/// Unified single-expression normalize entry point.
+///
+/// Switches between standard and concat-compound (aviation-style) dispatch
+/// based on `options.concat_compound_numbers`. The `max_span_tokens` field on
+/// [`NormalizeOptions`] is ignored here — it only applies to
+/// [`normalize_sentence_with_options`].
+///
+/// ```
+/// use text_processing_rs::{normalize_with_options, NormalizeOptions};
+///
+/// let opts = NormalizeOptions::new().with_concat_compound_numbers(true);
+/// assert_eq!(normalize_with_options("seven eighty eight", opts), "788");
+/// ```
+pub fn normalize_with_options(input: &str, options: NormalizeOptions) -> String {
+    if options.concat_compound_numbers {
+        normalize_aviation_inner(input)
+    } else {
+        normalize(input)
+    }
+}
+
+/// Aviation single-expression dispatch. Kept private; callers go through
+/// [`normalize_aviation`] or [`normalize_with_options`].
+fn normalize_aviation_inner(input: &str) -> String {
     let input = input.trim();
 
     // High-confidence rules still win.
@@ -152,7 +244,7 @@ pub fn normalize_aviation(input: &str) -> String {
     }
 
     // Aviation cardinal beats time/date here. This is the whole point of
-    // calling `normalize_aviation` instead of `normalize`.
+    // the aviation domain.
     if let Some(num) = cardinal::parse_aviation(input) {
         return num;
     }
@@ -975,6 +1067,35 @@ pub fn normalize_sentence(input: &str) -> String {
     normalize_sentence_with_max_span(input, DEFAULT_MAX_SPAN_TOKENS)
 }
 
+/// Unified sentence-mode entry point.
+///
+/// Combines `concat_compound_numbers` and `max_span_tokens` configuration in
+/// a single call. When `max_span_tokens` is `None`, [`DEFAULT_MAX_SPAN_TOKENS`]
+/// (16) is used.
+///
+/// ```
+/// use text_processing_rs::{normalize_sentence_with_options, NormalizeOptions};
+///
+/// // Default behavior, default span
+/// assert_eq!(
+///     normalize_sentence_with_options("I have twenty one apples", NormalizeOptions::new()),
+///     "I have 21 apples"
+/// );
+///
+/// // Concat-compound (aviation-style), custom span
+/// let opts = NormalizeOptions::new()
+///     .with_concat_compound_numbers(true)
+///     .with_max_span_tokens(8);
+/// assert_eq!(
+///     normalize_sentence_with_options("United seven eighty eight", opts),
+///     "United 788"
+/// );
+/// ```
+pub fn normalize_sentence_with_options(input: &str, options: NormalizeOptions) -> String {
+    let max_span = options.max_span_tokens.unwrap_or(DEFAULT_MAX_SPAN_TOKENS);
+    normalize_sentence_inner(input, max_span, options.concat_compound_numbers)
+}
+
 /// Sentence-mode equivalent of [`normalize_aviation`]. Aviation cardinal
 /// runs at priority 89 (above `date`=88 / `time`=85, below `measure`=90 /
 /// `money`=95), so flight-number-style spans win over date/time while
@@ -999,12 +1120,20 @@ pub fn normalize_sentence(input: &str) -> String {
 /// );
 /// ```
 pub fn normalize_sentence_aviation(input: &str) -> String {
-    normalize_sentence_aviation_with_max_span(input, DEFAULT_MAX_SPAN_TOKENS)
+    normalize_sentence_with_options(
+        input,
+        NormalizeOptions::new().with_concat_compound_numbers(true),
+    )
 }
 
 /// [`normalize_sentence_aviation`] with a configurable max span size.
 pub fn normalize_sentence_aviation_with_max_span(input: &str, max_span_tokens: usize) -> String {
-    normalize_sentence_inner(input, max_span_tokens, true)
+    normalize_sentence_with_options(
+        input,
+        NormalizeOptions::new()
+            .with_concat_compound_numbers(true)
+            .with_max_span_tokens(max_span_tokens),
+    )
 }
 
 /// Normalize a full sentence with a configurable max span size.
@@ -1021,7 +1150,10 @@ pub fn normalize_sentence_aviation_with_max_span(input: &str, max_span_tokens: u
 /// assert_eq!(normalize_sentence_with_max_span("I have twenty one apples", 4), "I have 21 apples");
 /// ```
 pub fn normalize_sentence_with_max_span(input: &str, max_span_tokens: usize) -> String {
-    normalize_sentence_inner(input, max_span_tokens, false)
+    normalize_sentence_with_options(
+        input,
+        NormalizeOptions::new().with_max_span_tokens(max_span_tokens),
+    )
 }
 
 /// Sentence-mode dispatch loop. The `aviation` flag is forwarded to
diff --git a/src/wasm.rs b/src/wasm.rs
index 76b54da..5a3d375 100644
--- a/src/wasm.rs
+++ b/src/wasm.rs
@@ -5,11 +5,27 @@ use wasm_bindgen::prelude::*;
 use crate::{
     custom_rules, normalize, normalize_aviation, normalize_sentence, normalize_sentence_aviation,
     normalize_sentence_aviation_with_max_span, normalize_sentence_with_max_span,
-    normalize_with_lang, tn_normalize, tn_normalize_lang, tn_normalize_sentence,
-    tn_normalize_sentence_lang, tn_normalize_sentence_with_max_span,
-    tn_normalize_sentence_with_max_span_lang,
+    normalize_sentence_with_options, normalize_with_lang, normalize_with_options, tn_normalize,
+    tn_normalize_lang, tn_normalize_sentence, tn_normalize_sentence_lang,
+    tn_normalize_sentence_with_max_span, tn_normalize_sentence_with_max_span_lang,
+    NormalizeOptions,
 };
 
+/// Build [`NormalizeOptions`] from JS-friendly primitives.
+///
+/// `max_span_tokens == 0` is treated as "use library default" so JS callers
+/// can pass `0` rather than dealing with optional values across the boundary.
+fn js_options(concat_compound_numbers: bool, max_span_tokens: u32) -> NormalizeOptions {
+    NormalizeOptions {
+        concat_compound_numbers,
+        max_span_tokens: if max_span_tokens == 0 {
+            None
+        } else {
+            Some(max_span_tokens as usize)
+        },
+    }
+}
+
 /// Initialize panic hook for better error messages in browser devtools.
 #[wasm_bindgen]
 pub fn set_panic_hook() {
@@ -51,6 +67,26 @@ pub fn normalize_sentence_aviation_with_max_span_js(input: &str, max_span_tokens
     normalize_sentence_aviation_with_max_span(input, max_span_tokens as usize)
 }
 
+/// Unified single-expression normalize. `concatCompoundNumbers=true` reads
+/// consecutive number words as concatenation rather than addition, e.g.
+/// `"thirty five sixty two"` → `"3562"`, `"seven eighty eight"` → `"788"`.
+#[wasm_bindgen(js_name = normalizeWithOptions)]
+pub fn normalize_with_options_js(input: &str, concat_compound_numbers: bool) -> String {
+    normalize_with_options(input, js_options(concat_compound_numbers, 0))
+}
+
+/// Unified sentence normalize. `concatCompoundNumbers` mirrors the
+/// single-expression flag; `maxSpanTokens == 0` means "use library default"
+/// (16).
+#[wasm_bindgen(js_name = normalizeSentenceWithOptions)]
+pub fn normalize_sentence_with_options_js(
+    input: &str,
+    concat_compound_numbers: bool,
+    max_span_tokens: u32,
+) -> String {
+    normalize_sentence_with_options(input, js_options(concat_compound_numbers, max_span_tokens))
+}
+
 #[wasm_bindgen(js_name = tnNormalize)]
 pub fn tn_normalize_js(input: &str) -> String {
     tn_normalize(input)
diff --git a/tests/en_tests.rs b/tests/en_tests.rs
index 414144b..2a0bd62 100644
--- a/tests/en_tests.rs
+++ b/tests/en_tests.rs
@@ -8,7 +8,8 @@ mod common;
 use std::path::Path;
 use text_processing_rs::{
     custom_rules, normalize, normalize_aviation, normalize_sentence, normalize_sentence_aviation,
-    normalize_sentence_with_max_span,
+    normalize_sentence_with_max_span, normalize_sentence_with_options, normalize_with_options,
+    NormalizeOptions,
 };
 
 fn print_failures(results: &common::TestResults) {
@@ -928,8 +929,9 @@ fn test_issue_14_normalize_aviation() {
     assert_eq!(normalize_aviation("seven eighty eight"), "788");
     // Beats time tagger.
     assert_eq!(normalize_aviation("two thirty five"), "235");
-    // Beats date old-year reading.
-    assert_eq!(normalize_aviation("twenty one forty two"), "63");
+    // Two consecutive 0-99 compounds concatenate (issue #23 fix).
+    // Was previously `"63"` (= 20+1+40+2) under the old grammatical fallback.
+    assert_eq!(normalize_aviation("twenty one forty two"), "2142");
     // Non-number phrases fall through unchanged.
     assert_eq!(normalize_aviation("hello world"), "hello world");
     // Money / measure / decimal / ordinal still work via fallback to
@@ -974,3 +976,126 @@ fn test_issue_14_normalize_sentence_aviation() {
         "I have 21 apples"
     );
 }
+
+// ── Unified options API (issues #15 and #23 follow-up) ────────────────
+
+/// `NormalizeOptions::default()` should behave identically to `normalize`.
+#[test]
+fn test_options_default_matches_normalize() {
+    let opts = NormalizeOptions::default();
+    assert_eq!(normalize_with_options("two hundred", opts), "200");
+    assert_eq!(normalize_with_options("five dollars", opts), "$5");
+    // Time tagger still wins by default.
+    assert_eq!(normalize_with_options("two thirty five", opts), "02:35");
+}
+
+/// `concat_compound_numbers: true` should make `normalize_with_options`
+/// behave like `normalize_aviation`.
+#[test]
+fn test_options_concat_matches_aviation() {
+    let opts = NormalizeOptions::new().with_concat_compound_numbers(true);
+    assert_eq!(normalize_with_options("seven eighty eight", opts), "788");
+    assert_eq!(normalize_with_options("two thirty five", opts), "235");
+    assert_eq!(normalize_with_options("hello world", opts), "hello world");
+    // Money / scale-word fallthrough still works.
+    assert_eq!(normalize_with_options("five dollars", opts), "$5");
+    assert_eq!(
+        normalize_with_options("two thousand seventeen", opts),
+        "2017"
+    );
+}
+
+/// Sentence mode default options match `normalize_sentence`.
+#[test]
+fn test_sentence_options_default_matches_default() {
+    let opts = NormalizeOptions::default();
+    assert_eq!(
+        normalize_sentence_with_options("I have twenty one apples", opts),
+        "I have 21 apples"
+    );
+    assert_eq!(
+        normalize_sentence_with_options("hello world", opts),
+        "hello world"
+    );
+}
+
+/// Sentence mode with concat enabled matches `normalize_sentence_aviation`.
+#[test]
+fn test_sentence_options_concat_compound() {
+    let opts = NormalizeOptions::new().with_concat_compound_numbers(true);
+    assert_eq!(
+        normalize_sentence_with_options("United seven eighty eight", opts),
+        "United 788"
+    );
+    assert_eq!(
+        normalize_sentence_with_options("flight two thirty five departs at gate four", opts),
+        "flight 235 departs at gate 4"
+    );
+}
+
+/// `max_span_tokens` on the options struct is honoured.
+#[test]
+fn test_sentence_options_max_span() {
+    let opts = NormalizeOptions::new().with_max_span_tokens(4);
+    assert_eq!(
+        normalize_sentence_with_options("I have twenty one apples", opts),
+        "I have 21 apples"
+    );
+}
+
+/// `None` for `max_span_tokens` should give the same default as
+/// `normalize_sentence` (16).
+#[test]
+fn test_sentence_options_none_max_span_uses_default() {
+    let with_default = NormalizeOptions::new();
+    let with_explicit = NormalizeOptions::new().with_max_span_tokens(16);
+    let input = "United seven eighty eight";
+    assert_eq!(
+        normalize_sentence_with_options(input, with_default),
+        normalize_sentence_with_options(input, with_explicit),
+    );
+}
+
+/// Builder methods compose: concat flag + max span on one struct.
+#[test]
+fn test_sentence_options_builder_compose() {
+    let opts = NormalizeOptions::new()
+        .with_concat_compound_numbers(true)
+        .with_max_span_tokens(8);
+    assert_eq!(
+        normalize_sentence_with_options("United seven eighty eight", opts),
+        "United 788"
+    );
+}
+
+/// Issue #23: consecutive 0-99 compounds should concatenate, not add.
+/// `"thirty five sixty two"` → `"3562"`, not `"97"` (= 35 + 62).
+#[test]
+fn test_issue_23_compound_concat() {
+    // Whole-input single-expression form.
+    assert_eq!(normalize_aviation("thirty five sixty two"), "3562");
+
+    // Sentence form — the original report.
+    assert_eq!(
+        normalize_sentence_aviation(
+            "Alright thirty five sixty two appreciate your help United seven eighty eight"
+        ),
+        "Alright 3562 appreciate your help United 788"
+    );
+
+    // Through the unified options API too.
+    let opts = NormalizeOptions::new().with_concat_compound_numbers(true);
+    assert_eq!(
+        normalize_sentence_with_options("thirty five sixty two", opts),
+        "3562"
+    );
+
+    // Mixed digit prefix + compounds: "two thirty five sixty two" → 23562.
+    assert_eq!(normalize_aviation("two thirty five sixty two"), "23562");
+
+    // Single chunks must NOT concatenate (preserves grammatical reading).
+    assert_eq!(normalize_aviation("twenty one"), "21");
+
+    // Scale words still anchor grammatical addition.
+    assert_eq!(normalize_aviation("two thousand seventeen"), "2017");
+}

From b2d22685ccb702f70e1fb273849296a72114211f Mon Sep 17 00:00:00 2001
From: Alex-Wengg <hanweng9@gmail.com>
Date: Sun, 26 Apr 2026 23:49:35 -0400
Subject: [PATCH 2/4] refactor!: drop deprecated aviation/max-span wrappers

Per issue #15 and #23 follow-up: remove all backwards-compat wrappers
now that callers pass through the unified `NormalizeOptions` API.

Removed Rust functions:
- `normalize_aviation`, `normalize_sentence_aviation`
- `normalize_sentence_aviation_with_max_span`
- `normalize_sentence_with_max_span`

Removed FFI bindings:
- `nemo_normalize_aviation`, `nemo_normalize_sentence_aviation`
- `nemo_normalize_sentence_with_max_span`
- `nemo_normalize_sentence_aviation_with_max_span`

Removed WASM bindings:
- `normalizeAviation`, `normalizeSentenceAviation`
- `normalizeSentenceWithMaxSpan`, `normalizeSentenceAviationWithMaxSpan`

Callers should switch to:
- Rust: `normalize_with_options` / `normalize_sentence_with_options`
- FFI:  `nemo_normalize_with_options` / `nemo_normalize_sentence_with_options`
- WASM: `normalizeWithOptions` / `normalizeSentenceWithOptions`

Swift wrapper and headers updated accordingly. All Rust tests
(2050 across the workspace incl. doc tests) and FFI tests pass.
---
 src/ffi.rs                                    | 128 +--------------
 src/lib.rs                                    | 155 ++++--------------
 src/wasm.rs                                   |  30 +---
 .../include/nemo_text_processing.h            |   7 +-
 swift-test/Sources/NemoTest/NemoTest.swift    |  18 +-
 swift/NemoTextProcessing.swift                |  43 ++++-
 swift/include/nemo_text_processing.h          |  24 ++-
 tests/en_tests.rs                             | 106 +++++++-----
 tests/extensive_tests.rs                      |   9 +-
 9 files changed, 198 insertions(+), 322 deletions(-)

diff --git a/src/ffi.rs b/src/ffi.rs
index d5f50a8..3a4909b 100644
--- a/src/ffi.rs
+++ b/src/ffi.rs
@@ -4,10 +4,9 @@ use std::ffi::{c_char, CStr, CString};
 use std::ptr;
 
 use crate::{
-    custom_rules, normalize, normalize_aviation, normalize_sentence, normalize_sentence_aviation,
-    normalize_sentence_aviation_with_max_span, normalize_sentence_with_max_span,
-    normalize_sentence_with_options, normalize_with_options, tn_normalize, tn_normalize_lang,
-    tn_normalize_sentence, tn_normalize_sentence_lang, tn_normalize_sentence_with_max_span,
+    custom_rules, normalize, normalize_sentence, normalize_sentence_with_options,
+    normalize_with_options, tn_normalize, tn_normalize_lang, tn_normalize_sentence,
+    tn_normalize_sentence_lang, tn_normalize_sentence_with_max_span,
     tn_normalize_sentence_with_max_span_lang, NormalizeOptions,
 };
 
@@ -80,119 +79,6 @@ pub unsafe extern "C" fn nemo_normalize_sentence(input: *const c_char) -> *mut c
     }
 }
 
-/// Normalize a full sentence with a configurable max span size.
-///
-/// `max_span_tokens` controls the maximum number of consecutive tokens
-/// considered as a single normalizable expression (default is 16).
-///
-/// # Safety
-/// - `input` must be a valid null-terminated UTF-8 string
-/// - Returns a newly allocated string that must be freed with `nemo_free_string`
-#[no_mangle]
-pub unsafe extern "C" fn nemo_normalize_sentence_with_max_span(
-    input: *const c_char,
-    max_span_tokens: u32,
-) -> *mut c_char {
-    if input.is_null() {
-        return ptr::null_mut();
-    }
-
-    let c_str = match CStr::from_ptr(input).to_str() {
-        Ok(s) => s,
-        Err(_) => return ptr::null_mut(),
-    };
-
-    let result = normalize_sentence_with_max_span(c_str, max_span_tokens as usize);
-
-    match CString::new(result) {
-        Ok(c_string) => c_string.into_raw(),
-        Err(_) => ptr::null_mut(),
-    }
-}
-
-/// Aviation-flavoured single-input normalize.
-///
-/// Layered on top of [`nemo_normalize`]: tries `cardinal::parse_aviation`
-/// first so flight-number / call-sign phrases like `"seven eighty eight"`
-/// resolve to `"788"`, then falls back to the regular dispatch.
-///
-/// # Safety
-/// - `input` must be a valid null-terminated UTF-8 string
-/// - Returns a newly allocated string that must be freed with `nemo_free_string`
-#[no_mangle]
-pub unsafe extern "C" fn nemo_normalize_aviation(input: *const c_char) -> *mut c_char {
-    if input.is_null() {
-        return ptr::null_mut();
-    }
-
-    let c_str = match CStr::from_ptr(input).to_str() {
-        Ok(s) => s,
-        Err(_) => return ptr::null_mut(),
-    };
-
-    let result = normalize_aviation(c_str);
-
-    match CString::new(result) {
-        Ok(c_string) => c_string.into_raw(),
-        Err(_) => ptr::null_mut(),
-    }
-}
-
-/// Aviation-flavoured sentence normalize.
-///
-/// Sentence-mode equivalent of [`nemo_normalize_aviation`]. Aviation cardinal
-/// runs at priority 89 (above date / time, below money / measure), so
-/// flight-number-style spans win without disturbing money / measure / decimal.
-///
-/// # Safety
-/// - `input` must be a valid null-terminated UTF-8 string
-/// - Returns a newly allocated string that must be freed with `nemo_free_string`
-#[no_mangle]
-pub unsafe extern "C" fn nemo_normalize_sentence_aviation(input: *const c_char) -> *mut c_char {
-    if input.is_null() {
-        return ptr::null_mut();
-    }
-
-    let c_str = match CStr::from_ptr(input).to_str() {
-        Ok(s) => s,
-        Err(_) => return ptr::null_mut(),
-    };
-
-    let result = normalize_sentence_aviation(c_str);
-
-    match CString::new(result) {
-        Ok(c_string) => c_string.into_raw(),
-        Err(_) => ptr::null_mut(),
-    }
-}
-
-/// Aviation sentence normalize with a configurable max span size.
-///
-/// # Safety
-/// - `input` must be a valid null-terminated UTF-8 string
-/// - Returns a newly allocated string that must be freed with `nemo_free_string`
-#[no_mangle]
-pub unsafe extern "C" fn nemo_normalize_sentence_aviation_with_max_span(
-    input: *const c_char,
-    max_span_tokens: u32,
-) -> *mut c_char {
-    if input.is_null() {
-        return ptr::null_mut();
-    }
-
-    let c_str = match CStr::from_ptr(input).to_str() {
-        Ok(s) => s,
-        Err(_) => return ptr::null_mut(),
-    };
-
-    let result = normalize_sentence_aviation_with_max_span(c_str, max_span_tokens as usize);
-
-    match CString::new(result) {
-        Ok(c_string) => c_string.into_raw(),
-        Err(_) => ptr::null_mut(),
-    }
-}
-
 /// Unified single-expression normalize with caller-specified options.
 ///
 /// `concat_compound_numbers`: `0` for standard ITN, non-zero for
@@ -546,10 +432,10 @@ mod tests {
     }
 
     #[test]
-    fn test_ffi_normalize_aviation() {
+    fn test_ffi_normalize_with_options_concat_compound() {
         unsafe {
             let input = CString::new("seven eighty eight").unwrap();
-            let result = nemo_normalize_aviation(input.as_ptr());
+            let result = nemo_normalize_with_options(input.as_ptr(), 1);
             assert!(!result.is_null());
             let result_str = CStr::from_ptr(result).to_str().unwrap();
             assert_eq!(result_str, "788");
@@ -558,10 +444,10 @@ mod tests {
     }
 
     #[test]
-    fn test_ffi_normalize_sentence_aviation() {
+    fn test_ffi_normalize_sentence_with_options_concat_compound() {
         unsafe {
             let input = CString::new("United seven eighty eight").unwrap();
-            let result = nemo_normalize_sentence_aviation(input.as_ptr());
+            let result = nemo_normalize_sentence_with_options(input.as_ptr(), 1, 0);
             assert!(!result.is_null());
             let result_str = CStr::from_ptr(result).to_str().unwrap();
             assert_eq!(result_str, "United 788");
diff --git a/src/lib.rs b/src/lib.rs
index df3c2f5..7920f81 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -173,60 +173,31 @@ pub fn normalize(input: &str) -> String {
     input.to_string()
 }
 
-/// Normalize a single input with **aviation flight-number reading
-/// prioritized**.
+/// Single-expression normalize with caller-specified [`NormalizeOptions`].
 ///
-/// Same dispatch as [`normalize`], with one twist: `cardinal::parse_aviation`
+/// When `options.concat_compound_numbers` is `true`, `cardinal::parse_aviation`
 /// is tried *after* the high-confidence taggers (`custom_rules`, `whitelist`,
 /// `punctuation`, `word`) but *before* `time` and `date`. The result: when
 /// the whole input is a number-only phrase like `"two thirty five"` or
-/// `"seven eighty eight"`, the aviation reading wins (`"235"`, `"788"`)
-/// instead of being eaten as a time (`"02:35"`) or as an old-year via the
-/// date tagger.
-///
-/// Use this from flight-number / call-sign / aviation-radio contexts. Phrases
-/// that aren't pure number words still flow through the rest of the
-/// pipeline normally (`"five dollars"` → `"$5"` via the money tagger).
-///
-/// ```
-/// use text_processing_rs::normalize_aviation;
-///
-/// assert_eq!(normalize_aviation("seven eighty eight"), "788");
-/// assert_eq!(normalize_aviation("two thirty five"), "235");
-/// // Non-number phrases are unaffected.
-/// assert_eq!(normalize_aviation("hello world"), "hello world");
-/// ```
-pub fn normalize_aviation(input: &str) -> String {
-    normalize_with_options(
-        input,
-        NormalizeOptions::new().with_concat_compound_numbers(true),
-    )
-}
-
-/// Unified single-expression normalize entry point.
-///
-/// Switches between standard and concat-compound (aviation-style) dispatch
-/// based on `options.concat_compound_numbers`. The `max_span_tokens` field on
-/// [`NormalizeOptions`] is ignored here — it only applies to
-/// [`normalize_sentence_with_options`].
+/// `"seven eighty eight"`, concat-compound reading wins (`"235"`, `"788"`)
+/// instead of being eaten as a time (`"02:35"`) or an old-year via the date
+/// tagger. Non-number phrases still flow through the rest of the pipeline
+/// (`"five dollars"` → `"$5"` via the money tagger).
 ///
 /// ```
 /// use text_processing_rs::{normalize_with_options, NormalizeOptions};
 ///
 /// let opts = NormalizeOptions::new().with_concat_compound_numbers(true);
 /// assert_eq!(normalize_with_options("seven eighty eight", opts), "788");
+/// assert_eq!(normalize_with_options("two thirty five", opts), "235");
+/// // Non-number phrases are unaffected.
+/// assert_eq!(normalize_with_options("hello world", opts), "hello world");
 /// ```
 pub fn normalize_with_options(input: &str, options: NormalizeOptions) -> String {
-    if options.concat_compound_numbers {
-        normalize_aviation_inner(input)
-    } else {
-        normalize(input)
+    if !options.concat_compound_numbers {
+        return normalize(input);
     }
-}
 
-/// Aviation single-expression dispatch. Kept private; callers go through
-/// [`normalize_aviation`] or [`normalize_with_options`].
-fn normalize_aviation_inner(input: &str) -> String {
     let input = input.trim();
 
     // High-confidence rules still win.
@@ -243,14 +214,13 @@ fn normalize_aviation_inner(input: &str) -> String {
         return result;
     }
 
-    // Aviation cardinal beats time/date here. This is the whole point of
-    // the aviation domain.
+    // Concat-compound cardinal beats time/date.
     if let Some(num) = cardinal::parse_aviation(input) {
         return num;
     }
 
-    // Fall back to the standard pipeline for anything aviation cardinal
-    // didn't recognise (money, measure, decimal, ordinal, telephone, etc.).
+    // Fall back to the standard pipeline for anything not recognised
+    // (money, measure, decimal, ordinal, telephone, etc.).
     normalize(input)
 }
 
@@ -987,11 +957,12 @@ const DEFAULT_MAX_SPAN_TOKENS: usize = 16;
 ///
 /// Excluded in sentence mode: `word` and `telephone` (over-fire on natural language).
 ///
-/// `aviation`: when `true`, `cardinal::parse_aviation` is tried at priority 89
-/// (above `date`=88 and `time`=85, below `measure`=90 / `money`=95) and the
-/// regular cardinal fallback at 70 is skipped (the aviation reader already
-/// falls back to grammatical when no digit prefix is present).
-fn parse_span(span: &str, aviation: bool) -> Option<(String, u8)> {
+/// `concat_compound`: when `true`, `cardinal::parse_aviation` is tried at
+/// priority 89 (above `date`=88 and `time`=85, below `measure`=90 /
+/// `money`=95) and the regular cardinal fallback at 70 is skipped (the
+/// concat-compound reader already falls back to grammatical when the
+/// concat pattern does not apply).
+fn parse_span(span: &str, concat_compound: bool) -> Option<(String, u8)> {
     let token_count = span.split_whitespace().count();
     if token_count == 0 {
         return None;
@@ -1013,12 +984,12 @@ fn parse_span(span: &str, aviation: bool) -> Option<(String, u8)> {
         return Some((result, 90));
     }
 
-    // Aviation cardinal opt-in: priority 89, beats date/time. No short-span
-    // gate — aviation mode is opt-in, so the caller has accepted aggressive
-    // matching across longer spans like "one thousand two hundred thirty
-    // four". `parse_aviation` falls back to grammatical when the digit-prefix
-    // pattern does not apply, so non-aviation phrases still resolve.
-    if aviation {
+    // Concat-compound cardinal opt-in: priority 89, beats date/time. No
+    // short-span gate — this is opt-in, so the caller has accepted
+    // aggressive matching across longer spans like "one thousand two
+    // hundred thirty four". `parse_aviation` falls back to grammatical when
+    // the concat pattern does not apply, so non-concat phrases still resolve.
+    if concat_compound {
         if let Some(result) = cardinal::parse_aviation(span) {
             return Some((result, 89));
         }
@@ -1040,9 +1011,9 @@ fn parse_span(span: &str, aviation: bool) -> Option<(String, u8)> {
         return Some((result, 75));
     }
 
-    // Default cardinal fallback (priority 70). In aviation mode the cardinal
-    // path is already covered by the priority-89 branch above.
-    if !aviation && token_count <= 4 {
+    // Default cardinal fallback (priority 70). In concat-compound mode the
+    // cardinal path is already covered by the priority-89 branch above.
+    if !concat_compound && token_count <= 4 {
         if let Some(result) = cardinal::parse(span) {
             return Some((result, 70));
         }
@@ -1064,7 +1035,7 @@ fn parse_span(span: &str, aviation: bool) -> Option<(String, u8)> {
 /// assert_eq!(normalize_sentence("hello world"), "hello world");
 /// ```
 pub fn normalize_sentence(input: &str) -> String {
-    normalize_sentence_with_max_span(input, DEFAULT_MAX_SPAN_TOKENS)
+    normalize_sentence_inner(input, DEFAULT_MAX_SPAN_TOKENS, false)
 }
 
 /// Unified sentence-mode entry point.
@@ -1096,69 +1067,9 @@ pub fn normalize_sentence_with_options(input: &str, options: NormalizeOptions) -
     normalize_sentence_inner(input, max_span, options.concat_compound_numbers)
 }
 
-/// Sentence-mode equivalent of [`normalize_aviation`]. Aviation cardinal
-/// runs at priority 89 (above `date`=88 / `time`=85, below `measure`=90 /
-/// `money`=95), so flight-number-style spans win over date/time while
-/// measure / money phrases keep their existing semantics.
-///
-/// ```
-/// use text_processing_rs::normalize_sentence_aviation;
-///
-/// // Aviation cardinal beats time/date for pure-number spans.
-/// assert_eq!(
-///     normalize_sentence_aviation("United seven eighty eight"),
-///     "United 788"
-/// );
-/// assert_eq!(
-///     normalize_sentence_aviation("flight two thirty five departs at gate four"),
-///     "flight 235 departs at gate 4"
-/// );
-/// // Non-aviation spans flow through normally.
-/// assert_eq!(
-///     normalize_sentence_aviation("I have twenty one apples"),
-///     "I have 21 apples"
-/// );
-/// ```
-pub fn normalize_sentence_aviation(input: &str) -> String {
-    normalize_sentence_with_options(
-        input,
-        NormalizeOptions::new().with_concat_compound_numbers(true),
-    )
-}
-
-/// [`normalize_sentence_aviation`] with a configurable max span size.
-pub fn normalize_sentence_aviation_with_max_span(input: &str, max_span_tokens: usize) -> String {
-    normalize_sentence_with_options(
-        input,
-        NormalizeOptions::new()
-            .with_concat_compound_numbers(true)
-            .with_max_span_tokens(max_span_tokens),
-    )
-}
-
-/// Normalize a full sentence with a configurable max span size.
-///
-/// `max_span_tokens` controls the maximum number of consecutive tokens
-/// that will be considered as a single normalizable expression.
-/// Smaller values are faster but may miss multi-word expressions.
-/// Larger values catch more patterns but do more work per token.
-///
-/// ```
-/// use text_processing_rs::normalize_sentence_with_max_span;
-///
-/// // Short span: only catches small expressions
-/// assert_eq!(normalize_sentence_with_max_span("I have twenty one apples", 4), "I have 21 apples");
-/// ```
-pub fn normalize_sentence_with_max_span(input: &str, max_span_tokens: usize) -> String {
-    normalize_sentence_with_options(
-        input,
-        NormalizeOptions::new().with_max_span_tokens(max_span_tokens),
-    )
-}
-
-/// Sentence-mode dispatch loop. The `aviation` flag is forwarded to
+/// Sentence-mode dispatch loop. The `concat_compound` flag is forwarded to
 /// [`parse_span`] so each span sees the right tagger priorities.
-fn normalize_sentence_inner(input: &str, max_span_tokens: usize, aviation: bool) -> String {
+fn normalize_sentence_inner(input: &str, max_span_tokens: usize, concat_compound: bool) -> String {
     let trimmed = input.trim();
     if trimmed.is_empty() {
         return trimmed.to_string();
@@ -1180,7 +1091,7 @@ fn normalize_sentence_inner(input: &str, max_span_tokens: usize, aviation: bool)
         // Longest-span-first search keeps replacements stable and non-overlapping.
         for end in (i + 1..=max_end).rev() {
             let span = tokens[i..end].join(" ");
-            let Some((candidate, score)) = parse_span(&span, aviation) else {
+            let Some((candidate, score)) = parse_span(&span, concat_compound) else {
                 continue;
             };
 
diff --git a/src/wasm.rs b/src/wasm.rs
index 5a3d375..e9de333 100644
--- a/src/wasm.rs
+++ b/src/wasm.rs
@@ -3,12 +3,10 @@
 use wasm_bindgen::prelude::*;
 
 use crate::{
-    custom_rules, normalize, normalize_aviation, normalize_sentence, normalize_sentence_aviation,
-    normalize_sentence_aviation_with_max_span, normalize_sentence_with_max_span,
-    normalize_sentence_with_options, normalize_with_lang, normalize_with_options, tn_normalize,
-    tn_normalize_lang, tn_normalize_sentence, tn_normalize_sentence_lang,
-    tn_normalize_sentence_with_max_span, tn_normalize_sentence_with_max_span_lang,
-    NormalizeOptions,
+    custom_rules, normalize, normalize_sentence, normalize_sentence_with_options,
+    normalize_with_lang, normalize_with_options, tn_normalize, tn_normalize_lang,
+    tn_normalize_sentence, tn_normalize_sentence_lang, tn_normalize_sentence_with_max_span,
+    tn_normalize_sentence_with_max_span_lang, NormalizeOptions,
 };
 
 /// Build [`NormalizeOptions`] from JS-friendly primitives.
@@ -47,26 +45,6 @@ pub fn normalize_sentence_js(input: &str) -> String {
     normalize_sentence(input)
 }
 
-#[wasm_bindgen(js_name = normalizeSentenceWithMaxSpan)]
-pub fn normalize_sentence_with_max_span_js(input: &str, max_span_tokens: u32) -> String {
-    normalize_sentence_with_max_span(input, max_span_tokens as usize)
-}
-
-#[wasm_bindgen(js_name = normalizeAviation)]
-pub fn normalize_aviation_js(input: &str) -> String {
-    normalize_aviation(input)
-}
-
-#[wasm_bindgen(js_name = normalizeSentenceAviation)]
-pub fn normalize_sentence_aviation_js(input: &str) -> String {
-    normalize_sentence_aviation(input)
-}
-
-#[wasm_bindgen(js_name = normalizeSentenceAviationWithMaxSpan)]
-pub fn normalize_sentence_aviation_with_max_span_js(input: &str, max_span_tokens: u32) -> String {
-    normalize_sentence_aviation_with_max_span(input, max_span_tokens as usize)
-}
-
 /// Unified single-expression normalize. `concatCompoundNumbers=true` reads
 /// consecutive number words as concatenation rather than addition, e.g.
 /// `"thirty five sixty two"` → `"3562"`, `"seven eighty eight"` → `"788"`.
diff --git a/swift-test/Sources/CNemoTextProcessing/include/nemo_text_processing.h b/swift-test/Sources/CNemoTextProcessing/include/nemo_text_processing.h
index 9199417..833fc41 100644
--- a/swift-test/Sources/CNemoTextProcessing/include/nemo_text_processing.h
+++ b/swift-test/Sources/CNemoTextProcessing/include/nemo_text_processing.h
@@ -9,7 +9,12 @@ extern "C" {
 
 char* nemo_normalize(const char* input);
 char* nemo_normalize_sentence(const char* input);
-char* nemo_normalize_sentence_with_max_span(const char* input, uint32_t max_span_tokens);
+char* nemo_normalize_with_options(const char* input, uint32_t concat_compound_numbers);
+char* nemo_normalize_sentence_with_options(
+    const char* input,
+    uint32_t concat_compound_numbers,
+    uint32_t max_span_tokens
+);
 void nemo_add_rule(const char* spoken, const char* written);
 int32_t nemo_remove_rule(const char* spoken);
 void nemo_clear_rules(void);
diff --git a/swift-test/Sources/NemoTest/NemoTest.swift b/swift-test/Sources/NemoTest/NemoTest.swift
index e69f421..5d9add2 100644
--- a/swift-test/Sources/NemoTest/NemoTest.swift
+++ b/swift-test/Sources/NemoTest/NemoTest.swift
@@ -16,8 +16,22 @@ enum NemoTextProcessing {
         return String(cString: resultPtr)
     }
 
-    static func normalizeSentence(_ input: String, maxSpanTokens: UInt32) -> String {
-        guard let resultPtr = nemo_normalize_sentence_with_max_span(input, maxSpanTokens) else { return input }
+    static func normalizeSentence(
+        _ input: String,
+        concatCompoundNumbers: Bool = false,
+        maxSpanTokens: UInt32 = 0
+    ) -> String {
+        let concatFlag: UInt32 = concatCompoundNumbers ? 1 : 0
+        guard let resultPtr = nemo_normalize_sentence_with_options(
+            input, concatFlag, maxSpanTokens
+        ) else { return input }
+        defer { nemo_free_string(resultPtr) }
+        return String(cString: resultPtr)
+    }
+
+    static func normalize(_ input: String, concatCompoundNumbers: Bool) -> String {
+        let concatFlag: UInt32 = concatCompoundNumbers ? 1 : 0
+        guard let resultPtr = nemo_normalize_with_options(input, concatFlag) else { return input }
         defer { nemo_free_string(resultPtr) }
         return String(cString: resultPtr)
     }
diff --git a/swift/NemoTextProcessing.swift b/swift/NemoTextProcessing.swift
index 12a501c..d27d592 100644
--- a/swift/NemoTextProcessing.swift
+++ b/swift/NemoTextProcessing.swift
@@ -59,18 +59,53 @@ public enum NemoTextProcessing {
         return String(cString: resultPtr)
     }
 
-    /// Normalize a full sentence with a configurable max span size.
+    /// Normalize a full sentence with caller-specified options.
     ///
     /// - Parameters:
     ///   - input: Sentence containing spoken-form spans
-    ///   - maxSpanTokens: Maximum consecutive tokens per normalizable span (default 16)
+    ///   - concatCompoundNumbers: When true, consecutive 0-99 number words
+    ///     concatenate (e.g. `"thirty five sixty two"` → `"3562"`,
+    ///     `"seven eighty eight"` → `"788"`) instead of adding.
+    ///   - maxSpanTokens: Maximum consecutive tokens per normalizable span.
+    ///     Pass `0` to use the library default (16).
     /// - Returns: Sentence with spoken-form spans replaced
-    public static func normalizeSentence(_ input: String, maxSpanTokens: UInt32) -> String {
+    public static func normalizeSentence(
+        _ input: String,
+        concatCompoundNumbers: Bool = false,
+        maxSpanTokens: UInt32 = 0
+    ) -> String {
+        guard let cString = input.cString(using: .utf8) else {
+            return input
+        }
+
+        let concatFlag: UInt32 = concatCompoundNumbers ? 1 : 0
+        guard let resultPtr = nemo_normalize_sentence_with_options(
+            cString, concatFlag, maxSpanTokens
+        ) else {
+            return input
+        }
+
+        defer { nemo_free_string(resultPtr) }
+
+        return String(cString: resultPtr)
+    }
+
+    /// Normalize a single spoken-form expression with caller-specified options.
+    ///
+    /// - Parameters:
+    ///   - input: Spoken-form text
+    ///   - concatCompoundNumbers: See `normalizeSentence(_:concatCompoundNumbers:maxSpanTokens:)`.
+    /// - Returns: Written-form text, or original if no normalization applies.
+    public static func normalize(
+        _ input: String,
+        concatCompoundNumbers: Bool
+    ) -> String {
         guard let cString = input.cString(using: .utf8) else {
             return input
         }
 
-        guard let resultPtr = nemo_normalize_sentence_with_max_span(cString, maxSpanTokens) else {
+        let concatFlag: UInt32 = concatCompoundNumbers ? 1 : 0
+        guard let resultPtr = nemo_normalize_with_options(cString, concatFlag) else {
             return input
         }
 
diff --git a/swift/include/nemo_text_processing.h b/swift/include/nemo_text_processing.h
index b8ec478..7a2abec 100644
--- a/swift/include/nemo_text_processing.h
+++ b/swift/include/nemo_text_processing.h
@@ -33,13 +33,31 @@ char* nemo_normalize(const char* input);
 char* nemo_normalize_sentence(const char* input);
 
 /**
- * Normalize a full sentence with a configurable max span size.
+ * Normalize a single spoken-form expression with caller-specified options.
  *
  * @param input Null-terminated UTF-8 string
- * @param max_span_tokens Maximum number of consecutive tokens per span (default 16)
+ * @param concat_compound_numbers When non-zero, consecutive 0-99 number
+ *        words concatenate (aviation flight-number style — e.g.
+ *        "thirty five sixty two" -> "3562", "seven eighty eight" -> "788")
+ *        instead of adding.
+ * @return Newly allocated string, must be freed with nemo_free_string().
+ */
+char* nemo_normalize_with_options(const char* input, uint32_t concat_compound_numbers);
+
+/**
+ * Normalize a full sentence with caller-specified options.
+ *
+ * @param input Null-terminated UTF-8 string
+ * @param concat_compound_numbers See nemo_normalize_with_options.
+ * @param max_span_tokens Maximum consecutive tokens per span. Pass 0 to
+ *        use the library default (16).
  * @return Newly allocated string, must be freed with nemo_free_string().
  */
-char* nemo_normalize_sentence_with_max_span(const char* input, uint32_t max_span_tokens);
+char* nemo_normalize_sentence_with_options(
+    const char* input,
+    uint32_t concat_compound_numbers,
+    uint32_t max_span_tokens
+);
 
 /**
  * Add a custom spoken-to-written normalization rule.
diff --git a/tests/en_tests.rs b/tests/en_tests.rs
index 2a0bd62..b149270 100644
--- a/tests/en_tests.rs
+++ b/tests/en_tests.rs
@@ -7,11 +7,15 @@ mod common;
 
 use std::path::Path;
 use text_processing_rs::{
-    custom_rules, normalize, normalize_aviation, normalize_sentence, normalize_sentence_aviation,
-    normalize_sentence_with_max_span, normalize_sentence_with_options, normalize_with_options,
-    NormalizeOptions,
+    custom_rules, normalize, normalize_sentence, normalize_sentence_with_options,
+    normalize_with_options, NormalizeOptions,
 };
 
+/// Test helper: shorthand for the concat-compound (aviation-style) options.
+fn concat_opts() -> NormalizeOptions {
+    NormalizeOptions::new().with_concat_compound_numbers(true)
+}
+
 fn print_failures(results: &common::TestResults) {
     for f in &results.failures {
         println!(
@@ -554,7 +558,10 @@ fn test_max_span_tokens() {
 
     // Span of 2 is too short to catch "five dollars and fifty cents" (5 tokens)
     // but can still catch "five dollars" (2 tokens) → "$5"
-    let result = normalize_sentence_with_max_span("five dollars and fifty cents for lunch", 2);
+    let result = normalize_sentence_with_options(
+        "five dollars and fifty cents for lunch",
+        NormalizeOptions::new().with_max_span_tokens(2),
+    );
     // With max_span=2, it can only see 2 tokens at a time
     // "five dollars" → "$5", "and" → pass, "fifty cents" → "$0.50"
     // The exact behavior depends on money tagger matching "fifty cents" alone
@@ -563,7 +570,10 @@ fn test_max_span_tokens() {
     assert_ne!(result, "$5.50 for lunch");
 
     // Span of 1 should basically only catch single-word tokens
-    let result_1 = normalize_sentence_with_max_span("I have twenty one apples", 1);
+    let result_1 = normalize_sentence_with_options(
+        "I have twenty one apples",
+        NormalizeOptions::new().with_max_span_tokens(1),
+    );
     // "twenty" alone isn't meaningful as a cardinal in most taggers,
     // but "one" alone → "1"
     println!("max_span=1: {}", result_1);
@@ -900,12 +910,12 @@ fn test_spelled_digit_cardinal_does_not_break_normal_cardinals() {
     assert_eq!(normalize("one thousand two hundred thirty four"), "1234");
 }
 
-/// Issue #14: aviation flight-number reading is exposed as an **opt-in**
-/// pipeline. Generic dispatch keeps upstream NeMo semantics (date wins for
-/// `"twenty one forty two"`, time wins for `"two thirty five"`); callers
-/// who know they're in aviation context reach for the `*_aviation`
-/// variants, which run aviation cardinal at priority 89 (above date 88
-/// and time 85).
+/// Issue #14: concat-compound (aviation-style) reading is exposed as an
+/// **opt-in** option. Default dispatch keeps upstream NeMo semantics
+/// (date wins for `"twenty one forty two"`, time wins for `"two thirty
+/// five"`); callers who know they're in aviation context pass
+/// `concat_compound_numbers: true`, which runs the concat cardinal at
+/// priority 89 (above date 88 and time 85).
 #[test]
 fn test_issue_14_default_dispatch_unchanged() {
     // Scale-word grammar is preserved everywhere.
@@ -922,57 +932,61 @@ fn test_issue_14_default_dispatch_unchanged() {
     assert_eq!(normalize("seven eighty eight"), "788");
 }
 
-/// Aviation pipeline `normalize_aviation` (single-input). Aviation cardinal
-/// runs early enough to beat time/date.
+/// Single-input concat-compound mode. Aviation cardinal runs early enough
+/// to beat time/date.
 #[test]
 fn test_issue_14_normalize_aviation() {
-    assert_eq!(normalize_aviation("seven eighty eight"), "788");
+    let opts = concat_opts();
+    assert_eq!(normalize_with_options("seven eighty eight", opts), "788");
     // Beats time tagger.
-    assert_eq!(normalize_aviation("two thirty five"), "235");
+    assert_eq!(normalize_with_options("two thirty five", opts), "235");
     // Two consecutive 0-99 compounds concatenate (issue #23 fix).
     // Was previously `"63"` (= 20+1+40+2) under the old grammatical fallback.
-    assert_eq!(normalize_aviation("twenty one forty two"), "2142");
+    assert_eq!(normalize_with_options("twenty one forty two", opts), "2142");
     // Non-number phrases fall through unchanged.
-    assert_eq!(normalize_aviation("hello world"), "hello world");
+    assert_eq!(normalize_with_options("hello world", opts), "hello world");
     // Money / measure / decimal / ordinal still work via fallback to
     // standard `normalize`.
-    assert_eq!(normalize_aviation("five dollars"), "$5");
-    assert_eq!(normalize_aviation("five point two"), "5.2");
-    assert_eq!(normalize_aviation("twenty first"), "21st");
+    assert_eq!(normalize_with_options("five dollars", opts), "$5");
+    assert_eq!(normalize_with_options("five point two", opts), "5.2");
+    assert_eq!(normalize_with_options("twenty first", opts), "21st");
     // Scale-word grammar still wins (no digit prefix → grammatical).
-    assert_eq!(normalize_aviation("two thousand seventeen"), "2017");
+    assert_eq!(
+        normalize_with_options("two thousand seventeen", opts),
+        "2017"
+    );
 }
 
-/// Aviation pipeline `normalize_sentence_aviation` (sentence mode). The
-/// cardinal-aviation priority bump (89) makes flight-number spans win
-/// over date/time in real sentences.
+/// Sentence-mode concat-compound. The cardinal-aviation priority bump
+/// (89) makes flight-number spans win over date/time in real sentences.
 #[test]
 fn test_issue_14_normalize_sentence_aviation() {
+    let opts = concat_opts();
     // The original bug from issue #14.
     assert_eq!(
-        normalize_sentence_aviation("United seven eighty eight"),
+        normalize_sentence_with_options("United seven eighty eight", opts),
         "United 788"
     );
     assert_eq!(
-        normalize_sentence_aviation("flight two thirty five departs at gate four"),
+        normalize_sentence_with_options("flight two thirty five departs at gate four", opts),
         "flight 235 departs at gate 4"
     );
 
     // Scale-word grammar is preserved.
     assert_eq!(
-        normalize_sentence_aviation("two thousand seventeen"),
+        normalize_sentence_with_options("two thousand seventeen", opts),
         "2017"
     );
 
     // Money / measure stay above aviation (priority 95 / 90 > 89).
     assert_eq!(
-        normalize_sentence_aviation("I owe five dollars"),
+        normalize_sentence_with_options("I owe five dollars", opts),
         "I owe $5"
     );
 
     // Plain natural language is untouched.
     assert_eq!(
-        normalize_sentence_aviation("I have twenty one apples"),
+        normalize_sentence_with_options("I have twenty one apples", opts),
         "I have 21 apples"
     );
 }
@@ -989,8 +1003,8 @@ fn test_options_default_matches_normalize() {
     assert_eq!(normalize_with_options("two thirty five", opts), "02:35");
 }
 
-/// `concat_compound_numbers: true` should make `normalize_with_options`
-/// behave like `normalize_aviation`.
+/// `concat_compound_numbers: true` enables aviation-style concat-compound
+/// reading on the unified single-expression path.
 #[test]
 fn test_options_concat_matches_aviation() {
     let opts = NormalizeOptions::new().with_concat_compound_numbers(true);
@@ -1019,7 +1033,8 @@ fn test_sentence_options_default_matches_default() {
     );
 }
 
-/// Sentence mode with concat enabled matches `normalize_sentence_aviation`.
+/// Sentence mode with concat enabled produces aviation-style flight-number
+/// spans.
 #[test]
 fn test_sentence_options_concat_compound() {
     let opts = NormalizeOptions::new().with_concat_compound_numbers(true);
@@ -1072,30 +1087,41 @@ fn test_sentence_options_builder_compose() {
 /// `"thirty five sixty two"` → `"3562"`, not `"97"` (= 35 + 62).
 #[test]
 fn test_issue_23_compound_concat() {
+    let opts = concat_opts();
+
     // Whole-input single-expression form.
-    assert_eq!(normalize_aviation("thirty five sixty two"), "3562");
+    assert_eq!(
+        normalize_with_options("thirty five sixty two", opts),
+        "3562"
+    );
 
     // Sentence form — the original report.
     assert_eq!(
-        normalize_sentence_aviation(
-            "Alright thirty five sixty two appreciate your help United seven eighty eight"
+        normalize_sentence_with_options(
+            "Alright thirty five sixty two appreciate your help United seven eighty eight",
+            opts,
         ),
         "Alright 3562 appreciate your help United 788"
     );
 
-    // Through the unified options API too.
-    let opts = NormalizeOptions::new().with_concat_compound_numbers(true);
+    // Through the sentence options API too.
     assert_eq!(
         normalize_sentence_with_options("thirty five sixty two", opts),
         "3562"
     );
 
     // Mixed digit prefix + compounds: "two thirty five sixty two" → 23562.
-    assert_eq!(normalize_aviation("two thirty five sixty two"), "23562");
+    assert_eq!(
+        normalize_with_options("two thirty five sixty two", opts),
+        "23562"
+    );
 
     // Single chunks must NOT concatenate (preserves grammatical reading).
-    assert_eq!(normalize_aviation("twenty one"), "21");
+    assert_eq!(normalize_with_options("twenty one", opts), "21");
 
     // Scale words still anchor grammatical addition.
-    assert_eq!(normalize_aviation("two thousand seventeen"), "2017");
+    assert_eq!(
+        normalize_with_options("two thousand seventeen", opts),
+        "2017"
+    );
 }
diff --git a/tests/extensive_tests.rs b/tests/extensive_tests.rs
index c369cfc..f850c1a 100644
--- a/tests/extensive_tests.rs
+++ b/tests/extensive_tests.rs
@@ -5,8 +5,8 @@
 //! boundary conditions, roundtrip consistency, and cross-tagger interference.
 
 use text_processing_rs::{
-    normalize, normalize_sentence, normalize_sentence_with_max_span, tn_normalize,
-    tn_normalize_sentence,
+    normalize, normalize_sentence, normalize_sentence_with_options, tn_normalize,
+    tn_normalize_sentence, NormalizeOptions,
 };
 
 // ════════════════════════════════════════════════════════════════════════
@@ -953,7 +953,10 @@ fn test_sentence_itn_single_word_number() {
 #[test]
 fn test_sentence_itn_max_span_tokens() {
     // With max_span=1, multi-word expressions shouldn't be matched
-    let result = normalize_sentence_with_max_span("twenty one", 1);
+    let result = normalize_sentence_with_options(
+        "twenty one",
+        NormalizeOptions::new().with_max_span_tokens(1),
+    );
     // With span=1, "twenty" alone and "one" alone are both single cardinals
     // This tests the sliding window behavior
     assert_eq!(result, "20 1");

From c3c79e2b39b8ac01182e0c41edf96f90a0ca562e Mon Sep 17 00:00:00 2001
From: Alex-Wengg <hanweng9@gmail.com>
Date: Mon, 27 Apr 2026 00:09:25 -0400
Subject: [PATCH 3/4] refactor: extract NormalizeOptions into dedicated
 `options` module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Moves `NormalizeOptions`, its builder methods, and `DEFAULT_MAX_SPAN_TOKENS`
out of `src/lib.rs` and into a new `src/options.rs`. The struct is the
extension point for caller-tunable normalization behavior, and giving it
its own module makes room for richer per-field documentation and future
flags without further bloating the crate root.

Each field now carries:
- a "Default" line stating the no-op behavior
- a bulleted list of concrete input → output examples
- the originating issue number for the behavior
- guidance on which use cases want it on/off
- explicit interaction notes (which other taggers still win)

Also documents the `with_*` builder convention as the preferred construction
path: call sites built with it keep compiling when new fields land, whereas
call sites that use exhaustive struct literals would break.

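`pub use options::{NormalizeOptions, DEFAULT_MAX_SPAN_TOKENS};` keeps the
`text_processing_rs::NormalizeOptions` path stable — no changes required in
FFI, WASM, Swift, or test code. Note that `DEFAULT_MAX_SPAN_TOKENS` was
previously a private `const` in `src/lib.rs`; this commit makes it `pub` and
re-exports it from the crate root, so it is now part of the public API.
All Rust + FFI tests pass; WASM check clean.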
---
 src/lib.rs     |  68 ++-----------------------
 src/options.rs | 132 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 135 insertions(+), 65 deletions(-)
 create mode 100644 src/options.rs

diff --git a/src/lib.rs b/src/lib.rs
index 7920f81..cd58e2e 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -18,6 +18,7 @@
 
 pub mod custom_rules;
 pub mod itn;
+pub mod options;
 pub mod tn;
 
 #[cfg(feature = "ffi")]
@@ -25,73 +26,13 @@ pub mod ffi;
 #[cfg(all(target_arch = "wasm32", feature = "wasm"))]
 pub mod wasm;
 
+pub use options::{NormalizeOptions, DEFAULT_MAX_SPAN_TOKENS};
+
 use itn::en::{
     cardinal, date, decimal, electronic, measure, money, ordinal, punctuation, telephone, time,
     whitelist, word,
 };
 
-/// Options for the unified [`normalize_with_options`] /
-/// [`normalize_sentence_with_options`] entry points.
-///
-/// Keeping options on a struct (rather than separate `*_aviation` /
-/// `*_with_max_span` functions) lets new knobs land without exploding the
-/// public API surface — see issues #15 and #23 for the motivating discussion.
-///
-/// The flags are intentionally orthogonal and *not* tied to a particular
-/// domain. Aviation, military codes, dispatch IDs, etc. all reuse the same
-/// underlying behavior toggles.
-///
-/// # Examples
-///
-/// ```
-/// use text_processing_rs::{normalize_sentence_with_options, NormalizeOptions};
-///
-/// let opts = NormalizeOptions {
-///     concat_compound_numbers: true,
-///     max_span_tokens: Some(8),
-/// };
-/// assert_eq!(
-///     normalize_sentence_with_options("United seven eighty eight", opts),
-///     "United 788"
-/// );
-/// ```
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
-pub struct NormalizeOptions {
-    /// When `true`, sequences of spoken number words concatenate rather than
-    /// add. `"thirty five sixty two"` → `"3562"` (instead of `35 + 62 = 97`)
-    /// and `"seven eighty eight"` → `"788"`. Aviation, flight-numbers,
-    /// call-signs, and other code-style readings want this on.
-    ///
-    /// Scale-word grammar is preserved: `"two thousand seventeen"` still
-    /// resolves to `"2017"` regardless of this flag.
-    pub concat_compound_numbers: bool,
-    /// Maximum span size (tokens) considered in sentence mode. `None` means
-    /// use [`DEFAULT_MAX_SPAN_TOKENS`]. Ignored by [`normalize_with_options`].
-    pub max_span_tokens: Option<usize>,
-}
-
-impl NormalizeOptions {
-    /// Default options: standard ITN dispatch, default max span.
-    pub const fn new() -> Self {
-        Self {
-            concat_compound_numbers: false,
-            max_span_tokens: None,
-        }
-    }
-
-    /// Enable / disable compound-number concatenation.
-    pub const fn with_concat_compound_numbers(mut self, enabled: bool) -> Self {
-        self.concat_compound_numbers = enabled;
-        self
-    }
-
-    /// Set the sentence-mode max span (in tokens).
-    pub const fn with_max_span_tokens(mut self, max_span_tokens: usize) -> Self {
-        self.max_span_tokens = Some(max_span_tokens);
-        self
-    }
-}
-
 /// Normalize spoken-form text to written form.
 ///
 /// Tries taggers in order of specificity (most specific first).
@@ -946,9 +887,6 @@ fn tn_parse_span_lang(span: &str, lang: &str) -> Option<(String, u8)> {
     None
 }
 
-/// Default maximum token span to consider when scanning a sentence.
-const DEFAULT_MAX_SPAN_TOKENS: usize = 16;
-
 /// Try to parse a span of text using sentence-safe taggers.
 ///
 /// Returns `(replacement, priority_score)` if a tagger matches.
diff --git a/src/options.rs b/src/options.rs
new file mode 100644
index 0000000..78a76f3
--- /dev/null
+++ b/src/options.rs
@@ -0,0 +1,132 @@
+//! Public configuration for the unified `*_with_options` entry points.
+//!
+//! [`NormalizeOptions`] is the single extension point for caller-tunable
+//! normalization behavior. Each field is an **orthogonal behavior flag**, not
+//! a domain label — aviation flight numbers, sports scores, dispatch IDs and
+//! similar code-style readings all reuse the same toggles, and new knobs are
+//! added as additional fields rather than as new enum variants or new
+//! function names.
+//!
+//! See issues
+//! [#14](https://github.com/FluidInference/text-processing-rs/issues/14),
+//! [#15](https://github.com/FluidInference/text-processing-rs/issues/15) and
+//! [#23](https://github.com/FluidInference/text-processing-rs/issues/23) for
+//! the motivating discussion on why this is a struct rather than a `Domain`
+//! enum.
+//!
+//! # Stability
+//!
+//! New fields may be added in minor releases. Always construct
+//! [`NormalizeOptions`] via [`NormalizeOptions::new`] (or `default()`) and
+//! the chainable `with_*` methods — direct struct literals will break when
+//! new fields are introduced.
+//!
+//! # Examples
+//!
+//! ```
+//! use text_processing_rs::{normalize_sentence_with_options, NormalizeOptions};
+//!
+//! // Aviation / flight-number style: consecutive 0-99 chunks concatenate.
+//! let opts = NormalizeOptions::new()
+//!     .with_concat_compound_numbers(true)
+//!     .with_max_span_tokens(8);
+//!
+//! assert_eq!(
+//!     normalize_sentence_with_options("United seven eighty eight", opts),
+//!     "United 788"
+//! );
+//! ```
+
+/// Default maximum token span to consider when scanning a sentence.
+///
+/// Used by [`crate::normalize_sentence`] and by
+/// [`crate::normalize_sentence_with_options`] when
+/// [`NormalizeOptions::max_span_tokens`] is `None`.
+pub const DEFAULT_MAX_SPAN_TOKENS: usize = 16;
+
+/// Caller-tunable knobs for the unified
+/// [`crate::normalize_with_options`] /
+/// [`crate::normalize_sentence_with_options`] entry points.
+///
+/// Construct via [`NormalizeOptions::new`] or [`Default::default`] and
+/// configure with the chainable `with_*` methods so future fields don't
+/// break existing call sites.
+///
+/// All fields default to behavior matching plain
+/// [`crate::normalize`] / [`crate::normalize_sentence`] — opt-in is the
+/// only way to change semantics.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub struct NormalizeOptions {
+    /// Read consecutive small-number compounds as concatenated digit groups
+    /// instead of summing them.
+    ///
+    /// **Default:** `false` (preserve upstream NeMo grammatical reading).
+    ///
+    /// When `true`, the priority-89 aviation cardinal pass runs ahead of the
+    /// time/date taggers and uses [`peel_compound_chunks`] semantics:
+    /// - `"seven eighty eight"` → `"788"` (was `"95"` = 7 + 88) — issue
+    ///   [#14](https://github.com/FluidInference/text-processing-rs/issues/14)
+    /// - `"thirty five sixty two"` → `"3562"` (was `"97"` = 35 + 62) —
+    ///   issue
+    ///   [#23](https://github.com/FluidInference/text-processing-rs/issues/23)
+    /// - `"two thirty five sixty two"` → `"23562"`
+    /// - `"two thousand seventeen"` → `"2017"` (scale words still anchor
+    ///   grammatical addition)
+    /// - `"twenty one"` → `"21"` (single chunks never concatenate)
+    ///
+    /// Use cases: aviation flight numbers / call-signs, sports scores,
+    /// jersey/room numbers, dispatch IDs, any code-style reading where
+    /// consecutive small numbers should remain distinct.
+    ///
+    /// Money, measure, decimal and ordinal taggers retain their normal
+    /// priorities and continue to win where they apply (e.g.
+    /// `"five dollars"` → `"$5"` regardless of this flag).
+    ///
+    /// [`peel_compound_chunks`]: ../itn/en/cardinal/fn.peel_compound_chunks.html
+    pub concat_compound_numbers: bool,
+
+    /// Maximum span size (in whitespace-separated tokens) considered by the
+    /// sliding-window sentence scanner.
+    ///
+    /// **Default:** `None`, which resolves to [`DEFAULT_MAX_SPAN_TOKENS`]
+    /// (currently `16`).
+    ///
+    /// Lower values trade recall for speed and false-positive resistance —
+    /// a span of `2` will catch `"twenty one"` → `"21"` but not the
+    /// 5-token `"five dollars and fifty cents"` → `"$5.50"`. A span of `1`
+    /// disables multi-token matching entirely.
+    ///
+    /// Ignored by [`crate::normalize_with_options`] — single-expression
+    /// mode does not slide.
+    pub max_span_tokens: Option<usize>,
+}
+
+impl NormalizeOptions {
+    /// Construct an options bag with all fields at their library defaults.
+    ///
+    /// Equivalent to [`Default::default`] but `const`, so it can be used
+    /// in `const` contexts.
+    pub const fn new() -> Self {
+        Self {
+            concat_compound_numbers: false,
+            max_span_tokens: None,
+        }
+    }
+
+    /// Toggle [`Self::concat_compound_numbers`] (concatenate consecutive
+    /// small-number chunks instead of summing them).
+    pub const fn with_concat_compound_numbers(mut self, enabled: bool) -> Self {
+        self.concat_compound_numbers = enabled;
+        self
+    }
+
+    /// Set [`Self::max_span_tokens`] (sentence-mode sliding-window cap).
+    ///
+    /// Pass [`DEFAULT_MAX_SPAN_TOKENS`] explicitly to lock in the current
+    /// default; pass `0` for single-token-only matching (rarely useful
+    /// outside tests).
+    pub const fn with_max_span_tokens(mut self, max_span_tokens: usize) -> Self {
+        self.max_span_tokens = Some(max_span_tokens);
+        self
+    }
+}

From a4321d895cc1d249d14e80681fd0c4e13f3de1cb Mon Sep 17 00:00:00 2001
From: Alex-Wengg <hanweng9@gmail.com>
Date: Mon, 27 Apr 2026 00:14:37 -0400
Subject: [PATCH 4/4] docs: trim NormalizeOptions doc comments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Compresses the per-field and module-level docs in `src/options.rs` down
to the essentials. The two flags are the user-facing priority — readers
need the one-line behavior, the issue ref, and the default; everything
else (use-case lists, interaction tables, exhaustive examples) belongs
in the README and the integration tests, not in rustdoc bullet walls.
---
 src/options.rs | 118 +++++++------------------------------------------
 1 file changed, 15 insertions(+), 103 deletions(-)

diff --git a/src/options.rs b/src/options.rs
index 78a76f3..24b5c99 100644
--- a/src/options.rs
+++ b/src/options.rs
@@ -1,111 +1,28 @@
-//! Public configuration for the unified `*_with_options` entry points.
+//! Caller-tunable options for the unified `*_with_options` entry points.
 //!
-//! [`NormalizeOptions`] is the single extension point for caller-tunable
-//! normalization behavior. Each field is an **orthogonal behavior flag**, not
-//! a domain label — aviation flight numbers, sports scores, dispatch IDs and
-//! similar code-style readings all reuse the same toggles, and new knobs are
-//! added as additional fields rather than as new enum variants or new
-//! function names.
-//!
-//! See issues
-//! [#14](https://github.com/FluidInference/text-processing-rs/issues/14),
-//! [#15](https://github.com/FluidInference/text-processing-rs/issues/15) and
-//! [#23](https://github.com/FluidInference/text-processing-rs/issues/23) for
-//! the motivating discussion on why this is a struct rather than a `Domain`
-//! enum.
-//!
-//! # Stability
-//!
-//! New fields may be added in minor releases. Always construct
-//! [`NormalizeOptions`] via [`NormalizeOptions::new`] (or `default()`) and
-//! the chainable `with_*` methods — direct struct literals will break when
-//! new fields are introduced.
-//!
-//! # Examples
-//!
-//! ```
-//! use text_processing_rs::{normalize_sentence_with_options, NormalizeOptions};
-//!
-//! // Aviation / flight-number style: consecutive 0-99 chunks concatenate.
-//! let opts = NormalizeOptions::new()
-//!     .with_concat_compound_numbers(true)
-//!     .with_max_span_tokens(8);
-//!
-//! assert_eq!(
-//!     normalize_sentence_with_options("United seven eighty eight", opts),
-//!     "United 788"
-//! );
-//! ```
+//! Construct via [`NormalizeOptions::new`] + chainable `with_*` methods so
+//! new fields can land without breaking existing call sites.
 
-/// Default maximum token span to consider when scanning a sentence.
-///
-/// Used by [`crate::normalize_sentence`] and by
-/// [`crate::normalize_sentence_with_options`] when
-/// [`NormalizeOptions::max_span_tokens`] is `None`.
+/// Default sentence-mode sliding-window cap.
 pub const DEFAULT_MAX_SPAN_TOKENS: usize = 16;
 
-/// Caller-tunable knobs for the unified
-/// [`crate::normalize_with_options`] /
-/// [`crate::normalize_sentence_with_options`] entry points.
-///
-/// Construct via [`NormalizeOptions::new`] or [`Default::default`] and
-/// configure with the chainable `with_*` methods so future fields don't
-/// break existing call sites.
-///
-/// All fields default to behavior matching plain
-/// [`crate::normalize`] / [`crate::normalize_sentence`] — opt-in is the
-/// only way to change semantics.
+/// Options for [`crate::normalize_with_options`] and
+/// [`crate::normalize_sentence_with_options`]. Defaults match plain
+/// [`crate::normalize`] / [`crate::normalize_sentence`].
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
 pub struct NormalizeOptions {
-    /// Read consecutive small-number compounds as concatenated digit groups
-    /// instead of summing them.
-    ///
-    /// **Default:** `false` (preserve upstream NeMo grammatical reading).
-    ///
-    /// When `true`, the priority-89 aviation cardinal pass runs ahead of the
-    /// time/date taggers and uses [`peel_compound_chunks`] semantics:
-    /// - `"seven eighty eight"` → `"788"` (was `"95"` = 7 + 88) — issue
-    ///   [#14](https://github.com/FluidInference/text-processing-rs/issues/14)
-    /// - `"thirty five sixty two"` → `"3562"` (was `"97"` = 35 + 62) —
-    ///   issue
-    ///   [#23](https://github.com/FluidInference/text-processing-rs/issues/23)
-    /// - `"two thirty five sixty two"` → `"23562"`
-    /// - `"two thousand seventeen"` → `"2017"` (scale words still anchor
-    ///   grammatical addition)
-    /// - `"twenty one"` → `"21"` (single chunks never concatenate)
-    ///
-    /// Use cases: aviation flight numbers / call-signs, sports scores,
-    /// jersey/room numbers, dispatch IDs, any code-style reading where
-    /// consecutive small numbers should remain distinct.
-    ///
-    /// Money, measure, decimal and ordinal taggers retain their normal
-    /// priorities and continue to win where they apply (e.g.
-    /// `"five dollars"` → `"$5"` regardless of this flag).
-    ///
-    /// [`peel_compound_chunks`]: ../itn/en/cardinal/fn.peel_compound_chunks.html
+    /// Concatenate consecutive small-number chunks instead of summing them.
+    /// `"seven eighty eight"` → `"788"` (issue #14), `"thirty five sixty
+    /// two"` → `"3562"` (issue #23). Default `false`.
     pub concat_compound_numbers: bool,
 
-    /// Maximum span size (in whitespace-separated tokens) considered by the
-    /// sliding-window sentence scanner.
-    ///
-    /// **Default:** `None`, which resolves to [`DEFAULT_MAX_SPAN_TOKENS`]
-    /// (currently `16`).
-    ///
-    /// Lower values trade recall for speed and false-positive resistance —
-    /// a span of `2` will catch `"twenty one"` → `"21"` but not the
-    /// 5-token `"five dollars and fifty cents"` → `"$5.50"`. A span of `1`
-    /// disables multi-token matching entirely.
-    ///
-    /// Ignored by [`crate::normalize_with_options`] — single-expression
-    /// mode does not slide.
+    /// Sentence-mode sliding-window cap (in tokens). `None` uses
+    /// [`DEFAULT_MAX_SPAN_TOKENS`]. Ignored in single-expression mode.
     pub max_span_tokens: Option<usize>,
 }
 
 impl NormalizeOptions {
-    /// Construct an options bag with all fields at their library defaults.
-    ///
-    /// Equivalent to [`Default::default`] but `const`, so it can be used
-    /// in `const` contexts.
+    /// `const` constructor with library defaults.
     pub const fn new() -> Self {
         Self {
             concat_compound_numbers: false,
@@ -113,18 +30,13 @@ impl NormalizeOptions {
         }
     }
 
-    /// Toggle [`Self::concat_compound_numbers`] (concatenate consecutive
-    /// small-number chunks instead of summing them).
+    /// Set [`Self::concat_compound_numbers`].
     pub const fn with_concat_compound_numbers(mut self, enabled: bool) -> Self {
         self.concat_compound_numbers = enabled;
         self
     }
 
-    /// Set [`Self::max_span_tokens`] (sentence-mode sliding-window cap).
-    ///
-    /// Pass [`DEFAULT_MAX_SPAN_TOKENS`] explicitly to lock in the current
-    /// default; pass `0` for single-token-only matching (rarely useful
-    /// outside tests).
+    /// Set [`Self::max_span_tokens`].
     pub const fn with_max_span_tokens(mut self, max_span_tokens: usize) -> Self {
         self.max_span_tokens = Some(max_span_tokens);
         self