Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 45 additions & 71 deletions src/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,30 @@ use std::ffi::{c_char, CStr, CString};
use std::ptr;

use crate::{
custom_rules, normalize, normalize_aviation, normalize_sentence, normalize_sentence_aviation,
normalize_sentence_aviation_with_max_span, normalize_sentence_with_max_span, tn_normalize,
tn_normalize_lang, tn_normalize_sentence, tn_normalize_sentence_lang,
tn_normalize_sentence_with_max_span, tn_normalize_sentence_with_max_span_lang,
custom_rules, normalize, normalize_sentence, normalize_sentence_with_options,
normalize_with_options, tn_normalize, tn_normalize_lang, tn_normalize_sentence,
tn_normalize_sentence_lang, tn_normalize_sentence_with_max_span,
tn_normalize_sentence_with_max_span_lang, NormalizeOptions,
};

/// Translate the primitive FFI arguments into a [`NormalizeOptions`] value.
///
/// - `concat_compound_numbers`: treated as a C-style boolean — `0` leaves
///   concat behavior off, any other value turns it on (e.g.
///   `"thirty five sixty two"` → `"3562"`, `"seven eighty eight"` → `"788"`).
/// - `max_span_tokens`: `0` is a sentinel meaning "use library default" (16)
///   and is mapped to `None`; any positive value is forwarded as the
///   caller-specified max span.
fn options_from_ffi(concat_compound_numbers: u32, max_span_tokens: u32) -> NormalizeOptions {
    // Zero cannot be a meaningful span, so it doubles as "not specified".
    let max_span_tokens = match max_span_tokens {
        0 => None,
        span => Some(span as usize),
    };

    NormalizeOptions {
        concat_compound_numbers: concat_compound_numbers != 0,
        max_span_tokens,
    }
}

/// Normalize spoken-form text to written form.
///
/// # Safety
Expand Down Expand Up @@ -61,18 +79,20 @@ pub unsafe extern "C" fn nemo_normalize_sentence(input: *const c_char) -> *mut c
}
}

/// Normalize a full sentence with a configurable max span size.
/// Unified single-expression normalize with caller-specified options.
///
/// `max_span_tokens` controls the maximum number of consecutive tokens
/// considered as a single normalizable expression (default is 16).
/// `concat_compound_numbers`: `0` for standard ITN, non-zero for
/// concat-compound (aviation-style) reading where consecutive number words
/// concatenate rather than add — e.g. `"thirty five sixty two"` → `"3562"`,
/// `"seven eighty eight"` → `"788"`.
///
/// # Safety
/// - `input` must be a valid null-terminated UTF-8 string
/// - Returns a newly allocated string that must be freed with `nemo_free_string`
#[no_mangle]
pub unsafe extern "C" fn nemo_normalize_sentence_with_max_span(
pub unsafe extern "C" fn nemo_normalize_with_options(
input: *const c_char,
max_span_tokens: u32,
concat_compound_numbers: u32,
) -> *mut c_char {
if input.is_null() {
return ptr::null_mut();
Expand All @@ -83,78 +103,31 @@ pub unsafe extern "C" fn nemo_normalize_sentence_with_max_span(
Err(_) => return ptr::null_mut(),
};

let result = normalize_sentence_with_max_span(c_str, max_span_tokens as usize);

match CString::new(result) {
Ok(c_string) => c_string.into_raw(),
Err(_) => ptr::null_mut(),
}
}

/// Aviation-flavoured single-input normalize.
///
/// Layered on top of [`nemo_normalize`]: tries `cardinal::parse_aviation`
/// first so flight-number / call-sign phrases like `"seven eighty eight"`
/// resolve to `"788"`, then falls back to the regular dispatch.
///
/// # Safety
/// - `input` must be a valid null-terminated UTF-8 string
/// - Returns a newly allocated string that must be freed with `nemo_free_string`
#[no_mangle]
pub unsafe extern "C" fn nemo_normalize_aviation(input: *const c_char) -> *mut c_char {
if input.is_null() {
return ptr::null_mut();
}

let c_str = match CStr::from_ptr(input).to_str() {
Ok(s) => s,
Err(_) => return ptr::null_mut(),
};

let result = normalize_aviation(c_str);
let options = options_from_ffi(concat_compound_numbers, 0);
let result = normalize_with_options(c_str, options);

match CString::new(result) {
Ok(c_string) => c_string.into_raw(),
Err(_) => ptr::null_mut(),
}
}

/// Aviation-flavoured sentence normalize.
/// Unified sentence normalize with caller-specified options.
///
/// Sentence-mode equivalent of [`nemo_normalize_aviation`]. Aviation cardinal
/// runs at priority 89 (above date / time, below money / measure), so
/// flight-number-style spans win without disturbing money / measure / decimal.
/// `concat_compound_numbers`: `0` for standard ITN, non-zero for
/// concat-compound reading.
///
/// # Safety
/// - `input` must be a valid null-terminated UTF-8 string
/// - Returns a newly allocated string that must be freed with `nemo_free_string`
#[no_mangle]
pub unsafe extern "C" fn nemo_normalize_sentence_aviation(input: *const c_char) -> *mut c_char {
    // A null input yields a null output rather than dereferencing it.
    if input.is_null() {
        return ptr::null_mut();
    }

    // SAFETY: `input` is non-null here; the caller guarantees it points to a
    // valid null-terminated string (see the `# Safety` contract).
    //
    // Both failure modes collapse to a null return: input that is not valid
    // UTF-8, and a normalized result that cannot be converted to a `CString`.
    // On success, ownership of the allocation is handed to the caller via
    // `into_raw`.
    CStr::from_ptr(input)
        .to_str()
        .ok()
        .and_then(|text| CString::new(normalize_sentence_aviation(text)).ok())
        .map_or(ptr::null_mut(), CString::into_raw)
}

/// Aviation sentence normalize with a configurable max span size.
/// `max_span_tokens`:
/// - `0` — use library default (`16`).
/// - `>0` — use the specified max span.
///
/// # Safety
/// - `input` must be a valid null-terminated UTF-8 string
/// - Returns a newly allocated string that must be freed with `nemo_free_string`
#[no_mangle]
pub unsafe extern "C" fn nemo_normalize_sentence_aviation_with_max_span(
pub unsafe extern "C" fn nemo_normalize_sentence_with_options(
input: *const c_char,
concat_compound_numbers: u32,
max_span_tokens: u32,
) -> *mut c_char {
if input.is_null() {
Expand All @@ -166,7 +139,8 @@ pub unsafe extern "C" fn nemo_normalize_sentence_aviation_with_max_span(
Err(_) => return ptr::null_mut(),
};

let result = normalize_sentence_aviation_with_max_span(c_str, max_span_tokens as usize);
let options = options_from_ffi(concat_compound_numbers, max_span_tokens);
let result = normalize_sentence_with_options(c_str, options);

match CString::new(result) {
Ok(c_string) => c_string.into_raw(),
Expand Down Expand Up @@ -458,10 +432,10 @@ mod tests {
}

#[test]
fn test_ffi_normalize_aviation() {
fn test_ffi_normalize_with_options_concat_compound() {
unsafe {
let input = CString::new("seven eighty eight").unwrap();
let result = nemo_normalize_aviation(input.as_ptr());
let result = nemo_normalize_with_options(input.as_ptr(), 1);
assert!(!result.is_null());
let result_str = CStr::from_ptr(result).to_str().unwrap();
assert_eq!(result_str, "788");
Expand All @@ -470,10 +444,10 @@ mod tests {
}

#[test]
fn test_ffi_normalize_sentence_aviation() {
fn test_ffi_normalize_sentence_with_options_concat_compound() {
unsafe {
let input = CString::new("United seven eighty eight").unwrap();
let result = nemo_normalize_sentence_aviation(input.as_ptr());
let result = nemo_normalize_sentence_with_options(input.as_ptr(), 1, 0);
assert!(!result.is_null());
let result_str = CStr::from_ptr(result).to_str().unwrap();
assert_eq!(result_str, "United 788");
Expand Down
74 changes: 56 additions & 18 deletions src/itn/en/cardinal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,10 +199,17 @@ pub fn words_to_number(input: &str) -> Option<i128> {

/// Aviation / flight-number / call-sign reading of a number phrase.
///
/// Recognises a leading run of single-digit words concatenated with a trailing
/// grammatical compound, e.g. `"seven eighty eight"` → `788`,
/// `"two thirty five"` → `235`. Falls back to [`words_to_number`] when the
/// aviation pattern does not apply (no digit prefix, scale word present, etc.).
/// Recognises consecutive 0-99 compounds and concatenates them rather than
/// summing. Examples:
/// - `"seven eighty eight"` → `788` (digit + tens+ones compound)
/// - `"two thirty five"` → `235`
/// - `"thirty five sixty two"` → `3562` (two tens+ones compounds — fixes #23)
/// - `"twenty one"` → `21` (single chunk; identical to grammatical)
///
/// Falls back to [`words_to_number`] (grammatical addition) when the chunk
/// pattern does not apply, including any phrase containing a scale word
/// (`hundred`, `thousand`, ...). This preserves `"two thousand seventeen"`
/// → `2017`.
///
/// This is **opt-in**: callers reach for it explicitly from flight-number /
/// call-sign contexts. Generic ITN/TN dispatch keeps using [`words_to_number`]
Expand Down Expand Up @@ -230,22 +237,13 @@ pub fn words_to_number_aviation(input: &str) -> Option<i128> {
.ok();
}

// Aviation flight-number style: digit prefix + grammatical compound.
// "seven eighty eight" → "7" ‖ 88 = 788. Skipped if a scale word appears,
// since "two thousand seventeen" must stay grammatical (= 2017, not 22017).
// Concatenated 0-99 compound chunks. Skipped if a scale word appears,
// since `"two thousand seventeen"` must stay grammatical (= 2017).
let has_scale = words.iter().any(|w| SCALES.contains_key(*w));
if !has_scale {
let prefix_len = words
.iter()
.take_while(|w| single_digit_char(w).is_some())
.count();
if prefix_len >= 1 && prefix_len < words.len() {
if let Some(rest_num) = grammatical_words_to_number(&words[prefix_len..]) {
let prefix: String = words[..prefix_len]
.iter()
.map(|w| single_digit_char(w).unwrap())
.collect();
let combined = format!("{}{}", prefix, rest_num);
if let Some(chunks) = peel_compound_chunks(&words) {
if chunks.len() >= 2 {
let combined: String = chunks.iter().map(|n| n.to_string()).collect();
return combined.parse::<i128>().ok();
}
}
Expand All @@ -254,6 +252,46 @@ pub fn words_to_number_aviation(input: &str) -> Option<i128> {
grammatical_words_to_number(&words)
}

/// Split `words` left-to-right into a sequence of 0-99 number chunks.
///
/// At each position the longest match wins:
/// - a TENS word immediately followed by a ones word valued 1-9 forms one
///   chunk, e.g. `"twenty one"` → 21;
/// - otherwise a TENS word stands alone, e.g. `"twenty"` → 20;
/// - otherwise a single ONES word forms a chunk, e.g. `"seven"` → 7,
///   `"sixteen"` → 16.
///
/// Returns `None` as soon as a token is found in neither table, so
/// non-number tokens are never swallowed, and also when `words` is empty.
/// Filler such as `"and"` / `"a"` must be stripped by the caller beforehand.
fn peel_compound_chunks(words: &[&str]) -> Option<Vec<i128>> {
    let mut values = Vec::new();
    let mut remaining = words;

    while let Some((&word, tail)) = remaining.split_first() {
        // Consume the current word; a tens+ones pair consumes one more below.
        remaining = tail;

        if let Some(&tens) = TENS.get(word) {
            // Look ahead for a 1-9 unit that completes the compound.
            let unit = tail
                .first()
                .and_then(|&next| ONES.get(next))
                .filter(|&&ones| (1..=9).contains(&ones));

            match unit {
                Some(&ones) => {
                    values.push((tens + ones) as i128);
                    remaining = &tail[1..];
                }
                None => values.push(tens as i128),
            }
        } else if let Some(&ones) = ONES.get(word) {
            // Standalone ONES-table entry (digit words, ten, and teens).
            values.push(ones as i128);
        } else {
            return None;
        }
    }

    (!values.is_empty()).then_some(values)
}

/// Parse a grammatical English number with running-sum + scale multiplication.
fn grammatical_words_to_number(words: &[&str]) -> Option<i128> {
// "eleven hundred" = 1100, "twenty hundred" = 2000
Expand Down
Loading
Loading